| diff --git a/source/common/utext.cpp b/source/common/utext.cpp |
| index 6dc31a0..0c7dd1e 100644 |
| --- a/source/common/utext.cpp |
| +++ b/source/common/utext.cpp |
| @@ -1,7 +1,7 @@ |
| /* |
| ******************************************************************************* |
| * |
| -* Copyright (C) 2005-2015, International Business Machines |
| +* Copyright (C) 2005-2016, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| * |
| ******************************************************************************* |
| @@ -2524,6 +2524,7 @@ ucstrTextExtract(UText *ut, |
| ut->chunkLength = si; |
| ut->nativeIndexingLimit = si; |
| strLength = si; |
| + limit32 = si; |
| break; |
| } |
| U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */ |
| @@ -2545,16 +2546,21 @@ ucstrTextExtract(UText *ut, |
| // If the limit index points to a lead surrogate of a pair, |
| // add the corresponding trail surrogate to the destination. |
| if (si>0 && U16_IS_LEAD(s[si-1]) && |
| - ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si]))) |
| + ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si]))) |
| { |
| if (di<destCapacity) { |
| // store only if there is space in the output buffer. |
| - dest[di++] = s[si++]; |
| + dest[di++] = s[si]; |
| } |
| + si++; |
| } |
| |
| // Put iteration position at the point just following the extracted text |
| - ut->chunkOffset = uprv_min(strLength, start32 + destCapacity); |
| + if (si <= ut->chunkNativeLimit) { |
| + ut->chunkOffset = si; |
| + } else { |
| + ucstrTextAccess(ut, si, TRUE); |
| + } |
| |
| // Add a terminating NUL if space in the buffer permits, |
| // and set the error status as required. |
| diff --git a/source/test/intltest/utxttest.cpp b/source/test/intltest/utxttest.cpp |
| index d8bedb6..801337c 100644 |
| --- a/source/test/intltest/utxttest.cpp |
| +++ b/source/test/intltest/utxttest.cpp |
| @@ -1,6 +1,6 @@ |
| /******************************************************************** |
| * COPYRIGHT: |
| - * Copyright (c) 2005-2014, International Business Machines Corporation and |
| + * Copyright (c) 2005-2016, International Business Machines Corporation and |
| * others. All Rights Reserved. |
| ********************************************************************/ |
| /************************************************************************ |
| @@ -16,6 +16,8 @@ |
| #include "unicode/utf8.h" |
| #include "unicode/ustring.h" |
| #include "unicode/uchriter.h" |
| +#include "cmemory.h" |
| +#include "cstr.h" |
| #include "utxttest.h" |
| |
| static UBool gFailed = FALSE; |
| @@ -61,6 +63,8 @@ UTextTest::runIndexedTest(int32_t index, UBool exec, |
| if (exec) Ticket10562(); break; |
| case 6: name = "Ticket10983"; |
| if (exec) Ticket10983(); break; |
| + case 7: name = "Ticket12130"; |
| + if (exec) Ticket12130(); break; |
| default: name = ""; break; |
| } |
| } |
| @@ -1501,3 +1505,79 @@ void UTextTest::Ticket10983() { |
| |
| utext_close(ut); |
| } |
| + |
| +// Ticket 12130 - extract on a UText wrapping a null terminated UChar * string |
| +// leaves the iteration position set incorrectly when the |
| +// actual string length is not yet known. |
| +// |
| +// The test text needs to be long enough that UText defers getting the length. |
| + |
| +void UTextTest::Ticket12130() { |
| + UErrorCode status = U_ZERO_ERROR; |
| + |
| + const char *text8 = |
| + "Fundamentally, computers just deal with numbers. They store letters and other characters " |
| + "by assigning a number for each one. Before Unicode was invented, there were hundreds " |
| + "of different encoding systems for assigning these numbers. No single encoding could " |
| + "contain enough characters: for example, the European Union alone requires several " |
| + "different encodings to cover all its languages. Even for a single language like " |
| + "English no single encoding was adequate for all the letters, punctuation, and technical " |
| + "symbols in common use."; |
| + |
| + UnicodeString str(text8); |
| + const UChar *ustr = str.getTerminatedBuffer(); |
| + UText ut = UTEXT_INITIALIZER; |
| + utext_openUChars(&ut, ustr, -1, &status); |
| + UChar extractBuffer[50]; |
| + |
| + for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) { |
| + int32_t endIdx = startIdx + 20; |
| + |
| + u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer)); |
| + utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status); |
| + if (U_FAILURE(status)) { |
| + errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status)); |
| + return; |
| + } |
| + int64_t ni = utext_getNativeIndex(&ut); |
| + int64_t expectedni = startIdx + 20; |
| + if (expectedni > str.length()) { |
| + expectedni = str.length(); |
| + } |
| + if (expectedni != ni) { |
| + errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni); |
| + } |
| + if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) { |
| + errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"", |
| + __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))()); |
| + } |
| + } |
| + utext_close(&ut); |
| + |
| + // Similar utext extract, this time with the string length provided to the UText in advance, |
| + // and a buffer of larger than required capacity. |
| + |
| + utext_openUChars(&ut, ustr, str.length(), &status); |
| + for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) { |
| + int32_t endIdx = startIdx + 20; |
| + u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer)); |
| + utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status); |
| + if (U_FAILURE(status)) { |
| + errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status)); |
| + return; |
| + } |
| + int64_t ni = utext_getNativeIndex(&ut); |
| + int64_t expectedni = startIdx + 20; |
| + if (expectedni > str.length()) { |
| + expectedni = str.length(); |
| + } |
| + if (expectedni != ni) { |
| + errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni); |
| + } |
| + if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) { |
| + errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"", |
| + __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))()); |
| + } |
| + } |
| + utext_close(&ut); |
| +} |
| diff --git a/source/test/intltest/utxttest.h b/source/test/intltest/utxttest.h |
| index 7091655..346563d 100644 |
| --- a/source/test/intltest/utxttest.h |
| +++ b/source/test/intltest/utxttest.h |
| @@ -1,6 +1,6 @@ |
| /******************************************************************** |
| * COPYRIGHT: |
| - * Copyright (c) 2005-2014, International Business Machines Corporation and |
| + * Copyright (c) 2005-2016, International Business Machines Corporation and |
| * others. All Rights Reserved. |
| ********************************************************************/ |
| /************************************************************************ |
| @@ -35,6 +35,7 @@ public: |
| void Ticket6847(); |
| void Ticket10562(); |
| void Ticket10983(); |
| + void Ticket12130(); |
| |
| private: |
| struct m { // Map between native indices & code points. |