src/third_party/icu/patches/utext.patch - cobalt - Git at Google

 diff --git a/source/common/utext.cpp b/source/common/utext.cpp
 index 6dc31a0..0c7dd1e 100644
 --- a/source/common/utext.cpp
 +++ b/source/common/utext.cpp
 @@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
 -*   Copyright (C) 2005-2015, International Business Machines
 +*   Copyright (C) 2005-2016, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
 @@ -2524,6 +2524,7 @@ ucstrTextExtract(UText *ut,
              ut->chunkLength         = si;
              ut->nativeIndexingLimit = si;
              strLength               = si;
 +            limit32                 = si;
              break;
          }
          U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */
 @@ -2545,16 +2546,21 @@ ucstrTextExtract(UText *ut,
      // If the limit index points to a lead surrogate of a pair,
      //   add the corresponding trail surrogate to the destination.
      if (si>0 && U16_IS_LEAD(s[si-1]) &&
 -        ((si<strLength || strLength<0)  && U16_IS_TRAIL(s[si])))
 +            ((si<strLength || strLength<0)  && U16_IS_TRAIL(s[si])))
      {
          if (di<destCapacity) {
              // store only if there is space in the output buffer.
 -            dest[di++] = s[si++];
 +            dest[di++] = s[si];
          }
 +        si++;
      }

      // Put iteration position at the point just following the extracted text
 -    ut->chunkOffset = uprv_min(strLength, start32 + destCapacity);
 +    if (si <= ut->chunkNativeLimit) {
 +        ut->chunkOffset = si;
 +    } else {
 +        ucstrTextAccess(ut, si, TRUE);
 +    }

      // Add a terminating NUL if space in the buffer permits,
      // and set the error status as required.
 diff --git a/source/test/intltest/utxttest.cpp b/source/test/intltest/utxttest.cpp
 index d8bedb6..801337c 100644
 --- a/source/test/intltest/utxttest.cpp
 +++ b/source/test/intltest/utxttest.cpp
 @@ -1,6 +1,6 @@
  /********************************************************************
   * COPYRIGHT:
 - * Copyright (c) 2005-2014, International Business Machines Corporation and
 + * Copyright (c) 2005-2016, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************/
  /************************************************************************
 @@ -16,6 +16,8 @@
  #include "unicode/utf8.h"
  #include "unicode/ustring.h"
  #include "unicode/uchriter.h"
 +#include "cmemory.h"
 +#include "cstr.h"
  #include "utxttest.h"

  static UBool  gFailed = FALSE;
 @@ -61,6 +63,8 @@ UTextTest::runIndexedTest(int32_t index, UBool exec,
              if (exec) Ticket10562();  break;
          case 6: name = "Ticket10983";
              if (exec) Ticket10983();  break;
 +        case 7: name = "Ticket12130";
 +            if (exec) Ticket12130(); break;
          default: name = "";          break;
      }
  }
 @@ -1501,3 +1505,79 @@ void UTextTest::Ticket10983() {

      utext_close(ut);
  }
 +
 +// Ticket 12130 - extract on a UText wrapping a null terminated UChar * string
 +//                leaves the iteration position set incorrectly when the
 +//                actual string length is not yet known.
 +//
 +//                The test text needs to be long enough that UText defers getting the length.
 +
 +void UTextTest::Ticket12130() {
 +    UErrorCode status = U_ZERO_ERROR;
 +
 +    const char *text8 =
 +        "Fundamentally, computers just deal with numbers. They store letters and other characters "
 +        "by assigning a number for each one. Before Unicode was invented, there were hundreds "
 +        "of different encoding systems for assigning these numbers. No single encoding could "
 +        "contain enough characters: for example, the European Union alone requires several "
 +        "different encodings to cover all its languages. Even for a single language like "
 +        "English no single encoding was adequate for all the letters, punctuation, and technical "
 +        "symbols in common use.";
 +
 +    UnicodeString str(text8);
 +    const UChar *ustr = str.getTerminatedBuffer();
 +    UText ut = UTEXT_INITIALIZER;
 +    utext_openUChars(&ut, ustr, -1, &status);
 +    UChar extractBuffer[50];
 +
 +    for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
 +        int32_t endIdx = startIdx + 20;
 +
 +        u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
 +        utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
 +        if (U_FAILURE(status)) {
 +            errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
 +            return;
 +        }
 +        int64_t ni  = utext_getNativeIndex(&ut);
 +        int64_t expectedni = startIdx + 20;
 +        if (expectedni > str.length()) {
 +            expectedni = str.length();
 +        }
 +        if (expectedni != ni) {
 +            errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
 +        }
 +        if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
 +            errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
 +                    __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
 +        }
 +    }
 +    utext_close(&ut);
 +
 +    // Similar utext extract, this time with the string length provided to the UText in advance,
 +    // and a buffer of larger than required capacity.
 +
 +    utext_openUChars(&ut, ustr, str.length(), &status);
 +    for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
 +        int32_t endIdx = startIdx + 20;
 +        u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
 +        utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
 +        if (U_FAILURE(status)) {
 +            errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
 +            return;
 +        }
 +        int64_t ni  = utext_getNativeIndex(&ut);
 +        int64_t expectedni = startIdx + 20;
 +        if (expectedni > str.length()) {
 +            expectedni = str.length();
 +        }
 +        if (expectedni != ni) {
 +            errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
 +        }
 +        if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
 +            errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
 +                    __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
 +        }
 +    }
 +    utext_close(&ut);
 +}
 diff --git a/source/test/intltest/utxttest.h b/source/test/intltest/utxttest.h
 index 7091655..346563d 100644
 --- a/source/test/intltest/utxttest.h
 +++ b/source/test/intltest/utxttest.h
 @@ -1,6 +1,6 @@
  /********************************************************************
   * COPYRIGHT:
 - * Copyright (c) 2005-2014, International Business Machines Corporation and
 + * Copyright (c) 2005-2016, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************/
  /************************************************************************
 @@ -35,6 +35,7 @@ public:
      void Ticket6847();
      void Ticket10562();
      void Ticket10983();
 +    void Ticket12130();

  private:
      struct m {                              // Map between native indices & code points.
	diff --git a/source/common/utext.cpp b/source/common/utext.cpp
	index 6dc31a0..0c7dd1e 100644
	--- a/source/common/utext.cpp
	+++ b/source/common/utext.cpp
	@@ -1,7 +1,7 @@
	/*
	*******************************************************************************
	*
	-* Copyright (C) 2005-2015, International Business Machines
	+* Copyright (C) 2005-2016, International Business Machines
	* Corporation and others. All Rights Reserved.
	*
	*******************************************************************************
	@@ -2524,6 +2524,7 @@ ucstrTextExtract(UText *ut,
	ut->chunkLength = si;
	ut->nativeIndexingLimit = si;
	strLength = si;
	+ limit32 = si;
	break;
	}
	U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */
	@@ -2545,16 +2546,21 @@ ucstrTextExtract(UText *ut,
	// If the limit index points to a lead surrogate of a pair,
	// add the corresponding trail surrogate to the destination.
	if (si>0 && U16_IS_LEAD(s[si-1]) &&
	- ((si<strLength \|\| strLength<0) && U16_IS_TRAIL(s[si])))
	+ ((si<strLength \|\| strLength<0) && U16_IS_TRAIL(s[si])))
	{
	if (di<destCapacity) {
	// store only if there is space in the output buffer.
	- dest[di++] = s[si++];
	+ dest[di++] = s[si];
	}
	+ si++;
	}

	// Put iteration position at the point just following the extracted text
	- ut->chunkOffset = uprv_min(strLength, start32 + destCapacity);
	+ if (si <= ut->chunkNativeLimit) {
	+ ut->chunkOffset = si;
	+ } else {
	+ ucstrTextAccess(ut, si, TRUE);
	+ }

	// Add a terminating NUL if space in the buffer permits,
	// and set the error status as required.
	diff --git a/source/test/intltest/utxttest.cpp b/source/test/intltest/utxttest.cpp
	index d8bedb6..801337c 100644
	--- a/source/test/intltest/utxttest.cpp
	+++ b/source/test/intltest/utxttest.cpp
	@@ -1,6 +1,6 @@
	/********************************************************************
	* COPYRIGHT:
	- * Copyright (c) 2005-2014, International Business Machines Corporation and
	+ * Copyright (c) 2005-2016, International Business Machines Corporation and
	* others. All Rights Reserved.
	********************************************************************/
	/************************************************************************
	@@ -16,6 +16,8 @@
	#include "unicode/utf8.h"
	#include "unicode/ustring.h"
	#include "unicode/uchriter.h"
	+#include "cmemory.h"
	+#include "cstr.h"
	#include "utxttest.h"

	static UBool gFailed = FALSE;
	@@ -61,6 +63,8 @@ UTextTest::runIndexedTest(int32_t index, UBool exec,
	if (exec) Ticket10562(); break;
	case 6: name = "Ticket10983";
	if (exec) Ticket10983(); break;
	+ case 7: name = "Ticket12130";
	+ if (exec) Ticket12130(); break;
	default: name = ""; break;
	}
	}
	@@ -1501,3 +1505,79 @@ void UTextTest::Ticket10983() {

	utext_close(ut);
	}
	+
	+// Ticket 12130 - extract on a UText wrapping a null terminated UChar * string
	+// leaves the iteration position set incorrectly when the
	+// actual string length is not yet known.
	+//
	+// The test text needs to be long enough that UText defers getting the length.
	+
	+void UTextTest::Ticket12130() {
	+ UErrorCode status = U_ZERO_ERROR;
	+
	+ const char *text8 =
	+ "Fundamentally, computers just deal with numbers. They store letters and other characters "
	+ "by assigning a number for each one. Before Unicode was invented, there were hundreds "
	+ "of different encoding systems for assigning these numbers. No single encoding could "
	+ "contain enough characters: for example, the European Union alone requires several "
	+ "different encodings to cover all its languages. Even for a single language like "
	+ "English no single encoding was adequate for all the letters, punctuation, and technical "
	+ "symbols in common use.";
	+
	+ UnicodeString str(text8);
	+ const UChar *ustr = str.getTerminatedBuffer();
	+ UText ut = UTEXT_INITIALIZER;
	+ utext_openUChars(&ut, ustr, -1, &status);
	+ UChar extractBuffer[50];
	+
	+ for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
	+ int32_t endIdx = startIdx + 20;
	+
	+ u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
	+ utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
	+ if (U_FAILURE(status)) {
	+ errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
	+ return;
	+ }
	+ int64_t ni = utext_getNativeIndex(&ut);
	+ int64_t expectedni = startIdx + 20;
	+ if (expectedni > str.length()) {
	+ expectedni = str.length();
	+ }
	+ if (expectedni != ni) {
	+ errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
	+ }
	+ if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
	+ errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
	+ __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
	+ }
	+ }
	+ utext_close(&ut);
	+
	+ // Similar utext extract, this time with the string length provided to the UText in advance,
	+ // and a buffer of larger than required capacity.
	+
	+ utext_openUChars(&ut, ustr, str.length(), &status);
	+ for (int32_t startIdx = 0; startIdx<str.length(); ++startIdx) {
	+ int32_t endIdx = startIdx + 20;
	+ u_memset(extractBuffer, 0, UPRV_LENGTHOF(extractBuffer));
	+ utext_extract(&ut, startIdx, endIdx, extractBuffer, UPRV_LENGTHOF(extractBuffer), &status);
	+ if (U_FAILURE(status)) {
	+ errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
	+ return;
	+ }
	+ int64_t ni = utext_getNativeIndex(&ut);
	+ int64_t expectedni = startIdx + 20;
	+ if (expectedni > str.length()) {
	+ expectedni = str.length();
	+ }
	+ if (expectedni != ni) {
	+ errln("%s:%d utext_getNativeIndex() expected %d, got %d", __FILE__, __LINE__, expectedni, ni);
	+ }
	+ if (0 != str.tempSubString(startIdx, 20).compare(extractBuffer)) {
	+ errln("%s:%d utext_extract() failed. expected \"%s\", got \"%s\"",
	+ __FILE__, __LINE__, CStr(str.tempSubString(startIdx, 20))(), CStr(UnicodeString(extractBuffer))());
	+ }
	+ }
	+ utext_close(&ut);
	+}
	diff --git a/source/test/intltest/utxttest.h b/source/test/intltest/utxttest.h
	index 7091655..346563d 100644
	--- a/source/test/intltest/utxttest.h
	+++ b/source/test/intltest/utxttest.h
	@@ -1,6 +1,6 @@
	/********************************************************************
	* COPYRIGHT:
	- * Copyright (c) 2005-2014, International Business Machines Corporation and
	+ * Copyright (c) 2005-2016, International Business Machines Corporation and
	* others. All Rights Reserved.
	********************************************************************/
	/************************************************************************
	@@ -35,6 +35,7 @@ public:
	void Ticket6847();
	void Ticket10562();
	void Ticket10983();
	+ void Ticket12130();

	private:
	struct m { // Map between native indices & code points.