| /******************************************************************** |
| * COPYRIGHT: |
| * Copyright (c) 1997-2014, International Business Machines Corporation and |
| * others. All Rights Reserved. |
| ********************************************************************/ |
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_COLLATION |
| |
| #include "unicode/coll.h" |
| #include "unicode/tblcoll.h" |
| #include "unicode/unistr.h" |
| #include "unicode/sortkey.h" |
| #include "itercoll.h" |
| #include "unicode/schriter.h" |
| #include "unicode/chariter.h" |
| #include "unicode/uchar.h" |
| #include "cmemory.h" |
| |
/**
 * Number of elements in a statically sized array.
 *
 * The parameter is parenthesized in both places it is used so that a compound
 * argument (for example a conditional expression) expands as one operand.
 * The argument must be a real array, not a pointer.
 */
#define ARRAY_LENGTH(array) (sizeof(array) / sizeof((array)[0]))
| |
| static UErrorCode status = U_ZERO_ERROR; |
| |
| CollationIteratorTest::CollationIteratorTest() |
| : test1("What subset of all possible test cases?", ""), |
| test2("has the highest probability of detecting", "") |
| { |
| en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), status); |
| if(U_FAILURE(status)) { |
| delete en_us; |
| en_us = 0; |
| errcheckln(status, "Collator creation failed with %s", u_errorName(status)); |
| return; |
| } |
| |
| } |
| |
| CollationIteratorTest::~CollationIteratorTest() |
| { |
| delete en_us; |
| } |
| |
| /** |
| * Test for CollationElementIterator previous and next for the whole set of |
| * unicode characters. |
| */ |
| void CollationIteratorTest::TestUnicodeChar() |
| { |
| CollationElementIterator *iter; |
| UChar codepoint; |
| UnicodeString source; |
| |
| for (codepoint = 1; codepoint < 0xFFFE;) |
| { |
| source.remove(); |
| |
| while (codepoint % 0xFF != 0) |
| { |
| if (u_isdefined(codepoint)) |
| source += codepoint; |
| codepoint ++; |
| } |
| |
| if (u_isdefined(codepoint)) |
| source += codepoint; |
| |
| if (codepoint != 0xFFFF) |
| codepoint ++; |
| |
| iter = en_us->createCollationElementIterator(source); |
| /* A basic test to see if it's working at all */ |
| backAndForth(*iter); |
| delete iter; |
| } |
| } |
| |
| /** |
| * Test for CollationElementIterator.previous() |
| * |
| * @bug 4108758 - Make sure it works with contracting characters |
| * |
| */ |
| void CollationIteratorTest::TestPrevious(/* char* par */) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| CollationElementIterator *iter = en_us->createCollationElementIterator(test1); |
| |
| // A basic test to see if it's working at all |
| backAndForth(*iter); |
| delete iter; |
| |
| // Test with a contracting character sequence |
| UnicodeString source; |
| RuleBasedCollator *c1 = NULL; |
| c1 = new RuleBasedCollator( |
| (UnicodeString)"&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH", status); |
| |
| if (c1 == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a RuleBasedCollator with a contracting sequence."); |
| delete c1; |
| return; |
| } |
| |
| source = "abchdcba"; |
| iter = c1->createCollationElementIterator(source); |
| backAndForth(*iter); |
| delete iter; |
| delete c1; |
| |
| // Test with an expanding character sequence |
| RuleBasedCollator *c2 = NULL; |
| c2 = new RuleBasedCollator((UnicodeString)"&a < b < c/abd < d", status); |
| |
| if (c2 == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a RuleBasedCollator with an expanding sequence."); |
| delete c2; |
| return; |
| } |
| |
| source = "abcd"; |
| iter = c2->createCollationElementIterator(source); |
| backAndForth(*iter); |
| delete iter; |
| delete c2; |
| |
| // Now try both |
| RuleBasedCollator *c3 = NULL; |
| c3 = new RuleBasedCollator((UnicodeString)"&a < b < c/aba < d < z < ch", status); |
| |
| if (c3 == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence."); |
| delete c3; |
| return; |
| } |
| |
| source = "abcdbchdc"; |
| iter = c3->createCollationElementIterator(source); |
| backAndForth(*iter); |
| delete iter; |
| delete c3; |
| |
| status=U_ZERO_ERROR; |
| source= CharsToUnicodeString("\\u0e41\\u0e02\\u0e41\\u0e02\\u0e27abc"); |
| |
| Collator *c4 = Collator::createInstance(Locale("th", "TH", ""), status); |
| if(U_FAILURE(status)){ |
| errln("Couldn't create a collator"); |
| } |
| iter = ((RuleBasedCollator*)c4)->createCollationElementIterator(source); |
| backAndForth(*iter); |
| delete iter; |
| delete c4; |
| |
| source= CharsToUnicodeString("\\u0061\\u30CF\\u3099\\u30FC"); |
| Collator *c5 = Collator::createInstance(Locale("ja", "JP", ""), status); |
| |
| iter = ((RuleBasedCollator*)c5)->createCollationElementIterator(source); |
| if(U_FAILURE(status)){ |
| errln("Couldn't create Japanese collator\n"); |
| } |
| backAndForth(*iter); |
| delete iter; |
| delete c5; |
| } |
| |
| /** |
| * Test for getOffset() and setOffset() |
| */ |
| void CollationIteratorTest::TestOffset(/* char* par */) |
| { |
| CollationElementIterator *iter = en_us->createCollationElementIterator(test1); |
| UErrorCode status = U_ZERO_ERROR; |
| // testing boundaries |
| iter->setOffset(0, status); |
| if (U_FAILURE(status) || iter->previous(status) != CollationElementIterator::NULLORDER) { |
| errln("Error: After setting offset to 0, we should be at the end " |
| "of the backwards iteration"); |
| } |
| iter->setOffset(test1.length(), status); |
| if (U_FAILURE(status) || iter->next(status) != CollationElementIterator::NULLORDER) { |
| errln("Error: After setting offset to end of the string, we should " |
| "be at the end of the backwards iteration"); |
| } |
| |
| // Run all the way through the iterator, then get the offset |
| int32_t orderLength = 0; |
| Order *orders = getOrders(*iter, orderLength); |
| |
| int32_t offset = iter->getOffset(); |
| |
| if (offset != test1.length()) |
| { |
| UnicodeString msg1("offset at end != length: "); |
| UnicodeString msg2(" vs "); |
| |
| errln(msg1 + offset + msg2 + test1.length()); |
| } |
| |
| // Now set the offset back to the beginning and see if it works |
| CollationElementIterator *pristine = en_us->createCollationElementIterator(test1); |
| |
| iter->setOffset(0, status); |
| |
| if (U_FAILURE(status)) |
| { |
| errln("setOffset failed."); |
| } |
| else |
| { |
| assertEqual(*iter, *pristine); |
| } |
| |
| delete pristine; |
| delete[] orders; |
| delete iter; |
| |
| // setting offset in the middle of a contraction |
| UnicodeString contraction = "change"; |
| status = U_ZERO_ERROR; |
| RuleBasedCollator tailored("& a < ch", status); |
| if (U_FAILURE(status)) { |
| errln("Error: in creation of Spanish collator - %s", u_errorName(status)); |
| return; |
| } |
| iter = tailored.createCollationElementIterator(contraction); |
| Order *order = getOrders(*iter, orderLength); |
| iter->setOffset(1, status); // sets offset in the middle of ch |
| int32_t order2Length = 0; |
| Order *order2 = getOrders(*iter, order2Length); |
| if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) { |
| errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction"); |
| } |
| delete[] order; |
| delete[] order2; |
| delete iter; |
| contraction = "peache"; |
| iter = tailored.createCollationElementIterator(contraction); |
| iter->setOffset(3, status); |
| order = getOrders(*iter, orderLength); |
| iter->setOffset(4, status); // sets offset in the middle of ch |
| order2 = getOrders(*iter, order2Length); |
| if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) { |
| errln("Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction"); |
| } |
| delete[] order; |
| delete[] order2; |
| delete iter; |
| // setting offset in the middle of a surrogate pair |
| UnicodeString surrogate = UNICODE_STRING_SIMPLE("\\ud800\\udc00str").unescape(); |
| iter = tailored.createCollationElementIterator(surrogate); |
| order = getOrders(*iter, orderLength); |
| iter->setOffset(1, status); // sets offset in the middle of surrogate |
| order2 = getOrders(*iter, order2Length); |
| if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) { |
| errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair"); |
| } |
| delete[] order; |
| delete[] order2; |
| delete iter; |
| surrogate = UNICODE_STRING_SIMPLE("simple\\ud800\\udc00str").unescape(); |
| iter = tailored.createCollationElementIterator(surrogate); |
| iter->setOffset(6, status); |
| order = getOrders(*iter, orderLength); |
| iter->setOffset(7, status); // sets offset in the middle of surrogate |
| order2 = getOrders(*iter, order2Length); |
| if (orderLength != order2Length || uprv_memcmp(order, order2, orderLength * sizeof(Order)) != 0) { |
| errln("Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair"); |
| } |
| delete[] order; |
| delete[] order2; |
| delete iter; |
| // TODO: try iterating halfway through a messy string. |
| } |
| |
| /** |
| * Test for setText() |
| */ |
| void CollationIteratorTest::TestSetText(/* char* par */) |
| { |
| CollationElementIterator *iter1 = en_us->createCollationElementIterator(test1); |
| CollationElementIterator *iter2 = en_us->createCollationElementIterator(test2); |
| UErrorCode status = U_ZERO_ERROR; |
| |
| // Run through the second iterator just to exercise it |
| int32_t c = iter2->next(status); |
| int32_t i = 0; |
| |
| while ( ++i < 10 && c != CollationElementIterator::NULLORDER) |
| { |
| if (U_FAILURE(status)) |
| { |
| errln("iter2->next() returned an error."); |
| delete iter2; |
| delete iter1; |
| } |
| |
| c = iter2->next(status); |
| } |
| |
| // Now set it to point to the same string as the first iterator |
| iter2->setText(test1, status); |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to iter2->setText(test1) failed."); |
| } |
| else |
| { |
| assertEqual(*iter1, *iter2); |
| } |
| iter1->reset(); |
| //now use the overloaded setText(ChracterIterator&, UErrorCode) function to set the text |
| CharacterIterator* chariter = new StringCharacterIterator(test1); |
| iter2->setText(*chariter, status); |
| if (U_FAILURE(status)) |
| { |
| errln("call to iter2->setText(chariter(test1)) failed."); |
| } |
| else |
| { |
| assertEqual(*iter1, *iter2); |
| } |
| |
| // test for an empty string |
| UnicodeString empty(""); |
| iter1->setText(empty, status); |
| if (U_FAILURE(status) |
| || iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) { |
| errln("Empty string should have no CEs."); |
| } |
| ((StringCharacterIterator *)chariter)->setText(empty); |
| iter1->setText(*chariter, status); |
| if (U_FAILURE(status) |
| || iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) { |
| errln("Empty string should have no CEs."); |
| } |
| delete chariter; |
| delete iter2; |
| delete iter1; |
| } |
| |
| /** @bug 4108762 |
| * Test for getMaxExpansion() |
| */ |
| void CollationIteratorTest::TestMaxExpansion(/* char* par */) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| UnicodeString rule("&a < ab < c/aba < d < z < ch"); |
| RuleBasedCollator *coll = new RuleBasedCollator(rule, status); |
| UChar ch = 0; |
| UnicodeString str(ch); |
| |
| CollationElementIterator *iter = coll->createCollationElementIterator(str); |
| |
| while (ch < 0xFFFF && U_SUCCESS(status)) { |
| int count = 1; |
| uint32_t order; |
| ch ++; |
| UnicodeString str(ch); |
| iter->setText(str, status); |
| order = iter->previous(status); |
| |
| /* thai management */ |
| if (CollationElementIterator::isIgnorable(order)) |
| order = iter->previous(status); |
| |
| while (U_SUCCESS(status) |
| && iter->previous(status) != (int32_t)CollationElementIterator::NULLORDER) |
| { |
| count ++; |
| } |
| |
| if (U_FAILURE(status) && iter->getMaxExpansion(order) < count) { |
| errln("Failure at codepoint %d, maximum expansion count < %d\n", |
| ch, count); |
| } |
| } |
| |
| delete iter; |
| delete coll; |
| } |
| |
| /* |
| * @bug 4157299 |
| */ |
| void CollationIteratorTest::TestClearBuffers(/* char* par */) |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedCollator *c = new RuleBasedCollator((UnicodeString)"&a < b < c & ab = d", status); |
| |
| if (c == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a RuleBasedCollator."); |
| delete c; |
| return; |
| } |
| |
| UnicodeString source("abcd"); |
| CollationElementIterator *i = c->createCollationElementIterator(source); |
| int32_t e0 = i->next(status); // save the first collation element |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to i->next() failed. err=%s", u_errorName(status)); |
| } |
| else |
| { |
| i->setOffset(3, status); // go to the expanding character |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to i->setOffset(3) failed. err=%s", u_errorName(status)); |
| } |
| else |
| { |
| i->next(status); // but only use up half of it |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to i->next() failed. err=%s", u_errorName(status)); |
| } |
| else |
| { |
| i->setOffset(0, status); // go back to the beginning |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to i->setOffset(0) failed. err=%s", u_errorName(status)); |
| } |
| else |
| { |
| int32_t e = i->next(status); // and get this one again |
| |
| if (U_FAILURE(status)) |
| { |
| errln("call to i->next() failed. err=%s", u_errorName(status)); |
| } |
| else if (e != e0) |
| { |
| errln("got 0x%X, expected 0x%X", e, e0); |
| } |
| } |
| } |
| } |
| } |
| |
| delete i; |
| delete c; |
| } |
| |
| /** |
| * Testing the assignment operator |
| */ |
| void CollationIteratorTest::TestAssignment() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedCollator *coll = |
| (RuleBasedCollator *)Collator::createInstance(status); |
| |
| if (coll == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a default collator."); |
| return; |
| } |
| |
| UnicodeString source("abcd"); |
| CollationElementIterator *iter1 = |
| coll->createCollationElementIterator(source); |
| |
| CollationElementIterator iter2 = *iter1; |
| |
| if (*iter1 != iter2) { |
| errln("Fail collation iterator assignment does not produce the same elements"); |
| } |
| |
| CollationElementIterator iter3(*iter1); |
| |
| if (*iter1 != iter3) { |
| errln("Fail collation iterator copy constructor does not produce the same elements"); |
| } |
| |
| source = CharsToUnicodeString("a\\u0300\\u0325"); |
| coll->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
| CollationElementIterator *iter4 |
| = coll->createCollationElementIterator(source); |
| CollationElementIterator iter5(*iter4); |
| if (*iter4 != iter5) { |
| errln("collation iterator assignment does not produce the same elements"); |
| } |
| iter4->next(status); |
| if (U_FAILURE(status) || *iter4 == iter5) { |
| errln("collation iterator not equal"); |
| } |
| iter5.next(status); |
| if (U_FAILURE(status) || *iter4 != iter5) { |
| errln("collation iterator equal"); |
| } |
| iter4->next(status); |
| if (U_FAILURE(status) || *iter4 == iter5) { |
| errln("collation iterator not equal"); |
| } |
| iter5.next(status); |
| if (U_FAILURE(status) || *iter4 != iter5) { |
| errln("collation iterator equal"); |
| } |
| CollationElementIterator iter6(*iter4); |
| if (*iter4 != iter6) { |
| errln("collation iterator equal"); |
| } |
| iter4->next(status); |
| if (U_FAILURE(status) || *iter4 == iter5) { |
| errln("collation iterator not equal"); |
| } |
| iter5.next(status); |
| if (U_FAILURE(status) || *iter4 != iter5) { |
| errln("collation iterator equal"); |
| } |
| iter4->next(status); |
| if (U_FAILURE(status) || *iter4 == iter5) { |
| errln("collation iterator not equal"); |
| } |
| iter5.next(status); |
| if (U_FAILURE(status) || *iter4 != iter5) { |
| errln("collation iterator equal"); |
| } |
| delete iter1; |
| delete iter4; |
| delete coll; |
| } |
| |
| /** |
| * Testing the constructors |
| */ |
| void CollationIteratorTest::TestConstructors() |
| { |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedCollator *coll = |
| (RuleBasedCollator *)Collator::createInstance(status); |
| if (coll == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a default collator."); |
| return; |
| } |
| |
| // testing protected constructor with character iterator as argument |
| StringCharacterIterator chariter(test1); |
| CollationElementIterator *iter1 = |
| coll->createCollationElementIterator(chariter); |
| if (U_FAILURE(status)) { |
| errln("Couldn't create collation element iterator with character iterator."); |
| return; |
| } |
| CollationElementIterator *iter2 = |
| coll->createCollationElementIterator(test1); |
| |
| // initially the 2 collation element iterators should be the same |
| if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2 |
| || *iter2 != *iter1) { |
| errln("CollationElementIterators constructed with the same string data should be the same at the start"); |
| } |
| assertEqual(*iter1, *iter2); |
| |
| delete iter1; |
| delete iter2; |
| |
| // tests empty strings |
| UnicodeString empty(""); |
| iter1 = coll->createCollationElementIterator(empty); |
| chariter.setText(empty); |
| iter2 = coll->createCollationElementIterator(chariter); |
| if (*iter1 != *iter1 || *iter2 != *iter2 || *iter1 != *iter2 |
| || *iter2 != *iter1) { |
| errln("CollationElementIterators constructed with the same string data should be the same at the start"); |
| } |
| if (iter1->next(status) != (int32_t)CollationElementIterator::NULLORDER) { |
| errln("Empty string should have no CEs."); |
| } |
| if (iter2->next(status) != (int32_t)CollationElementIterator::NULLORDER) { |
| errln("Empty string should have no CEs."); |
| } |
| delete iter1; |
| delete iter2; |
| delete coll; |
| } |
| |
| /** |
| * Testing the strength order |
| */ |
| void CollationIteratorTest::TestStrengthOrder() |
| { |
| int order = 0x0123ABCD; |
| |
| UErrorCode status = U_ZERO_ERROR; |
| RuleBasedCollator *coll = |
| (RuleBasedCollator *)Collator::createInstance(status); |
| if (coll == NULL || U_FAILURE(status)) |
| { |
| errln("Couldn't create a default collator."); |
| return; |
| } |
| |
| coll->setStrength(Collator::PRIMARY); |
| CollationElementIterator *iter = |
| coll->createCollationElementIterator(test1); |
| |
| if (iter == NULL) { |
| errln("Couldn't create a collation element iterator from default collator"); |
| return; |
| } |
| |
| if (iter->strengthOrder(order) != 0x01230000) { |
| errln("Strength order for a primary strength collator should be the first 2 bytes"); |
| return; |
| } |
| |
| coll->setStrength(Collator::SECONDARY); |
| if (iter->strengthOrder(order) != 0x0123AB00) { |
| errln("Strength order for a secondary strength collator should be the third byte"); |
| return; |
| } |
| |
| coll->setStrength(Collator::TERTIARY); |
| if (iter->strengthOrder(order) != order) { |
| errln("Strength order for a tertiary strength collator should be the third byte"); |
| return; |
| } |
| delete iter; |
| delete coll; |
| } |
| |
| /** |
| * Return a string containing all of the collation orders |
| * returned by calls to next on the specified iterator |
| */ |
| UnicodeString &CollationIteratorTest::orderString(CollationElementIterator &iter, UnicodeString &target) |
| { |
| int32_t order; |
| UErrorCode status = U_ZERO_ERROR; |
| |
| while ((order = iter.next(status)) != CollationElementIterator::NULLORDER) |
| { |
| target += "0x"; |
| appendHex(order, 8, target); |
| target += " "; |
| } |
| |
| return target; |
| } |
| |
| void CollationIteratorTest::assertEqual(CollationElementIterator &i1, CollationElementIterator &i2) |
| { |
| int32_t c1, c2, count = 0; |
| UErrorCode status = U_ZERO_ERROR; |
| |
| do |
| { |
| c1 = i1.next(status); |
| c2 = i2.next(status); |
| |
| if (c1 != c2) |
| { |
| errln(" %d: strength(0x%X) != strength(0x%X)", count, c1, c2); |
| break; |
| } |
| |
| count += 1; |
| } |
| while (c1 != CollationElementIterator::NULLORDER); |
| } |
| |
| void CollationIteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) |
| { |
| if (exec) |
| { |
| logln("Collation Iteration Tests: "); |
| } |
| |
| if(en_us) { |
| switch (index) |
| { |
| case 0: name = "TestPrevious"; if (exec) TestPrevious(/* par */); break; |
| case 1: name = "TestOffset"; if (exec) TestOffset(/* par */); break; |
| case 2: name = "TestSetText"; if (exec) TestSetText(/* par */); break; |
| case 3: name = "TestMaxExpansion"; if (exec) TestMaxExpansion(/* par */); break; |
| case 4: name = "TestClearBuffers"; if (exec) TestClearBuffers(/* par */); break; |
| case 5: name = "TestUnicodeChar"; if (exec) TestUnicodeChar(/* par */); break; |
| case 6: name = "TestAssignment"; if (exec) TestAssignment(/* par */); break; |
| case 7: name = "TestConstructors"; if (exec) TestConstructors(/* par */); break; |
| case 8: name = "TestStrengthOrder"; if (exec) TestStrengthOrder(/* par */); break; |
| default: name = ""; break; |
| } |
| } else { |
| dataerrln("Class iterator not instantiated"); |
| name = ""; |
| } |
| } |
| |
| #endif /* #if !UCONFIG_NO_COLLATION */ |