| <?xml version="1.0" encoding="UTF-8"?> |
| |
| <!-- Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved --> |
| |
| <!-- Test data file for string search. |
| Each test-case carries a search pattern plus the target text, split into |
| pre (text before the expected match), m (the expected match itself) and |
| post (text after it). Judging by the simple match / no-match cases below, |
| a test-case with no m element expects the search to find nothing. |
| Attributes left off a test-case take the defaults declared in the DTD: |
| locale "en", strength TERTIARY, norm OFF, alternate_handling NON_IGNORABLE. |
| The optional debug attribute on the root element is an IDREF to a |
| test-case id; presumably the harness uses it to single out one case |
| while debugging (TODO confirm against the test harness). --> |
| <!DOCTYPE stringsearch-tests [ |
| <!ELEMENT stringsearch-tests (test-case+)> |
| <!ATTLIST stringsearch-tests debug IDREF #IMPLIED > |
| <!ELEMENT test-case (pattern, pre?, m?, post?)> |
| <!ATTLIST test-case |
| id ID #REQUIRED |
| locale CDATA "en" |
| strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY" |
| norm (ON | OFF) "OFF" |
| alternate_handling (NON_IGNORABLE | SHIFTED) "NON_IGNORABLE" |
| > |
| |
| <!ELEMENT pattern (#PCDATA)> |
| <!ELEMENT pre (#PCDATA)> |
| <!ELEMENT m (#PCDATA)> |
| <!ELEMENT post (#PCDATA)> |
| ]> |
| |
| <stringsearch-tests> |
| <!-- debug="test11" (for copying into the above element) --> |
| |
| <!-- Very simple match --> |
| <test-case id="test01" > |
| <pattern>abc</pattern> |
| <pre>xxx</pre><m>abc</m><post>yyy</post> |
| </test-case> |
| |
| <!-- Very simple no-match --> |
| <test-case id="test02" > |
| <pattern>abc</pattern> |
| <pre>xxx</pre><post>yyy</post> |
| </test-case> |
| |
| <!-- Match after several near-misses. --> |
| <test-case id="test03" > |
| <pattern>string</pattern> |
| <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><post> fling</post> |
| </test-case> |
| |
| <test-case id="test04" strength="PRIMARY" > |
| <pattern>FUSS</pattern> |
| <pre>abc</pre><m>fuss</m><post>sss</post> |
| </test-case> |
| |
| <test-case id="test05" strength="PRIMARY" > |
| <pattern>FUSS</pattern> |
| <pre>abc</pre><m>fuß</m><post>sss</post> |
| </test-case> |
| |
| <test-case id="test05.5" strength="PRIMARY" > |
| <pattern>fuss</pattern> |
| <pre>a </pre> |
| <m>fuß</m> |
| <post>ball table</post> |
| </test-case> |
| |
| <test-case id="test06" strength="PRIMARY" > |
| <pattern>fuß</pattern> |
| <pre>abc</pre><m>fuss</m><post>xyz</post> |
| </test-case> |
| |
| <test-case id="test07" strength="SECONDARY" > |
| <pattern>fuß</pattern> |
| <pre>abcfussxyz</pre> |
| </test-case> |
| |
| <test-case id="test08" strength="PRIMARY" > |
| <pattern>fus</pattern> |
| <pre>abcfuß</pre><post>xyz</post> |
| </test-case> |
| |
| <!-- A good match following an initial match that failed because |
| of not ending on a character boundary --> |
| <test-case id="test09" strength="PRIMARY"> |
| <pattern>fus</pattern> |
| <pre>fuß </pre><m>fus</m><post>sss</post> |
| </test-case> |
| |
| |
| <!-- Test cases from usrchdat.c BREAKITERATOREXACT --> |
| |
| <test-case id="test10" strength="TERTIARY"> |
| <pattern>fox</pattern> |
| <m>fox</m><post>y fox</post> |
| </test-case> |
| |
| <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook"> |
| <pattern>toe</pattern> |
| <pre>This is a </pre><m>Tö</m><post>ne</post> |
| </test-case> |
| |
| <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebook"> |
| <pattern>toe</pattern> |
| <pre>This is a </pre><post>Töne</post> |
| </test-case> |
| |
| <test-case id="test12" strength="TERTIARY"> |
| <pattern>e</pattern> |
| <pre>tésting that é doés not match </pre><m>e</m><post></post> |
| </test-case> |
| |
| <test-case id="test13" strength="PRIMARY" locale="fr"> |
| <pattern>e</pattern> |
| <pre></pre><m>É</m><post>É</post> |
| </test-case> |
| |
| <test-case id="test14" strength="PRIMARY" locale="fr"> |
| <pattern>O</pattern> |
| <pre>C</pre><m>O\u0302</m><post>TÉ</post> |
| </test-case> |
| |
| |
| <!-- Test cases from usrchdat.c STRENGTH --> |
| |
| |
| <test-case id="test15" strength="PRIMARY" locale="en"> |
| <pattern>fox</pattern> |
| <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</post> |
| </test-case> |
| |
| <test-case id="test16" strength="PRIMARY" locale="fr"> |
| <pattern>peche</pattern> |
| <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post> |
| </test-case> |
| |
| <test-case id="test17" strength="PRIMARY" locale="fr"> |
| <pattern>peche</pattern> |
| <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post> |
| </test-case> |
| |
| <test-case id="test18" strength="PRIMARY" locale="fr"> |
| <pattern>peche</pattern> |
| <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post> |
| </test-case> |
| |
| <test-case id="test19" strength="PRIMARY" locale="fr"> |
| <pattern>peche</pattern> |
| <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post> |
| </test-case> |
| |
| <test-case id="test20" strength="PRIMARY" locale="es"> |
| <pattern>channel</pattern> |
| <pre>A </pre><m>channel</m><post>, </post> |
| </test-case> |
| |
| <test-case id="test21" strength="PRIMARY" locale="es"> |
| <pattern>channel</pattern> |
| <pre>A </pre><m>CHANNEL</m><post>, </post> |
| </test-case> |
| |
| <test-case id="test22" strength="PRIMARY" locale="es"> |
| <pattern>channel</pattern> |
| <pre>A </pre><m>Channel</m><post>s, </post> |
| </test-case> |
| |
| <test-case id="test23" strength="PRIMARY" locale="es"> |
| <pattern>channel</pattern> |
| <pre>A </pre><m>channel</m><post>... </post> |
| </test-case> |
| |
| <test-case id="test24" strength="TERTIARY" locale="en"> |
| <pattern>A\u0300</pattern> |
| <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</post> |
| </test-case> |
| |
| <!-- TODO: In the original test data, this test matched at IDENTICAL strength. |
| Doesn't seem right. The characters are different. |
| --> |
| <test-case id="test24a" strength="IDENTICAL" locale="en"> |
| <pattern>A\u0300</pattern> |
| <pre>At IDENTICAL, shoud this match? </pre><m>\u00c0</m><post></post> |
| </test-case> |
| |
| <test-case id="test24b" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en"> |
| <pattern>A\u0300</pattern> |
| <pre>At IDENTICAL, shoud this match? </pre> |
| <m>\u00c0</m> |
| <post></post> |
| </test-case> |
| |
| <test-case id="test25" strength="SECONDARY" locale="en"> |
| <pattern>Ű</pattern> |
| <pre>12</pre><m>ű</m><post> Ű</post> |
| </test-case> |
| |
| <test-case id="test26" strength="SECONDARY" locale="en"> |
| <pattern>A</pattern> |
| <pre>12</pre><m>a</m><post>...</post> |
| </test-case> |
| |
| |
| <!-- Test Cases from usrchdat.c, VARIABLE --> |
| <test-case id="test27" strength="TERTIARY" locale="en"> |
| <pattern>blackbird</pattern> |
| <pre>black-bird </pre><m>blackbird</m><post>...</post> |
| </test-case> |
| |
| <test-case id="test28" strength="TERTIARY" locale="en"> |
| <pattern>go</pattern> |
| <pre> on</pre> |
| </test-case> |
| |
| <!-- TODO: this gives a U_ILLEGAL_ARGUMENT error when opening |
| the UStringSearch. How did the original test run? --> |
| <!-- |
| <test-case id="test29" strength="PRIMARY" locale="en"> |
| <pattern> </pattern> |
| <pre></pre><m></m><post>abc</post> |
| </test-case> |
| --> |
| |
| <test-case id="test30" strength="SECONDARY" locale="en"> |
| <pattern>abc</pattern> |
| <pre> a bc ab c a bc ab c"</pre> |
| </test-case> |
| |
| <test-case id="test31" strength="SECONDARY" locale="en"> |
| <pattern>abc</pattern> |
| <pre> ---------------</pre> |
| </test-case> |
| |
| |
| <!-- Normalization test cases from usrchdat.c --> |
| <test-case id="test32" strength="TERTIARY" norm="ON"> |
| <pattern>a\u0325\u0300</pattern> |
| <pre></pre><m>a\u0300\u0325</m> |
| </test-case> |
| |
| |
| <test-case id="test32a" strength="TERTIARY" norm="OFF"> |
| <pattern>a\u0325\u0300</pattern> |
| <pre>a\u0300\u0325</pre> |
| </test-case> |
| |
| |
| <!-- COMPOSITEBOUNDARIES from usrchdat.c |
| Boundaries are not identical to the original test data because |
| only full combining sequences are matched |
| --> |
| <test-case id="test40" strength="TERTIARY"> |
| <pattern>A</pattern> |
| <pre>À</pre> <!-- \u00C0 --> |
| </test-case> |
| |
| <test-case id="test41" strength="TERTIARY"> |
| <pattern>A</pattern> |
| <pre>À</pre><m>A</m><post>C</post> |
| </test-case> |
| |
| <test-case id="test42" strength="TERTIARY"> |
| <pattern>A\u030A</pattern> |
| <pre>À\u01FA</pre> |
| </test-case> |
| |
| |
| |
| <!-- SUPPLEMENTARYCANONICAL from usrchdat.c |
| NOTE(review): test50 writes its escapes with a single backslash |
| (\uD800\uDC00) but test51 through test55 double it (\\uD834\\uDDB9). |
| The doubling looks like a leftover from the C string literals in |
| usrchdat.c. If the harness unescapes this text, those five cases would |
| be searching for a literal backslash sequence instead of U+1D1B9. |
| TODO confirm before changing the data. --> |
| <test-case id="test50" strength="TERTIARY"> |
| <pattern>\uD800\uDC00</pattern> |
| <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m> |
| <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post> |
| </test-case> |
| |
| <test-case id="test51" strength="TERTIARY"> |
| <pattern>\\uD834\\uDDB9</pattern> |
| <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post> |
| </test-case> |
| |
| <test-case id="test52" strength="TERTIARY"> |
| <pattern> \\uD834\\uDDB9 </pattern> |
| <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this sentence</post> |
| </test-case> |
| |
| <test-case id="test53" strength="TERTIARY"> |
| <pattern>-\\uD834\\uDDB9-</pattern> |
| <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post> |
| </test-case> |
| |
| <test-case id="test54" strength="TERTIARY"> |
| <pattern>,\\uD834\\uDDB9,</pattern> |
| <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post> |
| </test-case> |
| |
| <test-case id="test55" strength="TERTIARY"> |
| <pattern>?\\uD834\\uDDB9?</pattern> |
| <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post> |
| </test-case> |
| |
| |
| <!-- Long combining sequences --> |
| <!-- Backwards search fails because the pattern ends with ignorables |
| <test-case id="test60" strength="PRIMARY"> |
| <pattern>A\u0301\u0301\u0301\u0301</pattern> |
| <m>A\u0301\u0301\u0301\u0301\u0301</m> |
| </test-case> |
| --> |
| |
| <test-case id="test61" strength="TERTIARY"> |
| <pattern>A\u0301\u0301\u0301\u0301</pattern> |
| <pre>A\u0301\u0301\u0301\u0301\u0301</pre> |
| </test-case> |
| |
| <test-case id="test62" strength="TERTIARY"> |
| <pattern>A\u0301\u0301\u0301\u0301</pattern> |
| <m>A\u0301\u0301\u0301\u0301</m> |
| </test-case> |
| |
| <!-- stand-alone combining marks don't match attached marks --> |
| <test-case id="test63" strength="TERTIARY"> |
| <pattern>\u0301</pattern> |
| <pre>A\u0301\u0301\u0301\u0301</pre> |
| </test-case> |
| |
| <test-case id="test64" strength="TERTIARY"> |
| <pattern>\u0301</pattern> |
| <post>\u0301\u0301\u0301\u0301</post> |
| </test-case> |
| |
| <!-- stand-alone combining mark does match an un-attached combining mark --> |
| <test-case id="test65" strength="TERTIARY"> |
| <pattern>\u0301</pattern> |
| <m>\u0301</m><post>A\u0301\u0301</post> |
| </test-case> |
| |
| <test-case id="test66" strength="TERTIARY"> |
| <pattern>\u0301</pattern> |
| <m>\u0301</m> |
| </test-case> |
| |
| <!-- stand-alone combining marks at end of the target text --> |
| <test-case id="test67" strength="TERTIARY"> |
| <pattern>\u0301</pattern> |
| <pre>abcd\r</pre><m>\u0301</m> |
| </test-case> |
| |
| <!-- attached combining marks at end of the target text, no match --> |
| <test-case id="test68" strength="TERTIARY"> |
| <pattern>\u0301</pattern> |
| <pre>abcd\u0301</pre> |
| </test-case> |
| |
| |
| |
| <!-- no match within expansions at the start (test70); a pattern that |
| spells out the whole expansion does match (test71, test72). |
| NOTE(review): test71 and test72 are identical (same attributes, pattern |
| and expected match). One of them may have been meant to cover a |
| different case. TODO confirm. --> |
| <test-case id="test70" strength="PRIMARY"> |
| <pattern>Eligature</pattern> |
| <pre>Æligature</pre> |
| </test-case> |
| |
| <test-case id="test71" strength="PRIMARY"> |
| <pattern>AEligature</pattern> |
| <m>Æligature</m> |
| </test-case> |
| |
| <test-case id="test72" strength="PRIMARY"> |
| <pattern>AEligature</pattern> |
| <m>Æligature</m> |
| </test-case> |
| |
| <!-- unattached combining Tilde will not match a Tilde that is |
| part of a composed Ñ (\u00D1) --> |
| <test-case id="test73" strength="SECONDARY"> |
| <pattern>\u0303</pattern> <!-- combining tilde --> |
| <pre>Ñ
</pre><m>\u0303</m> |
| </test-case> |
| |
| <test-case id="test74" strength="SECONDARY"> |
| <pattern>\u0303</pattern> <!-- combining tilde --> |
| <pre>Ñ 
</pre><m>\u0303</m><post>a</post> |
| </test-case> |
| |
| <test-case id="test75" strength="TERTIARY" locale="fr"> |
| <pattern>\u00EA</pattern> |
| <pre>p</pre><m>\u00EA</m><post>che</post> |
| </test-case> |
| |
| <test-case id="test76" strength="TERTIARY" locale="fr"> |
| <pattern>\u00EA</pattern> |
| <pre>p</pre><m>e\u0302</m><post>che</post> |
| </test-case> |
| |
| <test-case id="test77" strength="TERTIARY" locale="fr"> |
| <pattern>e\u0302</pattern> |
| <pre>p</pre><m>\u00EA</m><post>che</post> |
| </test-case> |
| |
| <!-- Test cases from ticket:5382 --> |
| <test-case id="test78" strength="SECONDARY" locale="hu_HU"> |
| <pattern>\u0170</pattern> |
| <m>\u0171</m> |
| <post>12</post> |
| </test-case> |
| |
| <test-case id="test79" strength="SECONDARY" locale="hu_HU"> |
| <pattern>\u0170</pattern> |
| <pre>1</pre> |
| <m>\u0171</m> |
| <post>2</post> |
| </test-case> |
| |
| <test-case id="test80" strength="SECONDARY" locale="hu_HU"> |
| <pattern>\u0170</pattern> |
| <pre>12</pre> |
| <m>\u0171</m> |
| </test-case> |
| |
| <!-- Test cases from ticket:5959 --> |
| <test-case id="test81" strength="SECONDARY"> |
| <pattern>\u2166</pattern> |
| <m>VII</m> |
| </test-case> |
| |
| <test-case id="test82" strength="SECONDARY"> |
| <pattern>VII</pattern> |
| <m>\u2166</m> |
| </test-case> |
| |
| <test-case id="test83" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en"> |
| <pattern>Universal Declaration of Human Rights</pattern> |
| <pre>Proclaims this </pre><m>Universal Declaration of Human Rights</m><post> as a common standard of achievement for all peoples and all nations</post> |
| </test-case> |
| |
| <test-case id="test83b" strength="TERTIARY" alternate_handling="SHIFTED" locale="en"> |
| <pattern>Universal Declaration of Human Rights</pattern> |
| <pre>Proclaims this </pre> |
| <m>Universal-Declaration-of-Human-Rights</m> |
| <post> as a common standard of achievement for all peoples and all nations</post> |
| </test-case> |
| |
| <test-case id="test84" strength="TERTIARY" locale="en"> |
| <pattern>\u05E9\u0591\u05E9</pattern> |
| <m>\u05E9\u0592\u05E9</m> |
| </test-case> |
| |
| <test-case id="test84b" strength="IDENTICAL" locale="en"> |
| <pattern>\u05E9\u0591\u05E9</pattern> |
| <pre>\u05E9\u0592\u05E9</pre> |
| </test-case> |
| </stringsearch-tests> |
| |