# src/third_party/icu/source/test/testdata/collationtest.txt - cobalt - Git at Google (web page title, not test data)

 # Copyright (c) 2012-2015 International Business Machines
 # Corporation and others. All Rights Reserved.
 #
 # This file should be in UTF-8 with a signature byte sequence ("BOM").
 #
 # collationtest.txt: Collation test data.
 #
 # created on: 2012apr13
 # created by: Markus W. Scherer

 # A line with "** test: description" is used for verbose and error output.

 # A collator can be set with "@ root" or "@ locale language-tag",
 # for example "@ locale de-u-co-phonebk".
 # An old-style locale ID can also be used, for example "@ locale de@collation=phonebook".

 # A collator can be built with "@ rules".
 # An "@ rules" line is followed by one or more lines with the tailoring rules.

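 # A collator can be modified with "% attribute=value",
 # for example "% strength=primary" or "% alternate=shifted".
 # Script reordering is set with "% reorder", for example "% reorder Hani Zzzz digit",
 # and reset with "% reorder default".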

 # "* compare" tests the order (= or <) of the following strings.
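 # The relation can be "=" or "<" (the level of the difference is not specified)
 # or "<1", "<2", "<c", "<3", "<4", "<i" (indicating the level of the difference:
 # primary, secondary, case, tertiary, quaternary, identical).
 # The first string of a section is compared against the empty string "".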

 # Test sections ("* compare") are terminated by
 # definitions of new collators, changing attributes, or new test sections.

 ** test: simple CEs & expansions
 # Many types of mappings are tested elsewhere, including via the UCA conformance tests.
 # Here we mostly cover a few unusual mappings.
 @ rules
 &\x01                           # most control codes are ignorable
 <<<\u0300                       # tertiary CE
 &9<\x00                         # NUL not ignorable
 &\uA00A\uA00B=\uA002            # two long-primary CEs
 &\uA00A\uA00B\u00050005=\uA003  # three CEs, require 64 bits

 * compare
 =  \x01
 =  \x02
 <3 \u0300
 <1 9
 <1 \x00
 =  \x01\x00\x02
 <1 a
 <3 a\u0300
 <2 a\u0308
 =  ä
 <1 b
 <1 か        # Hiragana Ka (U+304B)
 <2 か\u3099  # plus voiced sound mark
 =  が        # Hiragana Ga (U+304C)
 <1 \uA00A\uA00B
 =  \uA002
 <1 \uA00A\uA00B\u00050004
 <1 \uA00A\uA00B\u00050005
 =  \uA003
 <1 \uA00A\uA00B\u00050006

 ** test: contractions
 # Create some interesting mappings, and map some normalization-inert characters
 # (which are not subject to canonical reordering)
 # to some of the same CEs to check the sequence of CEs.
 @ rules

 # Contractions starting with 'a' should not continue with any character < U+0300
 # so that we can test a shortcut for that.
 &a=ⓐ
 &b<bz=ⓑ
 &d<dz\u0301=ⓓ           # d+z+acute
 &z
 <a\u0301=Ⓐ              # a+acute sorts after z
 <a\u0301\u0301=Ⓑ        # a+acute+acute
 <a\u0301\u0301\u0358=Ⓒ  # a+acute+acute+dot above right
 <a\u030a=Ⓓ              # a+ring
 <a\u0323=Ⓔ              # a+dot below
 <a\u0323\u0358=Ⓕ        # a+dot below+dot above right
 <a\u0327\u0323\u030a=Ⓖ  # a+cedilla+dot below+ring
 <a\u0327\u0323bz=Ⓗ      # a+cedilla+dot below+b+z

 &\U0001D158=⁰           # musical notehead black (has a symbol primary)
 <\U0001D158\U0001D165=¼ # musical quarter note

 # deliberately missing prefix contractions:
 # dz
 # a\u0327
 # a\u0327\u0323
 # a\u0327\u0323b

 &\x01
 <<<\U0001D165=¹         # musical stem (ccc=216)
 <<<\U0001D16D=²         # musical augmentation dot (ccc=226)
 <<<\U0001D165\U0001D16D=³  # stem+dot (ccc=216 226)
 &\u0301=❶               # acute (ccc=230)
 &\u030a=❷               # ring (ccc=230)
 &\u0308=❸               # diaeresis (ccc=230)
 <<\u0308\u0301=❹        # diaeresis+acute (=dialytika tonos) (ccc=230 230)
 &\u0327=❺               # cedilla (ccc=202)
 &\u0323=❻               # dot below (ccc=220)
 &\u0331=❼               # macron below (ccc=220)
 <<\u0331\u0358=❽        # macron below+dot above right (ccc=220 232)
 &\u0334=❾               # tilde overlay (ccc=1)
 &\u0358=❿               # dot above right (ccc=232)

 &\u0f71=①               # tibetan vowel sign aa
 &\u0f72=②               # tibetan vowel sign i
 #  \u0f71\u0f72         # tibetan vowel sign aa + i = ii = U+0F73
 &\u0f73=③               # tibetan vowel sign ii (ccc=0 but lccc=129)

 ** test: simple contractions

 # Some strings are chosen to cause incremental contiguous contraction matching to
 # go into partial matches for prefixes of contractions
 # (where the prefixes are deliberately not also contractions).
 # When there is no complete match, then the matching code must back out of those
 # so that discontiguous contractions work as specified.

 * compare
 # contraction starter with no following text, or mismatch, or blocked
 <1 a
 =  ⓐ
 <1 aa
 =  ⓐⓐ
 <1 ab
 =  ⓐb
 <1 az
 =  ⓐz

 * compare
 <1 a
 <2 a\u0308\u030a  # ring blocked by diaeresis
 =  ⓐ❸❷
 <2 a\u0327
 =  ⓐ❺

 * compare
 <2 \u0308
 =  ❸
 <2 \u0308\u030a\u0301  # acute blocked by ring
 =  ❸❷❶

 * compare
 <1 \U0001D158
 =  ⁰
 <1 \U0001D158\U0001D165
 =  ¼

 # no discontiguous contraction because of missing prefix contraction d+z,
 # and a starter ('z') after the 'd'
 * compare
 <1 dz\u0323\u0301
 =  dz❻❶

 # contiguous contractions
 * compare
 <1 abz
 =  ⓐⓑ
 <1 abzz
 =  ⓐⓑz

 * compare
 <1 a
 <1 z
 <1 a\u0301
 =  Ⓐ
 <1 a\u0301\u0301
 =  Ⓑ
 <1 a\u0301\u0301\u0358
 =  Ⓒ
 <1 a\u030a
 =  Ⓓ
 <1 a\u0323\u0358
 =  Ⓕ
 <1 a\u0327\u0323\u030a  # match despite missing prefix
 =  Ⓖ
 <1 a\u0327\u0323bz
 =  Ⓗ

 * compare
 <2 \u0308\u0308\u0301  # acute blocked from first diaeresis, contracts with second
 =  ❸❹

 * compare
 <1 \U0001D158\U0001D165
 =  ¼

 * compare
 <3 \U0001D165\U0001D16D
 =  ³

 ** test: discontiguous contractions
 * compare
 <1 a\u0327\u030a                # a+ring skips cedilla
 =  Ⓓ❺
 <2 a\u0327\u0327\u030a          # a+ring skips 2 cedillas
 =  Ⓓ❺❺
 <2 a\u0327\u0327\u0327\u030a    # a+ring skips 3 cedillas
 =  Ⓓ❺❺❺
 <2 a\u0334\u0327\u0327\u030a    # a+ring skips tilde overlay & 2 cedillas
 =  Ⓓ❾❺❺
 <1 a\u0327\u0323                # a+dot below skips cedilla
 =  Ⓔ❺
 <1 a\u0323\u0301\u0358          # a+dot below+dot ab.r.: 2-char match, then skips acute
 =  Ⓕ❶
 <2 a\u0334\u0323\u0358          # a+dot below+dot ab.r. skips tilde overlay
 =  Ⓕ❾

 * compare
 <2 \u0331\u0331\u0358           # macron below+dot ab.r. skips the second macron below
 =  ❽❼

 * compare
 <1 a\u0327\u0331\u0323\u030a    # a+ring skips cedilla, macron below, dot below (dot blocked by macron)
 =  Ⓓ❺❼❻
 <1 a\u0327\u0323\U0001D16D\u030a  # a+dot below skips cedilla
 =  Ⓔ❺²❷
 <2 a\u0327\u0327\u0323\u030a    # a+dot below skips 2 cedillas
 =  Ⓔ❺❺❷
 <2 a\u0327\u0323\u0323\u030a    # a+dot below skips cedilla
 =  Ⓔ❺❻❷
 <2 a\u0334\u0327\u0323\u030a    # a+dot below skips tilde overlay & cedilla
 =  Ⓔ❾❺❷

 * compare
 <1 \U0001D158\u0327\U0001D165   # quarter note skips cedilla
 =  ¼❺
 <1 a\U0001D165\u0323            # a+dot below skips stem
 =  Ⓔ¹

 # partial contiguous match, backs up, matches discontiguous contraction
 <1 a\u0327\u0323b
 =  Ⓔ❺b
 <1 a\u0327\u0323ba
 =  Ⓔ❺bⓐ

 # a+acute+acute+dot above right skips cedilla, continues matching 2 same-ccc combining marks
 * compare
 <1 a\u0327\u0301\u0301\u0358
 =  Ⓒ❺

 # FCD but not NFD
 * compare
 <1 a\u0f73\u0301                # a+acute skips tibetan ii
 =  Ⓐ③

 # FCD but the 0f71 inside the 0f73 must be skipped
 # to match the discontiguous contraction of the first 0f71 with the trailing 0f72 inside the 0f73
 * compare
 <1 \u0f71\u0f73                 # == \u0f73\u0f71 == \u0f71\u0f71\u0f72
 =  ③①

 ** test: discontiguous contractions with nested contractions
 * compare
 <1 a\u0323\u0308\u0301\u0358
 =  Ⓕ❹
 <2 a\u0323\u0308\u0301\u0308\u0301\u0358
 =  Ⓕ❹❹

 ** test: discontiguous contractions with interleaved contractions
 * compare
 # a+ring & cedilla & macron below+dot above right
 <1 a\u0327\u0331\u030a\u0358
 =  Ⓓ❺❽

 # a+ring & 1x..3x macron below+dot above right
 <2 a\u0331\u030a\u0358
 =  Ⓓ❽
 <2 a\u0331\u0331\u030a\u0358\u0358
 =  Ⓓ❽❽
 # also skips acute
 <2 a\u0331\u0331\u0331\u030a\u0301\u0358\u0358\u0358
 =  Ⓓ❽❽❽❶

 # a+dot below & stem+augmentation dot, followed by contiguous d+z+acute
 <1 a\U0001D165\u0323\U0001D16Ddz\u0301
 =  Ⓔ³ⓓ

 ** test: some simple string comparisons
 @ root
 * compare
 # first string compares against ""
 = \u0000
 < a
 <1 b
 <3 B
 = \u0000B\u0000

 ** test: compare with strength=primary
 % strength=primary
 * compare
 <1 a
 <1 b
 = B

 ** test: compare with strength=secondary
 % strength=secondary
 * compare
 <1 a
 <1 b
 = B

 ** test: compare with strength=tertiary
 % strength=tertiary
 * compare
 <1 a
 <1 b
 <3 B

 ** test: compare with strength=quaternary
 % strength=quaternary
 * compare
 <1 a
 <1 b
 <3 B

 ** test: compare with strength=identical
 % strength=identical
 * compare
 <1 a
 <1 b
 <3 B

 ** test: côté with forwards secondary
 @ root
 * compare
 <1 cote
 <2 coté
 <2 côte
 <2 côté

 ** test: côté with forwards secondary vs. U+FFFE merge separator
 # Merged sort keys: On each level, any difference in the first segment
 # must trump any further difference.
 * compare
 <1 cote\uFFFEcôté
 <2 coté\uFFFEcôte
 <2 côte\uFFFEcoté
 <2 côté\uFFFEcote

 ** test: côté with backwards secondary
 % backwards=on
 * compare
 <1 cote
 <2 côte
 <2 coté
 <2 côté

 ** test: côté with backwards secondary vs. U+FFFE merge separator
 # Merged sort keys: On each level, any difference in the first segment
 # must trump any further difference.
 * compare
 <1 cote\uFFFEcôté
 <2 côte\uFFFEcoté
 <2 coté\uFFFEcôte
 <2 côté\uFFFEcote

 ** test: U+FFFE on identical level
 @ root
 % strength=identical
 * compare
 # All of these control codes are completely-ignorable, so that
 # their low code points are compared with the merge separator.
 # The merge separator must compare less than any other character.
 <1 \uFFFE\u0001\u0002\u0003
 <i \u0001\uFFFE\u0002\u0003
 <i \u0001\u0002\uFFFE\u0003
 <i \u0001\u0002\u0003\uFFFE

 * compare
 # The merge separator must even compare less than U+0000.
 <1 \uFFFE\u0000\u0000
 <i \u0000\uFFFE\u0000
 <i \u0000\u0000\uFFFE

 ** test: Hani < surrogates < U+FFFD
 # Note: compareUTF8() treats unpaired surrogates like U+FFFD,
 # so with that the strings with surrogates will compare equal to each other
 # and equal to the string with U+FFFD.
 @ root
 % strength=identical
 * compare
 <1 abz
 <1 a\u4e00z
 <1 a\U00020000z
 <1 a\ud800z
 <1 a\udbffz
 <1 a\udc00z
 <1 a\udfffz
 <1 a\ufffdz

 ** test: script reordering
 @ root
 % reorder Hani Zzzz digit
 * compare
 <1 ?
 <1 +
 <1 丂
 <1 a
 <1 α
 <1 5

 % reorder default
 * compare
 <1 ?
 <1 +
 <1 5
 <1 a
 <1 α
 <1 丂

 ** test: empty rules
 @ rules
 * compare
 <1 a
 <2 ä
 <3 Ä
 <1 b

 ** test: very simple rules
 @ rules
 &a=e<<<<q<<<<r<x<<<X<<y<<<Y;z,Z
 % strength=quaternary
 * compare
 <1 a
 =  e
 <4 q
 <4 r
 <1 x
 <3 X
 <2 y
 <3 Y
 <2 z
 <3 Z

 ** test: tailoring twice before a root position: primary
 @ rules
 &[before 1]b<p
 &[before 1]b<q
 * compare
 <1 a
 <1 p
 <1 q
 <1 b

 ** test: tailoring twice before a root position: secondary
 @ rules
 &[before 2]ſ<<p
 &[before 2]ſ<<q
 * compare
 <1 s
 <2 p
 <2 q
 <2 ſ

 # secondary-before common weight
 @ rules
 &[before 2]b<<p
 &[before 2]b<<q
 * compare
 <1 a
 <1 p
 <2 q
 <2 b

 ** test: tailoring twice before a root position: tertiary
 @ rules
 &[before 3]B<<<p
 &[before 3]B<<<q
 * compare
 <1 b
 <3 p
 <3 q
 <3 B

 # tertiary-before common weight
 @ rules
 &[before 3]b<<<p
 &[before 3]b<<<q
 * compare
 <1 a
 <1 p
 <3 q
 <3 b

 @ rules
 &[before 2]b<<s
 &[before 3]s<<<p
 &[before 3]s<<<q
 * compare
 <1 a
 <1 p
 <3 q
 <3 s
 <2 b

 ** test: tailor after completely ignorable
 @ rules
 &\x00<<<x<<y
 * compare
 = \x00
 = \x1F
 <3 x
 <2 y

 ** test: secondary tailoring gaps, ICU ticket 9362
 @ rules
 &[before 2]s<<'_'
 &s<<r  # secondary between s and ſ (long s)
 &ſ<<*a-q  # more than 15 between ſ and secondary CE boundary
 &[before 2][first primary ignorable]<<u<<v  # between secondary CE boundary & lowest secondary CE
 &[last primary ignorable]<<y<<z

 * compare
 <2 u
 <2 v
 <2 \u0332  # lowest secondary CE
 <2 \u0308
 <2 y
 <2 z
 <1 s_
 <2 ss
 <2 sr
 <2 sſ
 <2 sa
 <2 sb
 <2 sp
 <2 sq
 <2 sus
 <2 svs
 <2 rs

 ** test: tertiary tailoring gaps, ICU ticket 9362
 @ rules
 &[before 3]t<<<'_'
 &t<<<r  # tertiary between t and fullwidth t
 &ᵀ<<<*a-q  # more than 15 between ᵀ (modifier letter T) and tertiary CE boundary
 &[before 3][first secondary ignorable]<<<u<<<v  # between tertiary CE boundary & lowest tertiary CE
 &[last secondary ignorable]<<<y<<<z

 * compare
 <3 u
 <3 v
 # Note: The root collator currently does not map any characters to tertiary CEs.
 <3 y
 <3 z
 <1 t_
 <3 tt
 <3 tr
 <3 tｔ
 <3 tᵀ
 <3 ta
 <3 tb
 <3 tp
 <3 tq
 <3 tut
 <3 tvt
 <3 rt

 ** test: secondary & tertiary around root character
 @ rules
 &[before 2]m<<r
 &m<<s
 &[before 3]m<<<u
 &m<<<v
 * compare
 <1 l
 <1 r
 <2 u
 <3 m
 <3 v
 <2 s
 <1 n

 ** test: secondary & tertiary around tailored item
 @ rules
 &m<x
 &[before 2]x<<r
 &x<<s
 &[before 3]x<<<u
 &x<<<v
 * compare
 <1 m
 <1 r
 <2 u
 <3 x
 <3 v
 <2 s
 <1 n

 ** test: more nesting of secondary & tertiary before
 @ rules
 &[before 3]m<<<u
 &[before 2]m<<r
 &[before 3]r<<<q
 &m<<<w
 &m<<t
 &[before 3]w<<<v
 &w<<<x
 &w<<s
 * compare
 <1 l
 <1 q
 <3 r
 <2 u
 <3 m
 <3 v
 <3 w
 <3 x
 <2 s
 <2 t
 <1 n

 ** test: case bits
 @ rules
 &w<x  # tailored CE getting case bits
   =uv=uV=Uv=UV  # 2 chars -> 1 CE
 &ae=ch=cH=Ch=CH  # 2 chars -> 2 CEs
 &rst=yz=yZ=Yz=YZ  # 2 chars -> 3 CEs
 % caseFirst=lower
 * compare
 <1 ae
 =  ch
 <3 cH
 <3 Ch
 <3 CH
 <1 rst
 =  yz
 <3 yZ
 <3 Yz
 <3 YZ
 <1 w
 <1 x
 =  uv
 <3 uV
 =  Uv  # mixed case on single CE cannot distinguish variations
 <3 UV

 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=lower
 @ rules
 &\u0001<<<t<<<T  # tertiary CEs
 % caseFirst=lower
 * compare
 <1 aa
 <3 aat
 <3 aaT
 <3 aA
 <3 aAt
 <3 ata
 <3 aTa

 ** test: tertiary CEs, tertiary, caseLevel=off, caseFirst=upper
 % caseFirst=upper
 * compare
 <1 aA
 <3 aAt
 <3 aa
 <3 aat
 <3 aaT
 <3 ata
 <3 aTa

 ** test: reset on expansion, ICU tickets 9415 & 9593
 @ rules
 &æ<x    # tailor the last primary CE so that x sorts between ae and af
 &æb=bæ  # copy all reset CEs to make bæ sort the same
 &각<h    # copy/tailor 3 CEs to make h sort before the next Hangul syllable 갂
 &⒀<<y   # copy/tailor 4 CEs to make y sort with only a secondary difference
 &l·=z   # handle the pre-context for · when fetching reset CEs
    <<u  # copy/tailor 2 CEs

 * compare
 <1 ae
 <2 æ
 <1 x
 <1 af

 * compare
 <1 aeb
 <2 æb
 =  bæ

 * compare
 <1 각
 <1 h
 <1 갂
 <1 갃

 * compare
 <1 ·    # by itself: primary CE
 <1 l
 <2 l·   # l+middle dot has only a secondary difference from l
 =  z
 <2 u

 * compare
 <1 (13)
 <3 ⒀  # DUCET sets special tertiary weights in all CEs
 <2 y
 <1 (13[

 % alternate=shifted
 * compare
 <1 (13)
 =  13
 <3 ⒀
 =  y  # alternate=shifted removes the tailoring difference on the last CE
 <1 14

 ** test: contraction inside extension, ICU ticket 9378
 @ rules
 &а<<х/й     # all letters are Cyrillic
 * compare
 <1 ай
 <2 х

 ** test: no duplicate tailored CEs for different reset positions with same CEs, ICU ticket 10104
 @ rules
 &t<x &ᵀ<y           # same primary weights
 &q<u &[before 1]ꝗ<v # q and ꝗ are primary adjacent
 * compare
 <1 q
 <1 u
 <1 v
 <1 ꝗ
 <1 t
 <3 ᵀ
 <1 y
 <1 x

 # Principle: Each rule builds on the state of preceding rules and ignores following rules.

 ** test: later rule does not affect earlier reset position, ICU ticket 10105
 @ rules
 &a < u < v < w  &ov < x  &b < v
 * compare
 <1 oa
 <1 ou
 <1 x    # CE(o) followed by CE between u and w
 <1 ow
 <1 ob
 <1 ov

 ** test: later rule does not affect earlier extension (1), ICU ticket 10105
 @ rules
 &a=x/b &v=b
 % strength=secondary
 * compare
 <1 B
 <1 c
 <1 v
 =  b
 * compare
 <1 AB
 =  x
 <1 ac
 <1 av
 =  ab

 ** test: later rule does not affect earlier extension (2), ICU ticket 10105
 @ rules
 &a <<< c / e &g <<< e / l
 % strength=secondary
 * compare
 <1 AE
 =  c
 <2 æ
 <1 agl
 =  ae

 ** test: later rule does not affect earlier extension (3), ICU ticket 10105
 @ rules
 &a = b / c  &d = c / e
 % strength=secondary
 * compare
 <1 AC  # C is still only tertiary different from the original c
 =  b
 <1 ade
 =  ac

 ** test: extension contains tailored character, ICU ticket 10105
 @ rules
 &a=e &b=u/e
 * compare
 <1 a
 =  e
 <1 ba
 =  be
 =  u

 ** test: add simple mappings for characters with root context
 @ rules
 &z=·    # middle dot has a prefix mapping in the CLDR root
 &n=и    # и (U+0438) has contractions in the root
 * compare
 <1 l
 <2 l·   # root mapping for l|· still works
 <1 z
 =  ·
 * compare
 <1 n
 =  и
 <1 И
 <1 и\u0306  # root mapping for й=и\u0306 still works
 =  й
 <3 Й

 ** test: add context mappings around characters with root context
 @ rules
 &z=·h   # middle dot has a prefix mapping in the CLDR root
 &n=ә|и  # и (U+0438) has contractions in the root
 * compare
 <1 l
 <2 l·   # root mapping for l|· still works
 <1 z
 =  ·h
 * compare
 <1 и
 <3 И
 <1 и\u0306  # root mapping for й=и\u0306 still works
 =  й
 * compare
 <1 әn
 =  әи
 <1 әo

 ** test: many secondary CEs at the top of their range
 @ rules
 &[last primary ignorable]<<*\u2801-\u28ff
 * compare
 <2 \u0308
 <2 \u2801
 <2 \u2802
 <2 \u2803
 <2 \u2804
 <2 \u28fd
 <2 \u28fe
 <2 \u28ff
 <1 \x20

 ** test: many tertiary CEs at the top of their range
 @ rules
 &[last secondary ignorable]<<<*a-z
 * compare
 <3 a
 <3 b
 <3 c
 <3 d
 # e..w
 <3 x
 <3 y
 <3 z
 <2 \u0308

 ** test: tailor contraction together with nearly equivalent prefix, ICU ticket 10101
 @ rules
 &a=p|x &b=px &c=op
 * compare
 <1 b
 =  px
 <3 B
 <1 c
 =  op
 <3 C
 * compare
 <1 ca
 =  opx  # first contraction op, then prefix p|x
 <3 cA
 <3 Ca

 ** test: reset position with prefix (pre-context), ICU ticket 10102
 @ rules
 &a=p|x &px=y
 * compare
 <1 pa
 =  px
 =  y
 <3 pA
 <1 q
 <1 x

 ** test: prefix+contraction together (1), ICU ticket 10071
 @ rules
 &x=a|bc
 * compare
 <1 ab
 <1 Abc
 <1 abd
 <1 ac
 <1 aw
 <1 ax
 =  abc
 <3 aX
 <3 Ax
 <1 b
 <1 bb
 <1 bc
 <3 bC
 <3 Bc
 <1 bd

 ** test: prefix+contraction together (2), ICU ticket 10071
 @ rules
 &w=bc &x=a|b
 * compare
 <1 w
 =  bc
 <3 W
 * compare
 <1 aw
 <1 ax
 =  ab
 <3 aX
 <1 axb
 <1 axc
 =  abc  # prefix match a|b takes precedence over contraction match bc
 <3 abC
 <1 abd
 <1 ay

 ** test: prefix+contraction together (3), ICU ticket 10071
 @ rules
 &x=a|b &w=bc    # reverse order of rules as previous test, order should not matter here
 * compare       # same "compare" sequences as previous test
 <1 w
 =  bc
 <3 W
 * compare
 <1 aw
 <1 ax
 =  ab
 <3 aX
 <1 axb
 <1 axc
 =  abc  # prefix match a|b takes precedence over contraction match bc
 <3 abC
 <1 abd
 <1 ay

 ** test: no mapping p|c, falls back to contraction ch, CLDR ticket 5962
 @ rules
 &d=ch &v=p|ci
 * compare
 <1 pc
 <3 pC
 <1 pcH
 <1 pcI
 <1 pd
 =  pch  # no-prefix contraction ch matches
 <3 pD
 <1 pv
 =  pci  # prefix+contraction p|ci matches
 <3 pV

 ** test: tailor in & around compact ranges of root primaries
 # The Ogham characters U+1681..U+169A are in simple ascending order of primary CEs
 # which should be reliably encoded as one range in the root elements data.
 @ rules
 &[before 1]ᚁ<a
 &ᚁ<b
 &[before 1]ᚂ<c
 &ᚂ<d
 &[before 1]ᚚ<y
 &ᚚ<z
 &[before 2]ᚁ<<r
 &ᚁ<<s
 &[before 3]ᚚ<<<t
 &ᚚ<<<u
 * compare
 <1 ᣵ    # U+18F5 last Canadian Aboriginal
 <1 a
 <1 r
 <2 ᚁ
 <2 s
 <1 b
 <1 c
 <1 ᚂ
 <1 d
 <1 ᚃ
 <1 ᚙ
 <1 y
 <1 t
 <3 ᚚ
 <3 u
 <1 z
 <1 ᚠ    # U+16A0 first Runic

 ** test: suppressContractions
 @ rules
 &z<ch<әж [suppressContractions [·cә]]
 * compare
 <1 ch
 <3 cH   # ch was suppressed
 <1 l
 <1 l·   # primary difference, not secondary, because l|· was suppressed
 <1 ә
 <2 ә\u0308  # secondary difference, not primary, because contractions for ә were suppressed
 <1 әж
 <3 әЖ

 ** test: Hangul & Jamo
 @ rules
 &L=\u1100  # first Jamo L
 &V=\u1161  # first Jamo V
 &T=\u11A8  # first Jamo T
 &\uAC01<<*\u4E00-\u4EFF  # first Hangul LVT syllable & lots of secondary diffs
 * compare
 <1 Lv
 <3 LV
 =  \u1100\u1161
 =  \uAC00
 <1 LVt
 <3 LVT
 =  \u1100\u1161\u11A8
 =  \uAC00\u11A8
 =  \uAC01
 <2 LVT\u0308
 <2 \u4E00
 <2 \u4E01
 <2 \u4E80
 <2 \u4EFF
 <2 LV\u0308T
 <1 \uAC02

 ** test: adjust special reset positions according to previous rules, CLDR ticket 6070
 @ rules
 &[last variable]<x
 [maxVariable space]  # has effect only after building, no effect on following rules
 &[last variable]<y
 &[before 1][first regular]<z
 * compare
 <1 ?  # some punctuation
 <1 x
 <1 y
 <1 z
 <1 $  # some symbol

 @ rules
 &[last primary ignorable]<<x<<<y
 &[last primary ignorable]<<z
 * compare
 <2 \u0358
 <2 x
 <3 y
 <2 z
 <1 \x20

 @ rules
 &[last secondary ignorable]<<<x
 &[last secondary ignorable]<<<y
 * compare
 <3 x
 <3 y
 <2 \u0358

 @ rules
 &[before 2][first variable]<<z
 &[before 2][first variable]<<y
 &[before 3][first variable]<<<x
 &[before 3][first variable]<<<w
 &[before 1][first variable]<v
 &[before 2][first variable]<<u
 &[before 3][first variable]<<<t
 &[before 2]\uFDD1\xA0<<s  # FractionalUCA.txt: FDD1 00A0, SPACE first primary
 * compare
 <2 \u0358
 <1 s
 <2 \uFDD1\xA0
 <1 t
 <3 u
 <2 v
 <1 w
 <3 x
 <3 y
 <2 z
 <2 \t

 @ rules
 &[before 2][first regular]<<z
 &[before 3][first regular]<<<y
 &[before 1][first regular]<x
 &[before 3][first regular]<<<w
 &[before 2]\uFDD1\u263A<<v  # FractionalUCA.txt: FDD1 263A, SYMBOL first primary
 &[before 3][first regular]<<<u
 &[before 1][first regular]<p  # primary before the boundary: becomes variable
 &[before 3][first regular]<<<t  # not affected by p
 &[last variable]<q              # after p!
 * compare
 <1 ?
 <1 p
 <1 q
 <1 t
 <3 u
 <3 v
 <1 w
 <3 x
 <1 y
 <3 z
 <1 $

 # check that p & q are indeed variable
 % alternate=shifted
 * compare
 =  ?
 =  p
 =  q
 <1 t
 <3 u
 <3 v
 <1 w
 <3 x
 <1 y
 <3 z
 <1 $

 @ rules
 &[before 2][first trailing]<<z
 &[before 1][first trailing]<y
 &[before 3][first trailing]<<<x
 * compare
 <1 \u4E00  # first Han, first implicit
 <1 \uFDD1\uFDD0  # FractionalUCA.txt: unassigned first primary
 # Note: The root collator currently does not map any characters to the trailing first boundary primary.
 <1 x
 <3 y
 <1 z
 <2 \uFFFD  # The root collator currently maps U+FFFD to the first real trailing primary.

 @ rules
 &[before 2][first primary ignorable]<<z
 &[before 2][first primary ignorable]<<y
 &[before 3][first primary ignorable]<<<x
 &[before 3][first primary ignorable]<<<w
 * compare
 =  \x01
 <2 w
 <3 x
 <3 y
 <2 z
 <2 \u0301

 @ rules
 &[before 3][first secondary ignorable]<<<y
 &[before 3][first secondary ignorable]<<<x
 * compare
 =  \x01
 <3 x
 <3 y
 <2 \u0301

 ** test: canonical closure
 @ rules
 &X=A &U=Â
 * compare
 <1 U
 =  Â
 =  A\u0302
 <2 Ú  # U with acute
 =  U\u0301
 =  Ấ  # A with circumflex & acute
 =  Â\u0301
 =  A\u0302\u0301
 <1 X
 =  A
 <2 X\u030A  # with ring above
 =  Å
 =  A\u030A
 =  \u212B  # Angstrom sign

 @ rules
 &x=\u5140\u55C0
 * compare
 <1 x
 =  \u5140\u55C0
 =  \u5140\uFA0D
 =  \uFA0C\u55C0
 =  \uFA0C\uFA0D  # CJK compatibility characters
 <3 X

 # canonical closure on prefix rules, ICU ticket 9444
 @ rules
 &x=ä|ŝ
 * compare
 <1 äs  # not tailored
 <1 äx
 =  äŝ
 =  a\u0308s\u0302
 =  a\u0308ŝ
 =  äs\u0302
 <3 äX

 ** test: conjoining Jamo map to expansions
 @ rules
 &gg=\u1101  # Jamo Lead consonant GG
 &nj=\u11AC  # Jamo Trail consonant NJ
 * compare
 <1 gg\u1161nj
 =  \u1101\u1161\u11AC
 =  \uAE4C\u11AC
 =  \uAE51
 <3 gg\u1161nJ
 <1 \u1100\u1100

 ** test: canonical tail closure, ICU ticket 5913
 @ rules
 &a<â
 * compare
 <1 a
 <1 â              # tailored
 =  a\u0302
 <2 a\u0323\u0302  # discontiguous contraction
 =  ạ\u0302        # equivalent
 =  ậ              # equivalent
 <1 b

 @ rules
 &a<ạ
 * compare
 <1 a
 <1 ạ              # tailored
 =  a\u0323
 <2 a\u0323\u0302  # contiguous contraction plus extra diacritic
 =  ạ\u0302        # equivalent
 =  ậ              # equivalent
 <1 b

 # Tail closure should work even if there is a prefix and/or contraction.
 @ rules
 &a<\u5140|câ
 # In order to find discontiguous contractions for \u5140|câ
 # there must exist a mapping for \u5140|ca, regardless of what it maps to.
 # (This follows from the UCA spec.)
 &x=\u5140|ca
 * compare
 <1 \u5140a
 =  \uFA0Ca
 <1 \u5140câ              # tailored
 =  \uFA0Ccâ
 =  \u5140ca\u0302
 =  \uFA0Cca\u0302
 <2 \u5140ca\u0323\u0302  # discontiguous contraction
 =  \uFA0Cca\u0323\u0302
 =  \u5140cạ\u0302
 =  \uFA0Ccạ\u0302
 =  \u5140cậ
 =  \uFA0Ccậ
 <1 \u5140b
 =  \uFA0Cb
 <1 \u5140x
 =  \u5140ca

 # Double-check that without the extra mapping there will be no discontiguous match.
 @ rules
 &a<\u5140|câ
 * compare
 <1 \u5140a
 =  \uFA0Ca
 <1 \u5140câ              # tailored
 =  \uFA0Ccâ
 =  \u5140ca\u0302
 =  \uFA0Cca\u0302
 <1 \u5140b
 =  \uFA0Cb
 <1 \u5140ca\u0323\u0302  # no discontiguous contraction
 =  \uFA0Cca\u0323\u0302
 =  \u5140cạ\u0302
 =  \uFA0Ccạ\u0302
 =  \u5140cậ
 =  \uFA0Ccậ

 @ rules
 &a<cạ
 * compare
 <1 a
 <1 cạ              # tailored
 =  ca\u0323
 <2 ca\u0323\u0302  # contiguous contraction plus extra diacritic
 =  cạ\u0302        # equivalent
 =  cậ              # equivalent
 <1 b

 # ᾢ = U+1FA2 GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
 #   = 03C9 0313 0300 0345
 # ccc = 0, 230, 230, 240
 @ rules
 &δ=αῳ
 # In order to find discontiguous contractions for αῳ
 # there must exist a mapping for αω, regardless of what it maps to.
 # (This follows from the UCA spec.)
 &ε=αω
 * compare
 <1 δ
 =  αῳ
 =  αω\u0345
 <2 αω\u0313\u0300\u0345  # discontiguous contraction
 =  αὠ\u0300\u0345
 =  αὢ\u0345
 =  αᾢ
 <2 αω\u0300\u0313\u0345
 =  αὼ\u0313\u0345
 =  αῲ\u0313  # not FCD
 <1 ε
 =  αω

 # Double-check that without the extra mapping there will be no discontiguous match.
 @ rules
 &δ=αῳ
 * compare
 <1 αω\u0313\u0300\u0345  # no discontiguous contraction
 =  αὠ\u0300\u0345
 =  αὢ\u0345
 =  αᾢ
 <2 αω\u0300\u0313\u0345
 =  αὼ\u0313\u0345
 =  αῲ\u0313  # not FCD
 <1 δ
 =  αῳ
 =  αω\u0345

 # Add U+0315 COMBINING COMMA ABOVE RIGHT which has ccc=232.
 # Tests code paths where the tailored string has a combining mark
 # that does not occur in any composite's decomposition.
 @ rules
 &δ=αὼ\u0315
 * compare
 <1 αω\u0313\u0300\u0315  # Not tailored: The grave accent blocks the comma above.
 =  αὠ\u0300\u0315
 =  αὢ\u0315
 <1 δ
 =  αὼ\u0315
 =  αω\u0300\u0315
 <2 αω\u0300\u0315\u0345
 =  αὼ\u0315\u0345
 =  αῲ\u0315  # not FCD

 ** test: danish a+a vs. a-umlaut, ICU ticket 9319
 @ rules
 &z<aa
 * compare
 <1 z
 <1 aa
 <2 aa\u0308
 =  aä

 ** test: Jamo L with and in prefix
 # Useful for the Korean "searchjl" tailoring (instead of contractions of pairs of Jamo L).
 @ rules
 # Jamo Lead consonant G after G or GG
 &[last primary ignorable]<<\u1100|\u1100=\u1101|\u1100
 # Jamo Lead consonant GG sorts like G+G
 &\u1100\u1100=\u1101
 # Note: Making G|GG and GG|GG sort the same as G|G+G
 # would require the ability to reset on G|G+G,
 # or we could make G-after-G equal to some secondary-CE character,
 # and reset on a pair of those.
 # (It does not matter much if there are at most two G in a row in real text.)
 * compare
 <1 \u1100
 <2 \u1100\u1100  # only one primary from a sequence of G lead consonants
 =  \u1101
 <2 \u1100\u1100\u1100
 =  \u1101\u1100
 # but not = \u1100\u1101, see above
 <1 \u1100\u1161
 =  \uAC00
 <2 \u1100\u1100\u1161
 =  \u1100\uAC00  # prefix match from the L of the LV syllable
 =  \u1101\u1161
 =  \uAE4C

 ** test: proposed Korean "searchjl" tailoring with prefixes, CLDR ticket 6546
 @ rules
 # Low secondary CEs for Jamo V & T.
 # Note: T should sort before V for proper syllable order.
 # (Only the two Jamo V U+1161 & U+1162 are tailored in this test; no Jamo T.)
 &\u0332  # COMBINING LOW LINE (first primary ignorable)
 <<\u1161<<\u1162

 # Korean Jamo lead consonant search rules, part 2:
 # Make modern compound L jamo primary equivalent to non-compound forms.

 # Secondary CEs for Jamo L-after-L, greater than Jamo V & T.
 &\u0313  # COMBINING COMMA ABOVE (second primary ignorable)
 =\u1100|\u1100
 =\u1103|\u1103
 =\u1107|\u1107
 =\u1109|\u1109
 =\u110C|\u110C

 # Compound L Jamo map to equivalent expansions of primary+secondary CE.
 &\u1100\u0313=\u1101<<<\u3132  # HANGUL CHOSEONG SSANGKIYEOK, HANGUL LETTER SSANGKIYEOK
 &\u1103\u0313=\u1104<<<\u3138  # HANGUL CHOSEONG SSANGTIKEUT, HANGUL LETTER SSANGTIKEUT
 &\u1107\u0313=\u1108<<<\u3143  # HANGUL CHOSEONG SSANGPIEUP, HANGUL LETTER SSANGPIEUP
 &\u1109\u0313=\u110A<<<\u3146  # HANGUL CHOSEONG SSANGSIOS, HANGUL LETTER SSANGSIOS
 &\u110C\u0313=\u110D<<<\u3149  # HANGUL CHOSEONG SSANGCIEUC, HANGUL LETTER SSANGCIEUC

 * compare
 <1 \u1100\u1161
 =  \uAC00
 <2 \u1100\u1162
 =  \uAC1C
 <2 \u1100\u1100\u1161
 =  \u1100\uAC00
 =  \u1101\u1161
 =  \uAE4C
 <3 \u3132\u1161

 ** test: Hangul syllables in prefix & in the interior of a contraction
 @ rules
 &x=\u1100\u1161|a\u1102\u1162z
 * compare
 <1 \u1100\u1161x
 =  \u1100\u1161a\u1102\u1162z
 =  \u1100\u1161a\uB0B4z
 =  \uAC00a\u1102\u1162z
 =  \uAC00a\uB0B4z

 ** test: digits are unsafe-backwards when numeric=on
 @ root
 % numeric=on
 * compare
 # If digits are not unsafe, then numeric collation sees "1"=="01" and "b">"a".
 # We need to back up before the identical prefix "1" and compare the full numbers.
 <1 11b
 <1 101a

 ** test: simple locale data test
 @ locale de
 * compare
 <1 a
 <2 ä
 <1 ae
 <2 æ

 @ locale de-u-co-phonebk
 * compare
 <1 a
 <1 ae
 <2 ä
 <2 æ

 # The following test cases were moved here from ICU 52's DataDrivenCollationTest.txt.

 ** test: DataDrivenCollationTest/TestMorePinyin
 # Testing the primary strength.
 @ locale zh
 % strength=primary
 * compare
 < lā
 = lĀ
 = Lā
 = LĀ
 < lān
 = lĀn
 < lē
 = lĒ
 = Lē
 = LĒ
 < lēn
 = lĒn

 ** test: DataDrivenCollationTest/TestLithuanian
 # Lithuanian sort order.
 @ locale lt
 * compare
 < cz
 < č
 < d
 < iz
 < j
 < sz
 < š
 < t
 < zz
 < ž

 ** test: DataDrivenCollationTest/TestLatvian
 # Latvian sort order.
 @ locale lv
 * compare
 < cz
 < č
 < d
 < gz
 < ģ
 < h
 < iz
 < j
 < kz
 < ķ
 < l
 < lz
 < ļ
 < m
 < nz
 < ņ
 < o
 < rz
 < ŗ
 < s
 < sz
 < š
 < t
 < zz
 < ž

 ** test: DataDrivenCollationTest/TestEstonian
 # Estonian sort order.
 @ locale et
 * compare
 < sy
 < š
 < šy
 < z
 < zy
 < ž
 < v
 < va
 < w
 < õ
 < õy
 < ä
 < äy
 < ö
 < öy
 < ü
 < üy
 < x

 ** test: DataDrivenCollationTest/TestAlbanian
 # Albanian sort order.
 @ locale sq
 * compare
 < cz
 < ç
 < d
 < dz
 < dh
 < e
 < ez
 < ë
 < f
 < gz
 < gj
 < h
 < lz
 < ll
 < m
 < nz
 < nj
 < o
 < rz
 < rr
 < s
 < sz
 < sh
 < t
 < tz
 < th
 < u
 < xz
 < xh
 < y
 < zz
 < zh

 ** test: DataDrivenCollationTest/TestSimplifiedChineseOrder
 # Sorted file has different order.
 @ root
 # normalization=on turned on & off automatically.
 * compare
 < \u5F20
 < \u5F20\u4E00\u8E3F

 ** test: DataDrivenCollationTest/TestTibetanNormalizedIterativeCrash
 # This pretty much crashes.
 @ root
 * compare
 < \u0f71\u0f72\u0f80\u0f71\u0f72
 < \u0f80

 ** test: DataDrivenCollationTest/TestThaiPartialSortKeyProblems
 # These are examples of strings that caused trouble in partial sort key testing.
 @ locale th-TH
 * compare
 < \u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C
 < \u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18
 * compare
 < \u0E01\u0E07\u0E01\u0E32\u0E23
 < \u0E01\u0E07\u0E42\u0E01\u0E49
 * compare
 < \u0E01\u0E23\u0E19\u0E17\u0E32
 < \u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32
 * compare
 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27
 < \u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27
 * compare
 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D
 < \u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32

 ** test: DataDrivenCollationTest/TestJavaStyleRule
 # java.text allows rules to start as '<<<x<<<y...'
 # we emulate this by assuming a &[first tertiary ignorable] in this case.
 @ rules
 &\u0001=equal<<<z<<x<<<w &[first tertiary ignorable]=a &[first primary ignorable]=b
 * compare
 = a
 = equal
 < z
 < x
 = b  # x had become the new first primary ignorable
 < w

 ** test: DataDrivenCollationTest/TestShiftedIgnorable
 # The UCA states that primary ignorables should be completely
 # ignorable when following a shifted code point.
 @ root
 % alternate=shifted
 % strength=quaternary
 * compare
 < a\u0020b
 = a\u0020\u0300b
 = a\u0020\u0301b
 < a_b
 = a_\u0300b
 = a_\u0301b
 < A\u0020b
 = A\u0020\u0300b
 = A\u0020\u0301b
 < A_b
 = A_\u0300b
 = A_\u0301b
 < a\u0301b
 < A\u0301b
 < a\u0300b
 < A\u0300b

 ** test: DataDrivenCollationTest/TestNShiftedIgnorable
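 # The UCA states that primary ignorables should be completely
 # ignorable when following a shifted code point.
 # Here alternate=non-ignorable, so nothing is shifted and the combining accents
 # after the space or underscore still make a difference.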
 @ root
 % alternate=non-ignorable
 % strength=tertiary
 * compare
 < a\u0020b
 < A\u0020b
 < a\u0020\u0301b
 < A\u0020\u0301b
 < a\u0020\u0300b
 < A\u0020\u0300b
 < a_b
 < A_b
 < a_\u0301b
 < A_\u0301b
 < a_\u0300b
 < A_\u0300b
 < a\u0301b
 < A\u0301b
 < a\u0300b
 < A\u0300b

 ** test: DataDrivenCollationTest/TestSafeSurrogates
 # It turned out that surrogates were not skipped properly
 # when iterating backwards if they were in the middle of a
 # contraction. This test ensures that this is fixed.
 @ rules
 &a < x\ud800\udc00b
 * compare
 < a
 < x\ud800\udc00b

 ** test: DataDrivenCollationTest/da_TestPrimary
 # This test goes through primary strength cases
 @ locale da
 % strength=primary
 * compare
 < Lvi
 < Lwi
 * compare
 < L\u00e4vi
 < L\u00f6wi
 * compare
 < L\u00fcbeck
 = Lybeck

 ** test: DataDrivenCollationTest/da_TestTertiary
 # This test goes through tertiary strength cases
 @ locale da
 % strength=tertiary
 * compare
 < Luc
 < luck
 * compare
 < luck
 < L\u00fcbeck
 * compare
 < lybeck
 < L\u00fcbeck
 * compare
 < L\u00e4vi
 < L\u00f6we
 * compare
 < L\u00f6ww
 < mast

 * compare
 < A/S
 < ANDRE
 < ANDR\u00c9
 < ANDREAS
 < AS
 < CA
 < \u00c7A
 < CB
 < \u00c7C
 < D.S.B.
 < DA
 < \u00d0A
 < DB
 < \u00d0C
 < DSB
 < DSC
 < EKSTRA_ARBEJDE
 < EKSTRABUD0
 < H\u00d8ST
 < HAAG
 < H\u00c5NDBOG
 < HAANDV\u00c6RKSBANKEN
 < Karl
 < karl
 < NIELS\u0020J\u00d8RGEN
 < NIELS-J\u00d8RGEN
 < NIELSEN
 < R\u00c9E,\u0020A
 < REE,\u0020B
 < R\u00c9E,\u0020L
 < REE,\u0020V
 < SCHYTT,\u0020B
 < SCHYTT,\u0020H
 < SCH\u00dcTT,\u0020H
 < SCHYTT,\u0020L
 < SCH\u00dcTT,\u0020M
 < SS
 < \u00df
 < SSA
 < STORE\u0020VILDMOSE
 < STOREK\u00c6R0
 < STORM\u0020PETERSEN
 < STORMLY
 < THORVALD
 < THORVARDUR
 < \u00feORVAR\u00d0UR
 < THYGESEN
 < VESTERG\u00c5RD,\u0020A
 < VESTERGAARD,\u0020A
 < VESTERG\u00c5RD,\u0020B
 < \u00c6BLE
 < \u00c4BLE
 < \u00d8BERG
 < \u00d6BERG

 * compare
 < andere
 < chaque
 < chemin
 < cote
 < cot\u00e9
 < c\u00f4te
 < c\u00f4t\u00e9
 < \u010du\u010d\u0113t
 < Czech
 < hi\u0161a
 < irdisch
 < lie
 < lire
 < llama
 < l\u00f5ug
 < l\u00f2za
 < lu\u010d
 < luck
 < L\u00fcbeck
 < lye
 < l\u00e4vi
 < L\u00f6wen
 < m\u00e0\u0161ta
 < m\u00eer
 < myndig
 < M\u00e4nner
 < m\u00f6chten
 < pi\u00f1a
 < pint
 < pylon
 < \u0161\u00e0ran
 < savoir
 < \u0160erb\u016bra
 < Sietla
 < \u015blub
 < subtle
 < symbol
 < s\u00e4mtlich
 < verkehrt
 < vox
 < v\u00e4ga
 < waffle
 < wood
 < yen
 < yuan
 < yucca
 < \u017eal
 < \u017eena
 < \u017den\u0113va
 < zoo0
 < Zviedrija
 < Z\u00fcrich
 < zysk0
 < \u00e4ndere

 ** test: DataDrivenCollationTest/hi_TestNewRules
 # This test goes through new rules and tests against old rules
 @ locale hi
 * compare
 < कॐ
 < कं
 < कँ
 < कः

 ** test: DataDrivenCollationTest/ro_TestNewRules
 # This test goes through new rules and tests against old rules
 @ locale ro
 * compare
 < xAx
 < xă
 < xĂ
 < Xă
 < XĂ
 < xăx
 < xĂx
 < xâ
 < xÂ
 < Xâ
 < XÂ
 < xâx
 < xÂx
 < xb
 < xIx
 < xî
 < xÎ
 < Xî
 < XÎ
 < xîx
 < xÎx
 < xj
 < xSx
 < xș
 = xş
 < xȘ
 = xŞ
 < Xș
 = Xş
 < XȘ
 = XŞ
 < xșx
 = xşx
 < xȘx
 = xŞx
 < xT
 < xTx
 < xț
 = xţ
 < xȚ
 = xŢ
 < Xț
 = Xţ
 < XȚ
 = XŢ
 < xțx
 = xţx
 < xȚx
 = xŢx
 < xU

 ** test: DataDrivenCollationTest/testOffsets
 # This tests cases where forwards and backwards iteration get different offsets
 @ locale en
 % strength=tertiary
 * compare
 < a\uD800\uDC00\uDC00
 < b\uD800\uDC00\uDC00
 * compare
 < \u0301A\u0301\u0301
 < \u0301B\u0301\u0301
 * compare
 < abcd\r\u0301
 < abce\r\u0301
 # TODO: test offsets in new CollationTest

 # End of test cases moved here from ICU 52's DataDrivenCollationTest.txt.

 ** test: was ICU 52 cmsccoll/TestRedundantRules
 @ rules
 & a < b < c < d& [before 1] c < m
 * compare
 <1 a
 <1 b
 <1 m
 <1 c
 <1 d

 @ rules
 & a < b <<< c << d <<< e& [before 3] e <<< x
 * compare
 <1 a
 <1 b
 <3 c
 <2 d
 <3 x
 <3 e

 @ rules
 & a < b <<< c << d <<< e <<< f < g& [before 1] g < x
 * compare
 <1 a
 <1 b
 <3 c
 <2 d
 <3 e
 <3 f
 <1 x
 <1 g

 @ rules
 & a <<< b << c < d& a < m
 * compare
 <1 a
 <3 b
 <2 c
 <1 m
 <1 d

 @ rules
 &a<b<<b\u0301 &z<b
 * compare
 <1 a
 <1 b\u0301
 <1 z
 <1 b

 @ rules
 &z<m<<<q<<<m
 * compare
 <1 z
 <1 q
 <3 m

 @ rules
 &z<<<m<q<<<m
 * compare
 <1 z
 <1 q
 <3 m

 @ rules
 & a < b < c < d& r < c
 * compare
 <1 a
 <1 b
 <1 d
 <1 r
 <1 c

 @ rules
 & a < b < c < d& c < m
 * compare
 <1 a
 <1 b
 <1 c
 <1 m
 <1 d

 @ rules
 & a < b < c < d& a < m
 * compare
 <1 a
 <1 m
 <1 b
 <1 c
 <1 d

 ** test: was ICU 52 cmsccoll/TestExpansionSyntax
 # The following two rules should sort the particular list of strings the same.
 @ rules
 &AE <<< a << b <<< c &d <<< f
 * compare
 <1 AE
 <3 a
 <2 b
 <3 c
 <1 d
 <3 f

 @ rules
 &A <<< a / E << b / E <<< c /E  &d <<< f
 * compare
 <1 AE
 <3 a
 <2 b
 <3 c
 <1 d
 <3 f

 # The following two rules should sort the particular list of strings the same.
 @ rules
 &AE <<< a <<< b << c << d < e < f <<< g
 * compare
 <1 AE
 <3 a
 <3 b
 <2 c
 <2 d
 <1 e
 <1 f
 <3 g

 @ rules
 &A <<< a / E <<< b / E << c / E << d / E < e < f <<< g
 * compare
 <1 AE
 <3 a
 <3 b
 <2 c
 <2 d
 <1 e
 <1 f
 <3 g

 # The following two rules should sort the particular list of strings the same.
 @ rules
 &AE <<< B <<< C / D <<< F
 * compare
 <1 AE
 <3 B
 <3 F
 <1 AED
 <3 C

 @ rules
 &A <<< B / E <<< C / ED <<< F / E
 * compare
 <1 AE
 <3 B
 <3 F
 <1 AED
 <3 C

 ** test: never reorder trailing primaries
 @ root
 % reorder Zzzz Grek
 * compare
 <1 L
 <1 字
 <1 Ω
 <1 \uFFFD
 <1 \uFFFF

 ** test: fall back to mappings with shorter prefixes, not immediately to ones with no prefixes
 @ rules
 &u=ab|cd
 &v=b|ce
 * compare
 <1 abc
 <1 abcc
 <1 abcf
 <1 abcd
 =  abu
 <1 abce
 =  abv

 # With the following rules, there is only one prefix per composite ĉ or ç,
 # but both prefixes apply to just c in NFD form.
 # We would get different results for composed vs. NFD input
 # if we fell back directly from longest-prefix mappings to no-prefix mappings.
 @ rules
 &x=op|ĉ
 &y=p|ç
 * compare
 <1 opc
 <2 opć
 <1 opcz
 <1 opd
 <1 opĉ
 =  opc\u0302
 =  opx
 <1 opç
 =  opc\u0327
 =  opy

 # The mapping that is used is the one with the longest matching prefix for which
 # there is also a suffix match; among several suffix matches for that prefix, the longest one wins.
 @ rules
 &❶=d
 &❷=de
 &❸=def
 &①=c|d
 &②=c|de
 &③=c|def
 &④=bc|d
 &⑤=bc|de
 &⑥=bc|def
 &⑦=abc|d
 &⑧=abc|de
 &⑨=abc|def
 * compare
 <1 9aadzz
 =  9aa❶zz
 <1 9aadez
 =  9aa❷z
 <1 9aadef
 =  9aa❸
 <1 9acdzz
 =  9ac①zz
 <1 9acdez
 =  9ac②z
 <1 9acdef
 =  9ac③
 <1 9bcdzz
 =  9bc④zz
 <1 9bcdez
 =  9bc⑤z
 <1 9bcdef
 =  9bc⑥
 <1 abcdzz
 =  abc⑦zz
 <1 abcdez
 =  abc⑧z
 <1 abcdef
 =  abc⑨

 ** test: prefix + discontiguous contraction with missing prefix contraction
 # Unfortunate terminology: The first "prefix" here is the pre-context,
 # the second "prefix" refers to the contraction/relation string that is
 # one shorter than the one being tested.
 @ rules
 &x=p|e
 &y=p|ê
 &z=op|ê
 # No mapping for op|e:
 # Discontiguous contraction matching should not match op|ê in opệ
 # because it would have to skip the dot below and extend a match on op|e by the circumflex,
 # but there is no match on op|e.
 * compare
 <1 oPe
 <1 ope
 =  opx
 <1 opệ
 =  opy\u0323  # y not z
 <1 opê
 =  opz

 # We cannot test for fallback by whether the contraction default CE32
 # is for another contraction. With the following rules, there is no mapping for op|e,
 # and the fallback to prefix p has no contractions.
 @ rules
 &x=p|e
 &z=op|ê
 * compare
 <1 oPe
 <1 ope
 =  opx
 <2 opệ
 =  opx\u0323\u0302  # x not z
 <1 opê
 =  opz

 # One more variation: Fallback to the simple code point, no shorter non-empty prefix.
 @ rules
 &x=e
 &z=op|ê
 * compare
 <1 ope
 =  opx
 <3 oPe
 =  oPx
 <2 opệ
 =  opx\u0323\u0302  # x not z
 <1 opê
 =  opz

 ** test: maxVariable via rules
 @ rules
 [maxVariable space][alternate shifted]
 * compare
 =  \u0020
 =  \u000A
 <1 .
 <1 °  # degree sign
 <1 $
 <1 0

 ** test: maxVariable via setting
 @ root
 % maxVariable=currency
 % alternate=shifted
 * compare
 =  \u0020
 =  \u000A
 =  .
 =  °  # degree sign
 =  $
 <1 0

 ** test: ICU4J CollationMiscTest/TestContractionClosure (ää)
 # This tests canonical closure, but it also tests that CollationFastLatin
 # bails out properly for contractions with combining marks.
 # For that we need pairs of strings that remain in the Latin fastpath
 # long enough, hence the extra "= b" lines.
 @ rules
 &b=\u00e4\u00e4
 * compare
 <1 b
 =  \u00e4\u00e4
 =  b
 =  a\u0308a\u0308
 =  b
 =  \u00e4a\u0308
 =  b
 =  a\u0308\u00e4

 ** test: ICU4J CollationMiscTest/TestContractionClosure (Å)
 @ rules
 &b=\u00C5
 * compare
 <1 b
 =  \u00C5
 =  b
 =  A\u030A
 =  b
 =  \u212B

 ** test: reset-before on already-tailored characters, ICU ticket 10108
 @ rules
 &a<w<<x &[before 2]x<<y
 * compare
 <1 a
 <1 w
 <2 y
 <2 x

 @ rules
 &a<<w<<<x &[before 2]x<<y
 * compare
 <1 a
 <2 y
 <2 w
 <3 x

 @ rules
 &a<w<x &[before 2]x<<y
 * compare
 <1 a
 <1 w
 <1 y
 <2 x

 @ rules
 &a<w<<<x &[before 2]x<<y
 * compare
 <1 a
 <1 y
 <2 w
 <3 x

 ** test: numeric collation with other settings, ICU ticket 9092
 @ root
 % strength=identical
 % caseFirst=upper
 % numeric=on
 * compare
 <1 100\u0020a
 <1 101

 ** test: collation type fallback from unsupported type, ICU ticket 10149
 @ locale fr-CA-u-co-phonebk
 # Expect the same result as with fr-CA, using backwards-secondary order.
 # That is, we should fall back from the unsupported collation type
 # to the locale's default collation type.
 * compare
 <1 cote
 <2 côte
 <2 coté
 <2 côté

 ** test: @ is equivalent to [backwards 2], ICU ticket 9956
 @ rules
 &b<a @ &v<<w
 * compare
 <1 b
 <1 a
 <1 cote
 <2 côte
 <2 coté
 <2 côté
 <1 v
 <2 w
 <1 x

 ** test: shifted+reordering, ICU ticket 9507
 @ root
 % reorder Grek punct space
 % alternate=shifted
 % strength=quaternary
 # Which primaries are "variable" should be determined without script reordering,
 # and then primaries should be reordered whether they are shifted to quaternary or not.
 * compare
 <4 (  # punctuation
 <4 )
 <4 \u0020  # space
 <1 `  # symbol
 <1 ^
 <1 $  # currency symbol
 <1 €
 <1 0  # numbers
 <1 ε  # Greek
 <1 e  # Latin
 <1 e(e
 <4 e)e
 <4 e\u0020e
 <4 ee
 <3 e(E
 <4 e)E
 <4 e\u0020E
 <4 eE

 ** test: "uppercase first" could sort a string before its prefix, ICU ticket 9351
 @ rules
 &\u0001<<<b<<<B
 % caseFirst=upper
 * compare
 <1 aaa
 <3 aaaB

 ** test: secondary+case ignores secondary ignorables, ICU ticket 9355
 @ rules
 &\u0001<<<b<<<B
 % strength=secondary
 % caseLevel=on
 * compare
 <1 a
 =  ab
 =  aB

 ** test: custom collation rules involving tail of a contraction in Malayalam, ICU ticket 6328
 @ rules
 &[before 2] ൌ << ൗ  # U+0D57 << U+0D4C == 0D46+0D57
 * compare
 <1 ൗx
 <2 ൌx
 <1 ൗy
 <2 ൌy

 ** test: quoted apostrophe in compact syntax, ICU ticket 8204
 @ rules
 &q<<*a''c
 * compare
 <1 d
 <1 p
 <1 q
 <2 a
 <2 \u0027
 <2 c
 <1 r

 # ICU ticket #8260 "Support all collation-related keywords in Collator.getInstance()"
 ** test: locale -u- with collation keywords, ICU ticket 8260
 @ locale de-u-kv-sPace-ka-shifTed-kn-kk-falsE-kf-Upper-kc-tRue-ks-leVel4
 * compare
 <4 \u0020  # space is shifted, strength=quaternary
 <1 !  # punctuation is regular
 <1 2
 <1 12  # numeric sorting
 <1 B
 <c b  # uppercase first on case level
 <1 x\u0301\u0308
 <2 x\u0308\u0301  # normalization off

 ** test: locale @ with collation keywords, ICU ticket 8260
 @ locale fr@colbAckwards=yes;ColStrength=Quaternary;kv=currencY;colalternate=shifted
 * compare
 <4 $  # currency symbols are shifted, strength=quaternary
 <1 àla
 <2 alà  # backwards secondary level

 ** test: locale -u- with script reordering, ICU ticket 8260
 @ locale el-u-kr-kana-SYMBOL-Grek-hani-cyrl-latn-digit-armn-deva-ethi-thai
 * compare
 <1 \u0020
 <1 あ
 <1 ☂
 <1 Ω
 <1 丂
 <1 ж
 <1 L
 <1 4
 <1 Ձ
 <1 अ
 <1 ሄ
 <1 ฉ

 ** test: locale @collation=type should be case-insensitive
 @ locale de@coLLation=PhoneBook
 * compare
 <1 ae
 <2 ä
 <3 Ä

 ** test: import root search rules plus German phonebook rules, ICU ticket 8962
 @ locale de-u-co-search
 * compare
 <1 =
 <1 ≠
 <1 a
 <1 ae
 <2 ä

 # Once more, but with runtime builder.
 @ rules
 [import und-u-co-search][import de-u-co-phonebk]
 * compare
 <1 =
 <1 ≠
 <1 a
 <1 ae
 <2 ä

 # Once again, with import from "root" not "und" (as in a proper language tag).
 @ rules
 [import root-u-co-search][import de-u-co-phonebk]
 * compare
 <1 =
 <1 ≠
 <1 a
 <1 ae
 <2 ä

 ** test: import rules from a language with non-Latin native script, and reset the reordering, ICU ticket 10998
 # Greek should sort Greek first.
 @ rules
 [import el]
 * compare
 <1 4
 <1 Ω
 <1 L

 # Import Greek, and then reset the reordering.
 @ rules
 [import el][reorder Zzzz]
 * compare
 <1 4
 <1 L
 <1 Ω

 # "others" is a synonym for Zzzz.
 @ rules
 [import el][reorder others]
 * compare
 <1 4
 <1 L
 <1 Ω

 ** test: regression test for CollationFastLatinBuilder, ICU ticket 11388
 @ rules
 &x<<aa<<<Aa<<<AA
 % strength=secondary
 * compare
 <1 AA
 <2 Aẩ
 <2 aą
 * compare
 <1 AA
 <2 aą

 ** test: tailor tertiary-after a common tertiary where there is a lower one
 # Assume that Hiragana small A has a below-common tertiary, and Hiragana A has a common one.
 # See ICU ticket 11448 & CLDR ticket 7222.
 @ rules
 &あ<<<x<<<y<<<z
 * compare
 <1 ぁ
 <3 あ
 <3 x
 <3 y
 <3 z
 <3 ァ
 <1 い

 ** test: tailor tertiary-after a below-common tertiary
 @ rules
 &ぁ<<<x<<<y<<<z
 * compare
 <1 ぁ
 <3 x
 <3 y
 <3 z
 <3 あ
 <3 ァ
 <1 い

 ** test: tailor tertiary-before a common tertiary where there is a lower one
 @ rules
 &[before 3]あ<<<x<<<y<<<z
 * compare
 <1 ぁ
 <3 x
 <3 y
 <3 z
 <3 あ
 <3 ァ
 <1 い

 ** test: tailor tertiary-before a below-common tertiary
 @ rules
 &[before 3]ぁ<<<x<<<y<<<z
 * compare
 <1 x
 <3 y
 <3 z
 <3 ぁ
 <3 あ
 <3 ァ
 <1 い

 ** test: reorder single scripts not groups, ICU ticket 11449
 @ root
 % reorder Goth Latn
 * compare
 <1 4
 <1 𐌰  # Gothic
 <1 L
 <1 Ω
 # Before ICU 55, the following reordered together with Gothic.
 <1 𐌈  # Old Italic
 <1 𐑐  # Shavian