| # |
| # Copyright (C) 2002-2013, International Business Machines Corporation and others. |
| # All Rights Reserved. |
| # |
| # file: char.txt |
| # |
| # ICU Character Break Rules, also known as Grapheme Cluster Boundaries |
| # See Unicode Standard Annex #29. |
| # These rules are based on UAX #29 Revision 20 for Unicode Version 6.2 |
| # |
| |
| # |
| # Character Class Definitions. |
| # |
| # Each variable names the set of characters that carry one value of the |
| # Unicode Grapheme_Cluster_Break property. The rule sections further down |
| # refer to these sets by name. |
| $CR = [\p{Grapheme_Cluster_Break = CR}]; |
| $LF = [\p{Grapheme_Cluster_Break = LF}]; |
| $Control = [\p{Grapheme_Cluster_Break = Control}]; |
| # TODO: Restore if the Prepend set becomes non-empty again: $Prepend = [\p{Grapheme_Cluster_Break = Prepend}]; |
| # Extend and SpacingMark are the classes that the rules attach to a preceding character. |
| $Extend = [\p{Grapheme_Cluster_Break = Extend}]; |
| $SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}]; |
| # Regional indicator symbols; the rules keep adjacent ones together. |
| $Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}]; |
| |
| # |
| # Korean Syllable Definitions |
| # |
| # Hangul classes as named by UAX #29: L = leading consonant jamo, |
| # V = vowel jamo, T = trailing consonant jamo. |
| $L = [\p{Grapheme_Cluster_Break = L}]; |
| $V = [\p{Grapheme_Cluster_Break = V}]; |
| $T = [\p{Grapheme_Cluster_Break = T}]; |
| |
| # Precomposed Hangul syllables: LV = leading consonant + vowel, |
| # LVT = leading consonant + vowel + trailing consonant. |
| $LV = [\p{Grapheme_Cluster_Break = LV}]; |
| $LVT = [\p{Grapheme_Cluster_Break = LVT}]; |
| |
| |
| ## ------------------------------------------------- |
| # Option: enable rule chaining, so a match of one rule can continue into |
| # another rule that starts with the character the first one ended on. |
| # The two-character rules below rely on this to cover longer sequences. |
| !!chain; |
| |
| # Rules used when iterating forward through text. Each rule lists characters |
| # that must stay together, i.e. no boundary is placed inside a match. |
| !!forward; |
| |
| # Do not break between CR and LF (UAX #29 GB3). |
| $CR $LF; |
| |
| # Do not break inside Hangul syllable sequences (UAX #29 GB6, GB7, GB8). |
| $L ($L | $V | $LV | $LVT); |
| ($LV | $V) ($V | $T); |
| ($LVT | $T) $T; |
| |
| # Do not break between adjacent regional indicator symbols (UAX #29 GB8a). |
| $Regional_Indicator $Regional_Indicator; |
| |
| # Do not break before an Extend character, unless the preceding character |
| # is a Control, CR or LF (UAX #29 GB9). |
| [^$Control $CR $LF] $Extend; |
| |
| # Same for SpacingMark characters (UAX #29 GB9a). |
| [^$Control $CR $LF] $SpacingMark; |
| # TODO: Restore if the Prepend set becomes non-empty again: $Prepend [^$Control $CR $LF]; |
| |
| |
| ## ------------------------------------------------- |
| |
| # Rules used when iterating backward through text. Each rule is the |
| # corresponding forward rule with its operands written in reverse order, |
| # because the text is scanned from the end toward the start. |
| !!reverse; |
| # CR LF pair, seen backward. |
| $LF $CR; |
| # Hangul syllable sequences, seen backward. |
| ($L | $V | $LV | $LVT) $L; |
| ($V | $T) ($LV | $V); |
| $T ($LVT | $T); |
| |
| # Adjacent regional indicators (symmetric, so identical to the forward rule). |
| $Regional_Indicator $Regional_Indicator; |
| |
| # Extend / SpacingMark followed (in reverse) by the non-control character |
| # they attach to. |
| $Extend [^$Control $CR $LF]; |
| $SpacingMark [^$Control $CR $LF]; |
| # TODO: Restore if the Prepend set becomes non-empty again: [^$Control $CR $LF] $Prepend; |
| |
| |
| ## ------------------------------------------------- |
| # Safe char break rules are not logically needed, but if none are provided at all, |
| # the engine for preceding() and following() will fall back to the |
| # old-style, inefficient algorithm. |
| |
| # Safe-reverse section: the only rule is the CR LF pair, written in |
| # reverse order (LF then CR) as in the reverse rules. |
| !!safe_reverse; |
| $LF $CR; |
| |
| ## ------------------------------------------------- |
| |
| # Safe-forward section: the only rule is the CR LF pair, in forward order. |
| !!safe_forward; |
| $CR $LF; |
| |