| # © 2016 and later: Unicode, Inc. and others. |
| # License & terms of use: http://www.unicode.org/copyright.html |
| # Generated using tools/cldr/cldr-to-icu/build-icu-data.xml |
| # |
| # File: Arab_Latn.txt |
| # Generated from CLDR |
| # |
| |
| # Generally follows UNGEGN |
| # http://www.eki.ee/wgrs/rom1_ar.pdf |
| # Occasionally deviates in the direction of ISO 233 |
| # http://homepage.mac.com/sirbinks/pdf/Arabic.pdf |
| # a) where required for disambiguation. |
| # b) with underdot instead of cedilla for letters like SAD, |
| # since those are explicitly in Unicode for transliteration. |
| # c) with extra non-Arabic-language letters, like PEH |
| # |
| # Does *not* do assimilation of "al", nor hyphenation. |
| # While it could be done, we need to determine whether a prefix "al" could |
| # occur other than as the definite article (since no space is used). |
| # Global filter for the forward (Arabic → Latin) direction: restricts the |
| # transform to characters in this set. |
| :: [[:Arabic:][:block=ARABIC:][ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ]] ; |
| # Normalize to NFKD in the forward direction; the parenthesized NFC is the |
| # transform used in its place when the rules run in reverse (Latin → Arabic). |
| :: NFKD (NFC); |
| # Variables used by the conversion rules below. |
| # $disambig, $disambig2 and $under are combining marks placed below a Latin |
| # letter (U+0331 macron below, U+0330 tilde below, U+0323 dot below) so that |
| # several Arabic letters sharing one Latin base letter remain distinct. |
| $disambig = \u0331 ; |
| $disambig2 = \u0330 ; |
| $under = \u0323 ; |
| # U+02CC MODIFIER LETTER LOW VERTICAL LINE; used only in the rule for ښ. |
| $descender = ˌ; |
| # Combining marks whose canonical combining class is neither 0 nor 230 |
| # (Above); used by the reverse-only TEH MARBUTA rule to skip over marks that |
| # canonical ordering places between "t" and the diaeresis. |
| $notAbove = [[:^ccc=0:] & [:^ccc=230:]]; |
| # non-letters |
| # Separators: the text inside { } is what gets replaced; the [:Nd:] on either |
| # side is context only. Between two decimal digits the separators map to a |
| # plain ',' or '.'; anywhere else they carry $disambig. The context rules |
| # come first so that they take precedence over the unconditional ones. |
| [:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR |
| [:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR |
| ٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR |
| ٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR |
| # ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate |
| # Punctuation maps one-to-one onto the ASCII equivalents. |
| ، ↔ ',' ; # ARABIC COMMA |
| ؛ ↔ ';' ; # ARABIC SEMICOLON |
| ؟ ↔ '?' ; # ARABIC QUESTION MARK |
| ٪ ↔ '%' ; # ARABIC PERCENT SIGN |
| # Extended Arabic-Indic digits map to an ASCII digit plus $disambig so that, |
| # in reverse, they are told apart from the Arabic-Indic digits further down, |
| # which map to the bare ASCII digit. |
| ۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO |
| ۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE |
| ۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO |
| ۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE |
| ۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR |
| ۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE |
| ۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX |
| ۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN |
| ۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT |
| ۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE |
| ٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO |
| ١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE |
| ٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO |
| ٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE |
| ٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR |
| ٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE |
| ٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX |
| ٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN |
| ٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT |
| ٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE |
| # letters |
| # long vowels |
| # A short-vowel mark followed by its matching letter maps to the Latin vowel |
| # with U+0304 COMBINING MACRON. These two-character rules precede the rules |
| # for the individual marks and letters so that the pair is matched first. |
| \u064Eا↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF |
| \u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW |
| \u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH |
| # longer items moved here to prevent masking |
| # (in the reverse direction, a Latin sequence such as "t h $disambig" or |
| # "s $under" must be tried before the rule for its first letter alone). |
| ث ↔ t h $disambig ; # ARABIC LETTER THEH |
| ذ ↔ d h $disambig ; # ARABIC LETTER THAL |
| ش ↔ s h $disambig ; # ARABIC LETTER SHEEN |
| ص ↔ s $under ; # ARABIC LETTER SAD |
| ض ↔ d $under ; # ARABIC LETTER DAD |
| ط ↔ t $under ; # ARABIC LETTER TAH |
| ظ ↔ z $under ; # ARABIC LETTER ZAH |
| غ ↔ g h $disambig ; # ARABIC LETTER GHAIN |
| # WARNING: special case |
| # ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→ |
| # so on the return, we have to skip over (but preserve) the half-ring below (or others like it) |
| # ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS |
| # The first rule below handles "t" immediately followed by U+0308 COMBINING |
| # DIAERESIS. The second is reverse-only: it matches "t", one or more |
| # $notAbove marks (captured as $1), then the diaeresis, and emits TEH MARBUTA |
| # followed by the captured marks. The cursor marker is placed before $1 so |
| # that the preserved marks are then converted by the later rules. |
| ة ↔ t \u0308 ; # ARABIC LETTER TEH MARBUTA |
| ة | $1 ← t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA |
| # non-Arabic language |
| # Letters used for languages other than Arabic. Each maps to a Latin letter |
| # (or digraph) carrying a disambiguating mark, so these rules are listed |
| # before the plain z, n, v, y and s rules that they would otherwise be |
| # masked by in the reverse direction. |
| ژ ↔ z h $disambig ; # ARABIC LETTER JEH |
| ڭ ↔ n $disambig g ; # ARABIC LETTER NG |
| ۋ ↔ v $disambig ; # ARABIC LETTER VE |
| ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH |
| # U+069A ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE |
| ښ ↔ s $descender; |
| # Arabic language |
| # One rule per letter. Where two Arabic letters share a Latin base letter, |
| # the rule that adds a mark ($under or $disambig) is listed before the rule |
| # for the bare letter (e.g. KHAH and KEHEH before KAF, HAH before HEH, |
| # ALEF MAKSURA before YEH), so the marked form is matched first in reverse. |
| ء ↔ ʾ ; # ARABIC LETTER HAMZA |
| ا ↔ a $under; # ARABIC LETTER ALEF |
| ب ↔ b ; # ARABIC LETTER BEH |
| ت ↔ t ; # ARABIC LETTER TEH |
| ج ↔ j ; # ARABIC LETTER JEEM |
| ح ↔ h $under ; # ARABIC LETTER HAH |
| خ ↔ k h $disambig ; # ARABIC LETTER KHAH |
| د ↔ d ; # ARABIC LETTER DAL |
| ر ↔ r ; # ARABIC LETTER REH |
| ز ↔ z ; # ARABIC LETTER ZAIN |
| س ↔ s ; # ARABIC LETTER SEEN |
| ع ↔ ʿ ; # ARABIC LETTER AIN |
| # Forward-only rule with an empty right-hand side: TATWEEL is dropped when |
| # going to Latin and nothing maps back to it. |
| ـ → ; # ARABIC TATWEEL |
| ف ↔ f ; # ARABIC LETTER FEH |
| ق ↔ q ; # ARABIC LETTER QAF |
| ک ↔ k $disambig ; # ARABIC LETTER KEHEH |
| ك ↔ k ; # ARABIC LETTER KAF |
| ل ↔ l ; # ARABIC LETTER LAM |
| م ↔ m ; # ARABIC LETTER MEEM |
| ن ↔ n ; # ARABIC LETTER NOON |
| ه ↔ h ; # ARABIC LETTER HEH |
| و ↔ w ; # ARABIC LETTER WAW |
| ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA |
| ي ↔ y ; # ARABIC LETTER YEH |
| # Vowel marks. The tanween forms (vowel + superscript n) come before the |
| # plain short vowels so that "aⁿ", "uⁿ", "iⁿ" are matched as a unit in reverse. |
| \u064B ↔ aⁿ ; # ARABIC FATHATAN |
| \u064C ↔ uⁿ ; # ARABIC DAMMATAN |
| \u064D ↔ iⁿ ; # ARABIC KASRATAN |
| \u064E ↔ a ; # ARABIC FATHA |
| \u064F ↔ u ; # ARABIC DAMMA |
| \u0650 ↔ i ; # ARABIC KASRA |
| # SHADDA and SUKUN map to Latin combining marks (U+0303 tilde, U+030A ring |
| # above) rather than to letters. |
| \u0651 ↔ \u0303 ; # ARABIC SHADDA |
| \u0652 ↔ \u030A ; # ARABIC SUKUN |
| # special combining marks |
| # (mapped to U+0302 circumflex, U+0309 hook above, U+0339 right half ring below) |
| \u0653 ↔ \u0302 ; # ARABIC MADDAH ABOVE |
| \u0654 ↔ \u0309 ; # ARABIC HAMZA ABOVE |
| \u0655 ↔ \u0339 ; # ARABIC HAMZA BELOW |
| # Some non-Arabic language (not in UNGEGN) |
| # Additional letters mapped to Latin letters that no earlier rule uses bare |
| # (p, v, g), plus TCHEH as a "ch" digraph marked with $disambig. The two |
| # commented-out FEH variants are intentionally left disabled here. |
| پ ↔ p ; # ARABIC LETTER PEH |
| چ ↔ c h $disambig ; # ARABIC LETTER TCHEH |
| ڤ ↔ v ; # ARABIC LETTER VEH |
| # ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW |
| # ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW |
| گ ↔ g ; # ARABIC LETTER GAF |
| # fallbacks |
| # Reverse-only (Latin → Arabic) rules for Latin input that no rule above |
| # matches: a "c" not consumed by the "c h $disambig" rule, "e", "o", "x", and |
| # a superscript n not consumed by the aⁿ/uⁿ/iⁿ rules. Each is rewritten to |
| # Latin letters that do have a mapping; the cursor marker before the |
| # replacement makes the rules run again on the rewritten text. |
| # "c" becomes "s" when followed by e, i or y (context after }), else "k". |
| | s ← c } [eiy]; |
| | k ← c ; |
| | i ← e ; |
| | u ← o ; |
| | ks ← x ; |
| | n ← ⁿ; |
| # Reverse-only step: lowercase the Latin input (the conversion rules above |
| # only list lowercase Latin letters). |
| :: (lower) ; |
| # Forward direction: compose the Latin output to NFC. Reverse direction: |
| # decompose the Latin input to NFD so base letters and combining marks are |
| # separate characters, as the rules expect. |
| ::NFC (NFD); |
| # Global filter for the reverse (Latin → Arabic) direction: Latin-script |
| # characters plus the punctuation, ASCII digits, modifier letters and |
| # combining marks that the rules above produce. |
| # NOTE(review): ';' is listed twice in the set; the duplicate looks redundant. |
| :: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ]] ); |
| |