<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
	<version number="$Revision: 12263 $"/>
	<transforms>
		<transform source="Arab" target="Latn" direction="both" alias="Arabic-Latin und-Latn-t-und-arab" backwardAlias="Latin-Arabic und-Arab-t-und-latn">
			<tRule><![CDATA[
# Bidirectional Arabic <-> Latin transliteration rules.
# Rules written with the two-headed arrow apply in both directions; rules
# written with a one-headed arrow apply in that direction only.
#
# Generally follows UNGEGN
# http://www.eki.ee/wgrs/rom1_ar.pdf
# Occasionally deviates in the direction of ISO 233
# http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
# a) where required for disambiguation.
# b) with underdot instead of cedilla for letters like SAD,
# since those are explicitly in Unicode for transliteration.
# c) with extra non-Arabic-language letters, like PEH
#
# Does *not* do assimilation of "al", nor hyphenation.
# While it could be done, we need to determine whether a prefix "al" could
# occur other than as the definite article (since no space is used).

# Filter for the forward (Arabic to Latin) direction: only characters in this
# set are considered by the rules below.
:: [[:Arabic:][:block=ARABIC:][ⁿ،؛؟ـً-ٕ٠-٬۰-۹﷼ښ]] ;
# Normalization step: NFKD going forward; the parenthesized NFC is the
# reverse-direction counterpart.
:: NFKD (NFC);

# Helper marks appended to Latin output so that Arabic characters sharing a
# base romanization remain distinguishable on the way back (compare the two
# digit series below, which differ only by $disambig).
$disambig = ̱ ;
$disambig2 = ̰ ;
# The underdot referred to in note b) above.
$under = ̣ ;
# Used only by the SEEN WITH DOT BELOW AND DOT ABOVE rule.
$descender = ˌ;
# Characters whose canonical combining class is neither 0 nor 230;
# used by the TEH MARBUTA reverse rule.
$notAbove = [[:^ccc=0:] & [:^ccc=230:]];

# non-letters
# Between two decimal digits the separators map without a disambiguation mark;
# everywhere else they carry $disambig.
[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
، ↔ ',' ; # ARABIC COMMA
؛ ↔ ';' ; # ARABIC SEMICOLON
؟ ↔ '?' ; # ARABIC QUESTION MARK
٪ ↔ '%' ; # ARABIC PERCENT SIGN
۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE

# letters

# long vowels
# These two-character sequences are listed before the single-character vowel
# and letter rules further down.
َا↔ ā ; # ARABIC FATHA, ARABIC LETTER ALEF
ُو ↔ ū ; # ARABIC DAMMA, ARABIC LETTER WAW
ِي ↔ ī ; # ARABIC KASRA, ARABIC LETTER YEH

# longer items moved here to prevent masking
ث ↔ t h $disambig ; # ARABIC LETTER THEH
ذ ↔ d h $disambig ; # ARABIC LETTER THAL
ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
ص ↔ s $under ; # ARABIC LETTER SAD
ض ↔ d $under ; # ARABIC LETTER DAD
ط ↔ t $under ; # ARABIC LETTER TAH
ظ ↔ z $under ; # ARABIC LETTER ZAH
غ ↔ g h $disambig ; # ARABIC LETTER GHAIN

# WARNING: special case
# ←t, umlaut, half-ring below→ will be canonically ordered as ←t, half-ring below, umlaut→
# so on the return, we have to skip over (but preserve) the half-ring below (or others like it)
# ةٕ ← ẗ̹ ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
ة ↔ t ̈ ; # ARABIC LETTER TEH MARBUTA
# Reverse only: the $notAbove marks between t and the diaeresis are captured
# and re-emitted as $1 after the TEH MARBUTA.
ة | $1 ← t ($notAbove+) ̈ ; # ARABIC LETTER TEH MARBUTA

# non-Arabic language
ژ ↔ z h $disambig ; # ARABIC LETTER JEH
ڭ ↔ n $disambig g ; # ARABIC LETTER NG
ۋ ↔ v $disambig ; # ARABIC LETTER VE
ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
ښ ↔ s $descender; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE

# Arabic language
ء ↔ ʾ ; # ARABIC LETTER HAMZA
ا ↔ a $under; # ARABIC LETTER ALEF
ب ↔ b ; # ARABIC LETTER BEH
ت ↔ t ; # ARABIC LETTER TEH
ج ↔ j ; # ARABIC LETTER JEEM
ح ↔ h $under ; # ARABIC LETTER HAH
خ ↔ k h $disambig ; # ARABIC LETTER KHAH
د ↔ d ; # ARABIC LETTER DAL
ر ↔ r ; # ARABIC LETTER REH
ز ↔ z ; # ARABIC LETTER ZAIN
س ↔ s ; # ARABIC LETTER SEEN
ع ↔ ʿ ; # ARABIC LETTER AIN
# Forward only: tatweel is dropped from the Latin output.
ـ → ; # ARABIC TATWEEL
ف ↔ f ; # ARABIC LETTER FEH
ق ↔ q ; # ARABIC LETTER QAF
ک ↔ k $disambig ; # ARABIC LETTER KEHEH
ك ↔ k ; # ARABIC LETTER KAF
ل ↔ l ; # ARABIC LETTER LAM
م ↔ m ; # ARABIC LETTER MEEM
ن ↔ n ; # ARABIC LETTER NOON
ه ↔ h ; # ARABIC LETTER HEH
و ↔ w ; # ARABIC LETTER WAW
ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
ي ↔ y ; # ARABIC LETTER YEH
ً ↔ aⁿ ; # ARABIC FATHATAN
ٌ ↔ uⁿ ; # ARABIC DAMMATAN
ٍ ↔ iⁿ ; # ARABIC KASRATAN
َ ↔ a ; # ARABIC FATHA
ُ ↔ u ; # ARABIC DAMMA
ِ ↔ i ; # ARABIC KASRA
ّ ↔ ̃ ; # ARABIC SHADDA
ْ ↔ ̊ ; # ARABIC SUKUN

# special combining marks
ٓ ↔ ̂ ; # ARABIC MADDAH ABOVE
ٔ ↔ ̉ ; # ARABIC HAMZA ABOVE
ٕ ↔ ̹ ; # ARABIC HAMZA BELOW

# Some non-Arabic language (not in UNGEGN)
پ ↔ p ; # ARABIC LETTER PEH
چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
ڤ ↔ v ; # ARABIC LETTER VEH
# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
گ ↔ g ; # ARABIC LETTER GAF

# fallbacks
# Reverse-only (Latin to Arabic) rewrites of one Latin letter into another.
# NOTE(review): the leading '|' is the ICU cursor marker, which should leave the
# replacement text in place to be matched again by the rules above - confirm.
| s ← c } [eiy];
| k ← c ;
| i ← e ;
| u ← o ;
| ks ← x ;
| n ← ⁿ;

# The parenthesized parts below apply to the reverse (Latin to Arabic)
# direction: lowercasing, NFD normalization (NFC going forward), and the
# filter restricting which Latin-side characters are considered.
:: (lower) ;
::NFC (NFD);
:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ̂-̄̈-̣̰̊-̱̹;ˌ]] );
			]]></tRule>
		</transform>
	</transforms>
</supplementalData>