Html程序  |  1170行  |  54.64 KB

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">

<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Language" content="en-us" />
<meta name="VI60_defaultClientScript" content="JavaScript" />
<meta name="GENERATOR" content="Microsoft FrontPage 6.0" />
<meta name="keywords" content="Unicode, common locale data repository" />
<meta name="ProgId" content="FrontPage.Editor.Document" />
<title>Common Locale Data Repository</title>
<link rel="stylesheet" type="text/css" href="http://www.unicode.org/webscripts/standard_styles.css" />
<style type="text/css">
<!--
.major     {font-size:95%; font-family: Arial, Geneva, sans-serif; color: #808080; font-weight:bold; }
.minor     {font-size:85%; font-family: Arial, Geneva, sans-serif; color: #808080; font-weight:400; }
.table2           { margin-top: 1.5em; margin-bottom: 0.5em }
td,th {border-color:#EEEEEE; vertical-align:top; padding:2px}
th           { background-color: #CCCCCC }
table {border-collapse: collapse}
caption      { font-weight: bold }
-->
</style>
</head>

<body>

<table width="100%" cellpadding="0" cellspacing="0" border="0">
	<tr>
		<td colspan="2" style="padding:0; margin:0">
		<table width="100%" border="0" cellpadding="0" cellspacing="0">
			<tr>
				<td class="icon" style="padding:2px; margin:0"><a href="http://www.unicode.org/">
				<img border="0" src="http://www.unicode.org/webscripts/logo60s2.gif" align="middle" alt="[Unicode]" width="34" height="33" /></a>&nbsp;&nbsp;
				<a class="bar" href="index.html"><font size="3">Common Locale Data Repository</font></a></td>
				<td class="bar" style="padding:2px; margin:0">
				<a href="http://www.unicode.org" class="bar">Home</a> |
				<a href="http://www.unicode.org/sitemap/" class="bar">Site Map</a> |
				<a href="http://www.unicode.org/search/" class="bar">Search</a></td>
			</tr>
		</table>
		</td>
	</tr>
	<tr>
		<td style="padding:2px; margin:0" colspan="2" class="gray">&nbsp;</td>
	</tr>
	<tr>
		<td style="padding:2px; margin:0" valign="top" width="25%" class="navCol">
		<table class="navColTable" border="0" width="100%" cellspacing="4" cellpadding="0">
			<tr>
				<td style="padding:2px; margin:0" class="navColTitle" colspan="2">Contents</td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="#Introduction">Introduction</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="#Variants">Variants</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="#Guidelines">Guidelines</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" width="1%">
				&nbsp;</td>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				<a href="#Ambiguity">Ambiguity</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				&nbsp;</td>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				<a href="#Pronunciation">Pronunciation</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				&nbsp;</td>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				<a href="#Cautions">Cautions</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="#Available_Transliterations">Available Transliterations</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				&nbsp;</td>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				<a href="#Korean">Korean</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				&nbsp;</td>
				<td style="padding:2px; margin:0" valign="top" class="navColCell"><a href="#Japanese">Japanese</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				&nbsp;</td>
				<td style="padding:2px; margin:0" valign="top" class="navColCell"><a href="#Greek">Greek</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				&nbsp;</td>
				<td style="padding:2px; margin:0" valign="top" class="navColCell"><a href="#Cyrillic">Cyrillic</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				&nbsp;</td>
				<td style="padding:2px; margin:0" valign="top" class="navColCell"><a href="#Indic">Indic</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell">
				&nbsp;</td>
				<td style="padding:2px; margin:0" valign="top" class="navColCell"><a href="#Others">Others</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="#Submitting_Transliterations">Submitting Transliterations</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="#More_Information">More Information</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" class="navColTitle" colspan="2">Unicode CLDR</td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="index.html">CLDR Project</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="repository_access.html">CLDR Releases (Downloads)</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="survey_tool.html">CLDR Survey Tool</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="filing_bug_reports.html">CLDR Bug Reports</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="comparison_charts.html">CLDR Charts</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="process.html">CLDR Process</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="http://www.unicode.org/reports/tr35/">UTS #35: Locale Data Markup Language 
				(LDML)</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" class="navColTitle" colspan="2">Related Links</td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">Join the
				<a href="http://www.unicode.org/consortium/consort.html">Unicode Consortium</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="http://www.unicode.org/reports/">Unicode Technical Reports</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="http://www.unicode.org/faq/reports_process.html">Technical Reports Development 
				and Maintenance Process</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="http://www.unicode.org/consortium/utc.html">Unicode Technical Committee</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="http://www.unicode.org/versions/">Versions of the Unicode Standard</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" class="navColTitle" colspan="2">Other Publications</td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="http://www.unicode.org/standard/standard.html">The Unicode Standard</a></td>
			</tr>
			<tr>
				<td style="padding:2px; margin:0" valign="top" class="navColCell" colspan="2">
				<a href="http://www.unicode.org/notes/">Unicode Technical Notes</a></td>
			</tr>
		</table>
		<!-- BEGIN CONTENTS --></td>
		<td>
		<table>
			<tr>
				<td class="contents" valign="top">
				<div class="body">
					<h1 align="center">Unicode Transliteration Guidelines</h1><br />
					<blockquote>
						<p><i>This document describes guidelines for the creation and use of CLDR 
					transliterations. Preliminary
						<a href="http://www.unicode.org/cldr/data/charts/transforms/index.html">charts</a> 
					are available for the existing transliterations -- be sure to read the known issues 
					there. Please file any feedback 
					on this document or those charts at
						<a href="http://www.unicode.org/cldr/bugs/locale-bugs">Locale Bugs</a>.</i></p>
					</blockquote>
					<h2><a name="Introduction">Introduction</a></h2>
					<table border="1" width="33%" id="table21" cellspacing="0" cellpadding="2" style="border-collapse: collapse; float: right; margin:1em; border-color:#BB0000">
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
							<font size="2"><i><b>Display. </b></i>Some of the characters in this 
							document may not be visible in your browser, and with some fonts the diacritics 
							will not be correctly placed on the base letters. See
							<a href="http://www.unicode.org/help/display_problems.html">Display Problems</a>.</font></td>
						</tr>
					</table>
					<p>Transliteration is the general process of converting characters from one script 
					to another, where the result is roughly phonetic for languages in the target script. 
					For example, &quot;Phobos&quot; and &quot;Deimos&quot; are transliterations of Greek mythological &quot;Φόβος&quot; 
					and &quot;Δεῖμος&quot; into Latin letters, used to name the moons of Mars.</p>
					<p>Transliteration is <i>not</i> translation. Rather, transliteration is the conversion 
					of letters from one script to another without translating the underlying words. 
					The following shows a sample of transliteration systems:</p>
					<table id="table20" style="border-collapse: collapse" border="1" cellspacing="0" cellpadding="2">
						<caption>Sample Transliteration Systems</caption>
						<tr>
							<th width="25%" style="vertical-align: top">Source</th>
							<th width="25%" style="vertical-align: top">Translation</th>
							<th style="vertical-align: top" width="25%">Transliteration</th>
							<th width="25%" style="vertical-align: top">System</th>
						</tr>
						<tr>
							<td bgcolor="#cccccc" style="vertical-align: top" rowspan="2">Αλφαβητικός</td>
							<td bgcolor="#cccccc" style="vertical-align: top"><i>Alphabetic</i></td>
							<td bgcolor="#cccccc" style="vertical-align: top">Alphabētikós</td>
							<td bgcolor="#cccccc" style="vertical-align: top">Classic</td>
						</tr>
						<tr>
							<td bgcolor="#cccccc" style="vertical-align: top">&nbsp;</td>
							<td bgcolor="#cccccc" style="vertical-align: top">Alfavi̱tikós</td>
							<td bgcolor="#cccccc" style="vertical-align: top">UNGEGN</td>
						</tr>
						<tr>
							<td style="vertical-align: top" rowspan="2">しんばし</td>
							<td style="vertical-align: top" rowspan="2"><i>new bridge<br />(district in Tokyo)</i></td>
							<td style="vertical-align: top">shimbashi</td>
							<td style="vertical-align: top">Hepburn</td>
						</tr>
						<tr>
							<td style="vertical-align: top">sinbasi</td>
							<td style="vertical-align: top">Kunrei</td>
						</tr>
						<tr>
							<td style="vertical-align: top" rowspan="3">яйца Фаберже</td>
							<td style="vertical-align: top" rowspan="3"><i>Fabergé eggs</i></td>
							<td style="vertical-align: top">yaytsa Faberzhe</td>
							<td style="vertical-align: top">BGN/PCGN</td>
						</tr>
						<tr>
							<td style="vertical-align: top">jajca Faberže</td>
							<td style="vertical-align: top">Scholarly</td>
						</tr>
						<tr>
							<td style="vertical-align: top">âjca Faberže</td>
							<td style="vertical-align: top">ISO</td>
						</tr>
					</table>
					<p>While an English speaker may 
					not recognize that the Japanese word <i>kyanpasu</i> is equivalent 
					to the English word <i>campus</i>, the word <i>kyanpasu</i> is still far easier 
					to recognize and interpret than if the letters were left in the original script. 
					There are several situations where this transliteration is especially useful, 
					such as the following. See the sidebar for examples.</p>
					<table id="table22" cellpadding="2" style="margin:1em; border-collapse: collapse" border="1" align="right">
						<caption><b>Sample Transliterations</b></caption>
						<tr>
							<th style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Source</th>
							<th style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Transliteration</th>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">김, 
							국삼</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Gim, 
							Gugsam </td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">김, 
							명희</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Gim, 
							Myeonghyi </td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">정, 
							병호</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Jeong, Byeongho
							</td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">...</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">...</td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">たけだ, まさゆき</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Takeda, Masayuki 
							</td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">ますだ, よしひこ</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Masuda, Yoshihiko 
							</td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">やまもと, のぼる  
							</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Yamamoto, Noboru 
							</td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">...</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">...</td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Ρούτση, Άννα</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Roútsē, Ánna</td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Καλούδης, Χρήστος</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Kaloúdēs, Chrḗstos</td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Θεοδωράτου, Ελένη</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">Theodōrátou, Elénē</td>
						</tr>
					</table>
					<ul>
						<li>When a user views names that are entered in a world-wide database, it is 
						extremely helpful to view and refer to the names in the user&#39;s native script.</li>
						<li>When the user performs searching and indexing tasks, transliteration can 
						retrieve information in a different script.</li>
						<li>When a service engineer is sent a program dump that is filled with characters 
						from foreign scripts, it is much easier to diagnose the problem when the text 
						is transliterated and the service engineer can recognize the characters.
						</li>
					</ul>
					<p>The term <i>transliteration</i> 
					is sometimes given a narrow meaning, implying that the transformation is <i>reversible</i> (sometimes called 
					<i>lossless</i>). In CLDR this is not the case; 
					the term <i>transliteration</i> 
					is interpreted broadly to mean both reversible and non-reversible transforms of 
					text. (Note that even if theoretically a <span class="nfakPe">transliteration</span> 
					system is supposed to be reversible, in source standards it is often not 
					specified in sufficient detail in the edge cases to actually be reversible.) A 
					non-reversible transliteration is often called a <i>transcription</i>, or called 
					a <i>lossy </i>or<i> ambiguous</i> transcription.</p>
					<p>Note that reversibility is generally 
					only in one direction, so for native to Latin a transliteration may be reversible, but not the 
					contrary. For example, Hangul is reversible, in that any Hangul to Latin to 
					Hangul should provide the same Hangul as the input. Thus we have the following:</p>
					<blockquote>
						<p>갗 
						<font face="Times New Roman">→</font> 
						gach <font face="Times New Roman">→</font> 
						갗</p>
					</blockquote>
					<p>However, for completeness, many Latin 
					characters have fallbacks. This means that more than one Latin character may map to the same 
					Hangul. Thus <i>from</i> Latin we don&#39;t have reversibility, because two 
					different Latin source strings round-trip back to the same Latin string.</p>
					<blockquote>
						<p>gach 
						<font face="Times New Roman">→</font> 
						갗 <font face="Times New Roman">→</font> 
						gach<br />gac 
						<font face="Times New Roman">→</font> 
						갗 <font face="Times New Roman">→</font> 
						gach</p>
					</blockquote>
					<p>Transliteration can also be used to convert unfamiliar letters within the same 
					script, such as converting Icelandic THORN (þ) to th. These are not typically reversible.</p>
					<blockquote>
						<p><i>There is an online demo using released CLDR data 
						at <a href="http://demo.icu-project.org/icu-bin/translit">ICU Transform Demo</a>.</i></p>
					</blockquote>
					<h2><a name="Variants">Variants</a></h2>
					<p>There are many systems for transliteration between languages: the same text can 
					be transliterated in many different ways. For example, for the Greek example above, 
					the transliteration is classical, while the <a href="http://www.eki.ee/wgrs/">UNGEGN</a> 
					alternate has different correspondences, such as φ → <i>f</i> instead of φ → <i>ph</i>.</p>
					<p>CLDR provides for generic mappings from script to script (such as Cyrillic-Latin), 
					and also language-specific variants (Russian-French, or Serbian-German). There can 
					also be semi-generic mappings, such as Russian-Latin or Cyrillic-French. These can 
					be referred to, respectively, as script transliterations, language-specific transliterations, or 
					script-language transliterations. Transliterations from other scripts to Latin are also called
					<i>Romanizations</i>.</p>
					<p>Even within particular languages, there can be variant systems according to different 
					authorities, or even varying across time (if the authority for a system changes its recommendation). 
					The canonical identifier that CLDR uses for these has the form:</p>
					<blockquote>
						<p><i>source-target/variant</i></p>
					</blockquote>
					<p>The source (and target) can be a language or script, either using the English 
					name or a locale code. The variant should specify the authority for the system, and if necessary 
					for disambiguation, 
					the year. For example, the identifier for the Russian to Latin transliteration according 
					to the UNGEGN system would be:</p>
					<ul>
						<li>ru-und_Latn/UNGEGN, or</li>
						<li>Russian-Latin/UNGEGN</li>
					</ul>
					<p>If there were multiple versions of these over time, the variant would be, say, 
					UNGEGN2006.</p>
					<p>The assumption is that implementations will allow the use of fallbacks, if the 
					exact transliteration specified is unavailable. For example, the following would 
					be the fallback chain for the identifier Russian-English/UNGEGN. This is similar 
					to the <i>Lookup Fallback Pattern</i> used in
					<a href="http://tools.ietf.org/html/bcp47">BCP 47 Tags for Identifying Languages</a>, 
					except that it uses a &quot;stepladder approach&quot; to progressively handle the fallback 
					among source, target, and variant, with priorities being the target, source, and 
					variant, in that order.</p>
					<ul>
						<li>Russian-English/UNGEGN</li>
						<li>Russian-English</li>
						<li>Cyrillic-English/UNGEGN</li>
						<li>Cyrillic-English</li>
						<li>Russian-Latin/UNGEGN</li>
						<li>Russian-Latin</li>
						<li>Cyrillic-Latin/UNGEGN</li>
						<li>Cyrillic-Latin</li>
					</ul>
					<h2><a name="Guidelines">Guidelines</a></h2>
					<p>There are a number of generally desirable guidelines for script transliterations. 
					These guidelines are rarely satisfied simultaneously, so constructing a reasonable 
					transliteration is always a process of balancing different requirements. These requirements 
					are most important for people who are building transliterations, but are also useful 
					as background information for users.</p>
					<p>The following lists the general guidelines 
					for Unicode CLDR transliterations: </p>
					<ul>
						<li><i>standard:</i> follow established systems (standards, authorities, or
						<i>de facto</i> practice) where possible, deviating sometimes where necessary for reversibility. 
						In CLDR, the systems are generally described in the comments in the XML data files found in 
					the
						<a target="_blank" href="http://www.unicode.org/cldr/data/common/transforms/">transforms</a> 
					folder online. For example, the system for Arabic transliteration in CLDR is 
					found in the comments in
						<a target="_blank" href="http://www.unicode.org/cldr/data/common/transforms/Arabic-Latin.xml">Arabic<wbr>-Latin.xml</a>; 
					there is a reference to the 
						<a target="_blank" href="http://www.eki.ee/wgrs/rom1_ar.pdf">UNGEGN Arabic Tables</a>. 
					Similarly for Hebrew, which also follows the 
						<a href="http://www.eki.ee/wgrs/rom1_he.pdf">Hebrew UNGEGN Tables</a>.</li>
						<li><i>complete</i>: every well-formed sequence of characters in the source 
						script should transliterate to a sequence of characters from the target script, 
						and vice versa.</li>
						<li><i>predictable</i>: the letters themselves (without any knowledge of the 
						languages written in that script) should be sufficient for the transliteration, 
						based on a relatively small number of rules. This allows the transliteration 
						to be performed mechanically. </li>
						<li><i>pronounceable</i>: the resulting characters have reasonable 
						pronunciations in the target script. Transliteration is not as useful if the process simply 
						maps the characters without any regard to their pronunciation. Simply mapping 
						by alphabetic order (&quot;αβγδεζηθ...&quot; to &quot;abcdefgh...&quot;) could yield strings that 
						might be complete and unambiguous, but the pronunciation would be completely 
						unexpected.</li>
						<li><i>reversible</i>: it is possible to recover the text in the source script 
						from the transliteration in the target script. That is, someone who knows the transliteration 
						rules would be able to recover the precise spelling of the original source text. 
						For example, it is possible to go from <i>Elláda</i> back to the original Ελλάδα, 
						while if the transliteration were <i>Ellada</i> (with no accent), it would 
						not be possible.</li>
					</ul>
					<p>Some of these principles may not be achievable simultaneously; in particular, 
					adherence to a standard system <i>and</i> reversibility. Often small changes in 
					existing systems can be made to accommodate reversibility. However, where a particular 
					system specifies fundamentally non-reversible transliterations, those transliterations 
					as represented in CLDR may not be reversible.</p>
					<h3><a name="Ambiguity">Ambiguity</a></h3>
					<p>In transliteration, multiple characters may produce ambiguities 
					(non-reversible mappings) unless the rules 
					are carefully designed. For example, the Greek character PSI (ψ) maps to <i>ps</i>, 
					but <i>ps</i> could also result from the sequence PI, SIGMA (πσ) since PI (π) maps 
					to p and SIGMA (σ) maps to s. </p>
					<p>The Japanese transliteration standards provide a good mechanism for handling 
					these kinds of ambiguities. Using the Japanese transliteration standards, whenever 
					an ambiguous sequence in the target script does not result from a single letter, 
					the transform uses an apostrophe to disambiguate it. For example, it uses that procedure 
					to distinguish between <i>man&#39;ichi</i> and <i>manichi</i>. Using this procedure, 
					the Greek sequence PI SIGMA (πσ) maps to <i>p&#39;s</i>. This method is recommended 
					for all script transliteration methods, although sometimes the character may vary: 
					for example, &quot;-&quot; is used in Korean. </p>
					<blockquote>
						<p><b>Note:</b> We&#39;ve had a recent proposal to consistently use the hyphenation dot 
						for this code, thus we&#39;d have πσ → p‧s.</p>
					</blockquote>
					<p>A second problem is that some characters in a target script are not normally 
					found outside of certain contexts. For example, the small Japanese &quot;ya&quot; character, 
					as in &quot;kya&quot; (キャ), is not normally found in isolation. To handle such characters, 
					the Unicode transliterations currently use different conventions.</p>
					<ul>
						<li>Tilde: &quot;ャ&quot; in isolation is represented as &quot;~ya&quot;</li>
						<li>Diacritics: Greek &quot;ς&quot; in isolation is represented as s̱</li>
					</ul>
					<blockquote>
						<p><b>Note:</b> The CLDR committee is considering converging on a common representation for 
						this. The advantage of a common representation is that it allows for easy filtering.</p>
					</blockquote>
					<p>For the default script transforms, the goal is to have unambiguous mappings, 
					with variants for any common use mappings that are ambiguous (non-reversible). In 
					some cases, however, case may not be preserved. For example, </p>
					<table id="table16" cellspacing="1" cellpadding="2" border="1" style="border-collapse: collapse">
						<tr>
							<th>Latin</th>
							<th>Greek</th>
							<th>Latin</th>
						</tr>
						<tr>
							<td>ps PS</td>
							<td>ψ Ψ</td>
							<td>ps PS</td>
						</tr>
						<tr>
							<td>psa Psa <b>PsA</b></td>
							<td>ψα Ψα <b>ΨΑ</b></td>
							<td>psa Psa <b>PSA</b></td>
						</tr>
						<tr>
							<td>psA PSA <b>PSa</b></td>
							<td>ψΑ ΨΑ <b>Ψα</b></td>
							<td>psA PSA <b>Psa</b></td>
						</tr>
					</table>
					<p>The following shows Greek text that is mapped to fully reversible Latin: </p>
					<table id="table5" border="1">
						<tr>
							<th>Greek-Latin</th>
							<th>&nbsp;</th>
						</tr>
						<tr>
							<td>τί φῄς; γραφὴν σέ τις, ὡς ἔοικε, γέγραπται: οὐ γὰρ ἐκεῖνό γε καταγνώσομαι, 
							ὡς σὺ ἕτερον.</td>
							<td>tí phḗis; graphḕn sé tis, hōs éoike, gégraptai: ou gàr ekeînó ge katagnṓsomai, 
							hōs sỳ héteron.</td>
						</tr>
					</table>
					<p>If the user wants a version without certain accents, then CLDR&#39;s <i>chaining 
					rules </i>can be 
					used to remove the accents. For example, the following transliterates to Latin but 
					removes the macron accents on the long vowels. </p>
					<table id="table6" border="1">
						<tr>
							<th>Greek-Latin; nfd; [\u0304] remove; nfc</th>
							<th>&nbsp;</th>
						</tr>
						<tr>
							<td>τί φῄς; γραφὴν σέ τις, ὡς ἔοικε, γέγραπται: οὐ γὰρ ἐκεῖνό γε καταγνώσομαι, 
							ὡς σὺ ἕτερον.</td>
							<td>tí phéis; graphèn sé tis, hos éoike, gégraptai: ou gàr ekeînó ge katagnósomai, 
							hos sỳ héteron.</td>
						</tr>
					</table>
					<p>The above chaining rules, separated by semi-colons, perform the following 
					commands in order:</p>
					<table id="table23" border="1">
						<tr>
							<th style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
							Rule</th>
							<th style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
							Description</th>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px" nowrap="nowrap">Greek-Latin</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
							transliterate Greek to Latin</td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px" nowrap="nowrap">nfd
							</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
							convert to Unicode NFD format (separating accents from base characters)</td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px" nowrap="nowrap">[\u0304] remove</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
							remove accents, but <i>filter</i> the command to only apply to a single 
							character: <code>
							<a target="c" href="http://unicode.org/cldr/utility/character.jsp?a=0304">
							U+0304</a></code> ( ̄ ) COMBINING MACRON</td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px" nowrap="nowrap">nfc</td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
							convert to Unicode NFC format (rejoining accents to base characters)</td>
						</tr>
					</table>
					<p>The following transliterates to Latin but removes <i>all</i> accents. Note 
					that the only change is to expand the filter for the <i>remove</i> command.</p>
					<table id="table7" border="1">
						<tr>
							<th>Greek-Latin; nfd; [:nonspacing marks:] remove; nfc</th>
							<th>&nbsp;</th>
						</tr>
						<tr>
							<td>τί φῄς; γραφὴν σέ τις, ὡς ἔοικε, γέγραπται: οὐ γὰρ ἐκεῖνό γε καταγνώσομαι, 
							ὡς σὺ ἕτερον.</td>
							<td>ti pheis; graphen se tis, hos eoike, gegraptai: ou gar ekeino ge katagnosomai, 
							hos sy heteron.</td>
						</tr>
					</table>
					<h3><a name="Pronunciation">Pronunciation</a></h3>
					<p>Standard transliteration methods often do not follow the pronunciation rules 
					of any particular language in the target script. For example, the Japanese Hepburn 
					system uses a &quot;j&quot; that has the English phonetic value (as opposed to French, German, 
					or Spanish), but uses vowels that do not have the standard English sounds. A transliteration 
					method might also require some special knowledge to have the correct pronunciation. 
					For example, in the Japanese kunrei-siki system, &quot;ti&quot; is pronounced as English &quot;chee&quot;.</p>
					<p>This is similar to situations where there are different languages within the same 
					script. For example, knowing that the word <i>Gewalt</i> comes from German allows 
					a knowledgeable reader to pronounce the &quot;w&quot; as a &quot;v&quot;.&nbsp; 
					When encountering a 
					foreign word like <i>jawa</i>, there is little assurance how it is to be 
					pronounced even when it is not a <span class="nfakPe">transliteration</span> (it is just from another Latin-script language). The <i>j</i> could be 
					pronounced (for an English speaker) as in <i>jump</i>, 
					or <i>Junker</i>, 
					or <i>jour</i>; 
					and so on. Transcriptions are only roughly phonetic, and only so when the 
					specific pronunciation rules are understood.</p>
					<p>The pronunciation of the characters 
					in the original script may also be influenced by context, which may be 
					particularly misleading in transliteration. For example, in the Bengali নিঃশব, 
					transliterated as niḥśaba, the <i>visarga</i> <i>ḥ</i> 
					is not pronounced itself (whereas elsewhere it may be) but lengthens the 
					<i>ś</i> 
					sound, and the final inherent <i>a</i> is pronounced (whereas it 
					commonly is not), and the two inherent a&#39;s are pronounced as <i>ɔ</i> 
					and <i>ô</i>, 
					respectively.</p>
					<p>In some cases, transliteration may be heavily influenced by tradition. For example, 
					the modern Greek letter beta (β) sounds like a &quot;v&quot;, but a 
					transliteration may use a <i>b</i> (as in <i>biology</i>). In that case, the user would need to know 
					that a &quot;b&quot; in the transliterated word corresponded to beta (β) and is to be pronounced 
					as a <i>v</i> in modern Greek.</p>
					<p>Letters may also be transliterated differently according 
					to their context to make the pronunciation more predictable. For example, since 
					the Greek sequence GAMMA GAMMA (γγ) is pronounced as <i>ng</i>, the first GAMMA 
					can be transcribed as an &quot;n&quot; in that context. 
					Similarly, the transliteration can give other guidance to the pronunciation in the 
					source language, for example, using &quot;n&quot; or &quot;m&quot; for the same Japanese character 
					(ん) depending on context, even though there is no distinction in the source 
					script.</p>
					<p>In general, predictability means that when transliterating Latin script to other 
					scripts using reversible transliterations, English text will not produce phonetic 
					results. This is because the pronunciation of English cannot be predicted easily 
					from the letters in a word: e.g. <i>grove</i>, <i>move</i>, and <i>love</i> all end with &quot;ove&quot;, but are 
					pronounced very differently. </p>
					<h3><a name="Cautions">Cautions</a></h3>
					<p>Reversibility may require modifications of traditional transcription methods. 
					For example, there are two standard methods for transliterating Japanese katakana 
					and hiragana into Latin letters. The <i>kunrei-siki</i> method is unambiguous. The Hepburn 
					method can be more easily pronounced by foreigners but is ambiguous. In the Hepburn 
					method, both ZI (ジ) and DI (ヂ) are represented by &quot;ji&quot; and both ZU (ズ) and DU (ヅ) 
					are represented by &quot;zu&quot;. A slightly amended version of Hepburn, that uses &quot;dji&quot; 
					for DI and &quot;dzu&quot; for DU, is unambiguous. </p>
					<p>When a sequence of two letters maps to one, case mappings (uppercase and lowercase) 
					must be handled carefully to ensure reversibility. For cased scripts, the two letters 
					may need to have different cases, depending on the next letter. For example, the 
					Greek letter PHI (Φ) maps to PH in Latin, but Φο maps to Pho, and not to PHo.
					</p>
					<p>Some scripts have characters that take on different shapes depending on their 
					context. Usually, this is done at the display level (such as with Arabic) and does 
					not require special transliteration support. However, in a few cases this is represented 
					with different character codes, such as in Greek and Hebrew. For example, a Greek 
					SIGMA is written in a final form (ς) at the end of words, and a non-final form (σ) 
					in other locations. This also requires the transform to map different characters based 
					on the context.</p>
					<p>Another thing to look out for when 
					dealing with cased scripts is that some of the characters in the target script may 
					not be able to represent case distinctions, such as some of the IPA characters 
					in the Latin script.</p>
					<p>It is useful for the reverse mapping to be complete so that arbitrary strings 
					in the target script can be reasonably mapped back to the source script. Complete 
					reverse mapping makes it much easier to do mechanical quality checks and so on. 
					For example, even though the letter &quot;q&quot; might not be necessary in a transliteration 
					of Greek, it can be mapped to a KAPPA (κ). Such reverse mappings will not, in general, 
					be unambiguous. </p>
					<h2><a name="Available_Transliterations">Available Transliterations</a></h2>
					<p>Currently Unicode CLDR offers Romanizations for certain scripts, plus transliterations 
					between the Indic scripts (excluding Urdu). Additional script transliterations will 
					be added in the future.</p>
					<wbr />
					<p>Except where otherwise noted, all of these systems are designed to be reversible. 
					For bicameral scripts (those with uppercase and lowercase), however, case may not 
					be completely preserved.</p>
					<p>The transliterations are also designed to be complete for any sequence of the 
					Latin letters <i>a-z</i>. A fallback is used for a letter that is not covered by 
					the transliteration, and default letters may be inserted as required. For example, 
					in the Hangul transliteration, <i>rink</i> → 린크 → <i>linkeu</i>. That is, &quot;r&quot; is 
					mapped to the closest other letter, and a default vowel is inserted at the end (since 
					&quot;nk&quot; cannot end a syllable).</p>
					<p><i>Preliminary
					<a href="http://www.unicode.org/cldr/data/charts/transforms/index.html">charts</a> 
					are available for the available transliterations. Be sure to read the known issues 
					described there.</i></p>
					<h3><a name="Korean">Korean</a></h3>
					<p>There are many Romanizations of Korean. The default transliteration in Unicode 
					CLDR follows the <a href="http://www.korean.go.kr/06_new/rule/rule06.jsp">Korean 
					Ministry of Culture &amp; Tourism Transliteration</a> regulations (see also
					<a href="http://www.korea.net/korea/kor_loca.asp?code=A020303">English summary</a>). 
					There is an optional clause 8 variant for reversibility:</p>
					<blockquote>
						<p>&quot;제 8 항 학술 연구 논문 등 특수 분야에서 한글 복원을 전제로 표기할 경우에는 한글 표기를 대상으로 적는다. 이때 글자 대응은 
						제2장을 따르되 &#39;ㄱ, ㄷ, ㅂ, ㄹ&#39;은 &#39;g, d, b, l&#39;로만 적는다. 음가 없는 &#39;ㅇ&#39;은 붙임표(-)로 표기하되 어두에서는 생략하는 
						것을 원칙으로 한다. 기타 분절의 필요가 있을 때에도 붙임표(-)를 쓴다.&quot;</p>
						<p><i>translation: </i>&quot;Clause 8: When it is required to recover the original 
						Hangul representation faithfully as in scholarly articles, &#39; ㄱ, ㄷ, ㅂ, ㄹ&#39; must 
						be always romanized as &#39;g, d, b, l&#39; while the mapping for the rest of the letters 
						remains the same as specified in clause 2. The placeholder &#39;ㅇ&#39; at the beginning 
						of a syllable should be represented with &#39;-&#39;, but should be omitted at the beginning 
						of a word. In addition, &#39;-&#39; should be used in other cases where a syllable boundary 
						needs to be explicitly marked (be disambiguated).&quot;</p>
					</blockquote>
					<p>There are a number of cases where this Romanization may be ambiguous, because 
					sometimes multiple Latin letters map to a single entity (jamo) in Hangul. This happens 
					with vowels and consonants, the latter being slightly more complicated because there 
					are both initial and final consonants:</p>
					<table border="1" id="table18" style="border-collapse: collapse">
						<tr>
							<th>Type</th>
							<th>Multi-Character Consonants</th>
						</tr>
						<tr>
							<td>Initial-Only</td>
							<td>tt pp jj</td>
						</tr>
						<tr>
							<td>Initial-or-Final</td>
							<td>kk ch ss</td>
						</tr>
						<tr>
							<td>Final-Only</td>
							<td>gs nj nh lg lm lb ls lt lp lh bs ng</td>
						</tr>
					</table>
					<p>CLDR uses the following rules for disambiguation of the possible boundaries 
					between letters, in order. The first rule comes from Clause 8.</p>
					<ol>
						<li>Don&#39;t break so as to require an implicit vowel or null consonant (if possible)</li>
						<li>Don&#39;t break within Initial-Only or Initial-Or-Final sequences (if possible)</li>
						<li>Favor longest match first.</li>
					</ol>
					<p>If there is a single consonant between vowels, then Rule #1 will group it with 
					the following vowel if there is one (this is the same as the first part of Clause 
					8). If there is a sequence of four consonants between vowels, then there is only 
					one possible break (with well-formed text). So the only ambiguities lie with two 
					or three consonants between vowels, where there are possible multi-character consonants 
					involved. Even there, in most cases the resolution is simple, because there isn&#39;t 
					a possible multi-character consonant in the case of two, or two possible multi-character 
					consonants in the case of 3. For example, in the following cases, the left side 
					is unambiguous:</p>
					<blockquote>
						<p>angda = ang-da → 앙다<br />apda = ap-da → 앞다</p>
					</blockquote>
					<p>There are a relatively small number of possible ambiguities, listed below using 
					&quot;a&quot; as a sample vowel.</p>
					<table border="1" id="table17" style="border-collapse: collapse" cellspacing="0" cellpadding="2">
						<tr>
							<th align="left">No. of<br />Cons. </th>
							<th align="left">Latin</th>
							<th align="left">CLDR<br />Disambiguation</th>
							<th align="left">Hangul</th>
							<th colspan="2" align="left">Comments</th>
						</tr>
						<tr>
							<td rowspan="18">2</td>
							<td><code>atta</code></td>
							<td><code>= a-tta</code></td>
							<td>아따</td>
							<td colspan="2" rowspan="3">Rule 1, then 2</td>
						</tr>
						<tr>
							<td><code>appa</code></td>
							<td><code>= a-ppa</code></td>
							<td>아빠</td>
						</tr>
						<tr>
							<td><code>ajja</code></td>
							<td><code>= a-jja</code></td>
							<td>아짜</td>
						</tr>
						<tr>
							<td><code>akka</code></td>
							<td><code>= a-kka</code></td>
							<td>아까</td>
							<td colspan="2" rowspan="3">Rule 1, then 2</td>
						</tr>
						<tr>
							<td><code>assa</code></td>
							<td><code>= a-ssa</code></td>
							<td>아싸</td>
						</tr>
						<tr>
							<td><code>acha</code></td>
							<td><code>= a-cha</code></td>
							<td>아차</td>
						</tr>
						<tr>
							<td><code>agsa </code></td>
							<td><code>= ag-sa</code></td>
							<td>악사</td>
							<td colspan="2" rowspan="12">Rule 1</td>
						</tr>
						<tr>
							<td><code>anja </code></td>
							<td><code>= an-ja</code></td>
							<td>안자</td>
						</tr>
						<tr>
							<td><code>anha </code></td>
							<td><code>= an-ha</code></td>
							<td>안하</td>
						</tr>
						<tr>
							<td><code>alga </code></td>
							<td><code>= al-ga</code></td>
							<td>알가</td>
						</tr>
						<tr>
							<td><code>alma </code></td>
							<td><code>= al-ma</code></td>
							<td>알마</td>
						</tr>
						<tr>
							<td><code>alba </code></td>
							<td><code>= al-ba</code></td>
							<td>알바</td>
						</tr>
						<tr>
							<td><code>alsa </code></td>
							<td><code>= al-sa</code></td>
							<td>알사</td>
						</tr>
						<tr>
							<td><code>alta </code></td>
							<td><code>= al-ta</code></td>
							<td>알타</td>
						</tr>
						<tr>
							<td><code>alpa </code></td>
							<td><code>= al-pa</code></td>
							<td>알파</td>
						</tr>
						<tr>
							<td><code>alha </code></td>
							<td><code>= al-ha</code></td>
							<td>알하</td>
						</tr>
						<tr>
							<td><code>absa </code></td>
							<td><code>= ab-sa</code></td>
							<td>압사</td>
						</tr>
						<tr>
							<td><code>anga </code></td>
							<td><code>= an-ga</code></td>
							<td>안가</td>
						</tr>
						<tr>
							<td rowspan="9">3</td>
							<td><code>agssa </code></td>
							<td><code>= ag-ssa</code></td>
							<td>악싸</td>
							<td colspan="2" rowspan="4">Rule 1, then 2</td>
						</tr>
						<tr>
							<td><code>anjja </code></td>
							<td><code>= an-jja</code></td>
							<td>안짜</td>
						</tr>
						<tr>
							<td><code>alssa </code></td>
							<td><code>= al-ssa</code></td>
							<td>알싸</td>
						</tr>
						<tr>
							<td><code>abssa </code></td>
							<td><code>= ab-ssa</code></td>
							<td>압싸</td>
						</tr>
						<tr>
							<td><code>akkka </code></td>
							<td><code>= akk-ka</code></td>
							<td>앆카</td>
							<td colspan="2" rowspan="2">Rule 1, then 2, then 3</td>
						</tr>
						<tr>
							<td><code>asssa </code></td>
							<td><code>= ass-sa</code></td>
							<td>았사</td>
						</tr>
						<tr>
							<td colspan="3"><i>Known bugs*</i></td>
							<td colspan="2"><i>Should be Rule 1, then 2</i></td>
						</tr>
						<tr>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
							<code>altta </code></td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">
							<code>= alt-ta</code></td>
							<td style="vertical-align: top; border-color: #EEEEEE; padding: 2px">앑타</td>
							<td><code>= al-tta</code></td>
							<td>알따</td>
						</tr>
						<tr>
							<td><code>alppa </code></td>
							<td><code>= alp-pa</code></td>
							<td>앒파</td>
							<td><code>= al-ppa</code></td>
							<td>알빠</td>
						</tr>
					</table>
					<p><font size="2">* There is one other known bug in CLDR 1.5.1, where &quot;ch&quot; 
					transliterates incorrectly in the degenerate case<font face="Arial"> — </font>when not followed by a vowel.</font></p>
					<p>For vowel sequences, the situation is simpler. Only Rule #3 applies, so aeo = 
					ae-o → 애오.</p>
					<h3><a name="Japanese">Japanese</a></h3>
					<p>The default transliteration for Japanese uses a slight variant of the Hepburn 
					system. With the Hepburn system, both ZI (ジ) and DI (ヂ) are represented by &quot;ji&quot; and 
					both ZU (ズ) and DU (ヅ) are represented by &quot;zu&quot;. This is amended slightly for reversibility 
					by using &quot;dji&quot; for DI and &quot;dzu&quot; for DU.</p>
					<h3><a name="Greek">Greek</a></h3>
					<p>The default transliteration uses a standard transcription for Greek which is 
					aimed at preserving etymology. The ISO 843 variant includes the following differences:
					</p>
					<table id="table10" border="1">
						<tr>
							<th>Greek</th>
							<th>Default</th>
							<th>ISO 843</th>
						</tr>
						<tr>
							<td>β</td>
							<td>b</td>
							<td>v</td>
						</tr>
						<tr>
							<td>γ*</td>
							<td>n</td>
							<td>g</td>
						</tr>
						<tr>
							<td>η</td>
							<td>ē</td>
							<td>ī</td>
						</tr>
						<tr>
							<td>̔</td>
							<td>h</td>
							<td>(omitted)</td>
						</tr>
						<tr>
							<td>̀</td>
							<td>̀</td>
							<td>(omitted)</td>
						</tr>
						<tr>
							<td>~</td>
							<td>~</td>
							<td>(omitted)</td>
						</tr>
					</table>
					<p>* before γ, κ, ξ, χ </p>
					<h3><a name="Cyrillic">Cyrillic</a></h3>
					<p>Cyrillic generally follows ISO 9 for the base Cyrillic set. There are tentative 
					plans to add extended Cyrillic characters in the future, plus variants for GOST 
					and other national standards.</p>
					<h3><a name="Indic">Indic</a></h3>
					<p>Transliteration of Indic scripts follows the ISO 15919<i>
					<strong style="font-weight: 400">Transliteration of Devanagari and related Indic 
					scripts into Latin characters</strong></i>. Internally, all Indic scripts are transliterated 
					by converting first to an internal form, called Inter-Indic, then from Inter-Indic 
					to the target script. Inter-Indic thus provides a pivot between the different 
					scripts, and contains a superset of correspondences for all of them.</p>
					<p>ISO 15919 differs from ISCII 91 in application of diacritics 
					for certain characters. These differences are shown in the following example (illustrated 
					with Devanagari, although the same principles apply to the other Indic scripts):
					</p>
					<table id="table11" border="1">
						<tr>
							<th>Devanagari</th>
							<th>ISCII 91</th>
							<th>ISO 15919</th>
						</tr>
						<tr>
							<td>ऋ</td>
							<td>ṛ</td>
							<td>r̥</td>
						</tr>
						<tr>
							<td>ऌ</td>
							<td>ḻ</td>
							<td>l̥</td>
						</tr>
						<tr>
							<td>ॠ</td>
							<td>ṝ</td>
							<td>r̥̄</td>
						</tr>
						<tr>
							<td>ॡ</td>
							<td>ḻ̄</td>
							<td>l̥̄</td>
						</tr>
						<tr>
							<td>ढ़</td>
							<td>d̂ha</td>
							<td>ṛha</td>
						</tr>
						<tr>
							<td>ड़</td>
							<td>d̂a</td>
							<td>ṛa</td>
						</tr>
					</table>
					<p>Transliteration rules from Indic to Latin are reversible with the exception of 
					the ZWJ and ZWNJ used to request explicit rendering effects. For example:</p>
					<table id="table13" border="1">
						<tr>
							<th>Devanagari</th>
							<th>Romanization</th>
							<th>Note</th>
						</tr>
						<tr>
							<td>क्ष</td>
							<td>kṣa</td>
							<td>normal</td>
						</tr>
						<tr>
							<td>क्‍ष</td>
							<td>kṣa</td>
							<td>explicit halant requested</td>
						</tr>
						<tr>
							<td>क्‌ष</td>
							<td>kṣa</td>
							<td>half-consonant requested</td>
						</tr>
					</table>
					<p>Transliterations between Indic scripts are roundtrip where there are corresponding 
					letters. Otherwise, there may be fallbacks.</p>
					<p>There are two particular instances where transliterations may produce unexpected 
					results: (1) where the final vowel is suppressed in speech, and (2) with the transliteration 
					of &#39;c&#39;. </p>
					<p>For example:</p>
					<table id="table14" border="1">
						<tr>
							<th>Devanagari</th>
							<th style="vertical-align: top">Romanization</th>
							<th>Notes</th>
						</tr>
						<tr>
							<td>सेन्गुप्त</td>
							<td style="vertical-align: top">Sēngupta</td>
							<td style="vertical-align: top">&nbsp;</td>
						</tr>
						<tr>
							<td>सेनगुप्त</td>
							<td style="vertical-align: top">Sēnagupta</td>
							<td style="vertical-align: top">The final &#39;a&#39; is not pronounced</td>
						</tr>
						<tr>
							<td style="vertical-align: top">मोनिक</td>
							<td style="vertical-align: top">Monika</td>
							<td style="vertical-align: top">&nbsp;</td>
						</tr>
						<tr>
							<td>मोनिच</td>
							<td style="vertical-align: top">Monica</td>
							<td style="vertical-align: top">The &#39;c&#39; is pronounced &quot;ch&quot;</td>
						</tr>
					</table>
					<h3><a name="Others">Others</a></h3>
					<p>Unicode CLDR provides other transliterations based on the
					<a href="http://geonames.usgs.gov/">U.S. Board on Geographic Names</a> (BGN) transliterations. 
					These are currently unidirectional <font face="Arial">—</font> to Latin only. The goal is to make them bidirectional 
					in future versions of CLDR.</p>
					<p>Other transliterations are generally based on the
					<a href="http://www.eki.ee/wgrs/">UNGEGN: Working Group on Romanization Systems</a> 
					transliterations. These systems are in 
					wider actual implementation than most ISO standardized transliterations, and are 
					published and freely available on the web (<a target="_blank" href="http://www.eki.ee/wgrs/">http://www.eki.ee/wgrs/</a>) 
					and thus easily accessible to all.
					The UNGEGN also has good documentation. For example, the
					<a href="http://www.eki.ee/wgrs/rom1_ar.pdf">UNGEGN Arabic Tables</a> 
					not only presents the UN system, but compares it with the BGN/PCGN 1956 system, 
					the I.G.N. System 1973, ISO 233:1984, the royal Jordanian Geographic Centre 
					System, and the Survey of Egypt System.</p>
					<h2><a name="Submitting_Transliterations">Submitting Transliterations</a></h2>
					<p>If you are interested in providing transliterations for one or 
					more scripts, file an initial bug report at <i>
					<a href="http://www.unicode.org/cldr/bugs/locale-bugs">Locale Bugs</a></i>. The initial bug should contain the scripts and/or languages 
					involved, and the system being followed (with a link to a full description of 
					the proposed transliteration system), and a brief example. The proposed data can 
					also be in that bug, or be added in a Reply to that bug.</p>
					<p>You can also file a bug in <i>
					<a href="http://www.unicode.org/cldr/bugs/locale-bugs">Locale Bugs</a></i> if 
					you find a problem in an existing transliteration.</p>
					<p>For submission to CLDR, the data 
					needs to be supplied in the correct XML format, and should follow an accepted 
					standard. It is best if the results are 
					tested using the <i><a href="http://demo.icu-project.org/icu-bin/translit">ICU 
					Transform Demo</a></i> first, since if the data doesn&#39;t validate it would 
					not be accepted into CLDR. As mentioned above, even if a transliteration is only used 
					in certain countries or contexts CLDR can provide for them with different 
					variant tags.</p>
					<p>The format for rules is specified in 
					<a target="_blank" href="http://www.unicode.org/reports/tr35/#Transform_Rules">Transform_Rules</a>. 
					The XML format is just a series of rules and comments. For comparison, you can see what is 
					currently in CLDR in the 
					<a target="_blank" href="http://www.unicode.org/cldr/data/common/transforms/">transforms</a> 
					folder online. For example, see 
					<a target="_blank" href="http://www.unicode.org/cldr/data/common/transforms/Hebrew-Latin.xml">Hebrew-Latin.xml</a>.</p>
					<h2><a name="More_Information">More Information</a></h2>
					<p>For more information, see:</p>
					<ul>
						<li>BGN: <a href="http://geonames.usgs.gov/">U.S. Board on Geographic Names</a></li>
						<li>UNGEGN: <a href="http://www.eki.ee/wgrs/">UNITED NATIONS GROUP OF 
						EXPERTS ON GEOGRAPHICAL NAMES: Working Group on Romanization 
						Systems</a></li>
						<li><a href="http://transliteration.eki.ee/">Transliteration of Non-Roman Alphabets 
						and Scripts (Søren Binks)</a> </li>
						<li><a href="http://www.archivists.org/catalog/stds99/chapter8.html">Standards 
						for Archival Description: Romanization</a> </li>
						<li><a href="http://transliteration.eki.ee/pdf/Hindi-Marathi-Nepali.pdf">ISO-15919 
						(Hindi)</a> </li>
						<li><a href="http://transliteration.eki.ee/pdf/Gujarati.pdf">ISO-15919 (Gujarati)</a>
						</li>
						<li><a href="http://transliteration.eki.ee/pdf/Kannada.pdf">ISO-15919 (Kannada)</a>
						</li>
						<li><a href="http://www.cdacindia.com/html/gist/down/iscii_d.asp">ISCII-91</a>
						</li>
						<li><a href="http://www.unicode.org/reports/tr35/">UTS #35: Locale Data Markup 
						Language (LDML)</a></li>
					</ul></div></td>
			</tr>
		</table>
		<hr width="50%" />
		<div align="center">
			<center>
			<table cellspacing="0" cellpadding="0" border="0">
				<tr>
					<td><a href="http://www.unicode.org/copyright.html">
					<img src="http://www.unicode.org/img/hb_notice.gif" border="0" alt="Access to Copyright and terms of use" width="216" height="50" /></a></td>
				</tr>
			</table>
			<script language="Javascript" type="text/javascript" src="http://www.unicode.org/webscripts/lastModified.js">
			
			
			
			
      </script>
			</center></div>
		</td>
	</tr>
</table>

</body>

</html>