mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-1098 Fix Transliterator rules for unescaping
X-SVN-Rev: 6667
This commit is contained in:
parent
98fc1b887a
commit
2f7707fe54
10 changed files with 120 additions and 120 deletions
|
@ -6,7 +6,7 @@
|
|||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Any_Accents.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
// Date: Wed Nov 7 09:34:03 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Any_Accents
|
||||
|
@ -18,8 +18,8 @@ translit_Any_Accents {
|
|||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Any_Accents.txt,v $
|
||||
// $Date: 2001/10/26 05:41:15 $
|
||||
// $Revision: 1.1 $
|
||||
// $Date: 2001/11/07 18:50:25 $
|
||||
// $Revision: 1.2 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
":: NFD (NFC) ;"
|
||||
|
@ -29,21 +29,21 @@ translit_Any_Accents {
|
|||
// define special conversion characters.
|
||||
// variants of this could use different characters, or set one or the other to null.
|
||||
|
||||
"$pre = \< ;"
|
||||
"$post = \> ;"
|
||||
"$pre = \\\< ;"
|
||||
"$post = \\\> ;"
|
||||
|
||||
// Provide keyboard equivalents for common diacritics used in transliteration
|
||||
|
||||
"$pre \` $post <> \u0300 ;" // COMBINING GRAVE ACCENT
|
||||
"$pre \' $post <> \u0301 ;" // COMBINING ACUTE ACCENT
|
||||
"$pre \^ $post <> \u0302 ;" // COMBINING CIRCUMFLEX ACCENT
|
||||
"$pre \~ $post <> \u0303 ;" // COMBINING TILDE
|
||||
"$pre \- $post <> \u0304 ;" // COMBINING MACRON
|
||||
"$pre \" $post <> \u0308 ;" // COMBINING DIAERESIS
|
||||
"$pre \* $post <> \u030A ;" // COMBINING RING ABOVE
|
||||
"$pre \, $post <> \u0327 ;" // COMBINING CEDILLA
|
||||
"$pre \\\` $post <> \u0300 ;" // COMBINING GRAVE ACCENT
|
||||
"$pre \\\' $post <> \u0301 ;" // COMBINING ACUTE ACCENT
|
||||
"$pre \\\^ $post <> \u0302 ;" // COMBINING CIRCUMFLEX ACCENT
|
||||
"$pre \\\~ $post <> \u0303 ;" // COMBINING TILDE
|
||||
"$pre \\\- $post <> \u0304 ;" // COMBINING MACRON
|
||||
"$pre \\\" $post <> \u0308 ;" // COMBINING DIAERESIS
|
||||
"$pre \\\* $post <> \u030A ;" // COMBINING RING ABOVE
|
||||
"$pre \\\, $post <> \u0327 ;" // COMBINING CEDILLA
|
||||
"$pre '/' $post <> \u0338 ;" // COMBINING LONG SOLIDUS OVERLAY
|
||||
"$pre \. $post <> \u0323 ;" // COMBINING DOT BELOW
|
||||
"$pre \\\. $post <> \u0323 ;" // COMBINING DOT BELOW
|
||||
|
||||
// Combine common characters
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Any_Publishing.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
// Date: Wed Nov 7 09:34:03 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Any_Publishing
|
||||
|
@ -18,8 +18,8 @@ translit_Any_Publishing {
|
|||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Any_Publishing.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
// $Date: 2001/11/07 18:50:25 $
|
||||
// $Revision: 1.2 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Test case
|
||||
|
@ -27,10 +27,10 @@ translit_Any_Publishing {
|
|||
|
||||
// Variables
|
||||
|
||||
"$single = \' ;"
|
||||
"$single = \\\' ;"
|
||||
"$space = ' ' ;"
|
||||
"$double = \" ;"
|
||||
"$back = \` ;"
|
||||
"$double = \\\" ;"
|
||||
"$back = \\\` ;"
|
||||
"$tab = '\u0008' ;"
|
||||
"$makeRight = [[:Z:][:Ps:][:Pi:]$] ;"
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
// Source: ../../text/resources/Transliterator_Fullwidth_Halfwidth.txt
|
||||
// Date: Wed Nov 7 09:34:03 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Fullwidth_Halfwidth
|
||||
|
@ -58,7 +58,7 @@ translit_Fullwidth_Halfwidth {
|
|||
// single character
|
||||
|
||||
"!<>'!';" // from FULLWIDTH EXCLAMATION MARK
|
||||
""<>'\"';" // from FULLWIDTH QUOTATION MARK
|
||||
""<>'\\\"';" // from FULLWIDTH QUOTATION MARK
|
||||
"#<>'#';" // from FULLWIDTH NUMBER SIGN
|
||||
"$<>'$';" // from FULLWIDTH DOLLAR SIGN
|
||||
"%<>'%';" // from FULLWIDTH PERCENT SIGN
|
||||
|
|
|
@ -4,9 +4,9 @@
|
|||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpicurules.bat
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Greek_Latin.txt
|
||||
// Date: Mon Nov 5 12:28:37 2001
|
||||
// Date: Wed Nov 7 09:34:04 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Greek_Latin
|
||||
|
@ -18,8 +18,8 @@ translit_Greek_Latin {
|
|||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/data/Attic/translit_Greek_Latin.txt,v $
|
||||
// $Date: 2001/11/05 20:39:12 $
|
||||
// $Revision: 1.2 $
|
||||
// $Date: 2001/11/07 18:50:25 $
|
||||
// $Revision: 1.3 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Rules are predicated on running NFD first, and NFC afterwards
|
||||
|
@ -65,13 +65,13 @@ translit_Greek_Latin {
|
|||
|
||||
"$caron = \u030C;"
|
||||
|
||||
"$afterLetter = [:^L:] [\'[:M:]]* ;"
|
||||
"$beforeLetter = [\'[:M:]]* [:^L:] ;"
|
||||
"$afterLetter = [:^L:] [\\\'[:M:]]* ;"
|
||||
"$beforeLetter = [\\\'[:M:]]* [:^L:] ;"
|
||||
|
||||
// Fix punctuation
|
||||
|
||||
"\; <> \? ;"
|
||||
"· <> \: ;"
|
||||
"\\\; <> \\\? ;"
|
||||
"· <> \\\: ;"
|
||||
|
||||
// CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
|
||||
|
||||
|
@ -230,9 +230,9 @@ translit_Greek_Latin {
|
|||
"μ <> m ;"
|
||||
"Μ <> M ;"
|
||||
|
||||
"ν } $gammaLike > n\' ;"
|
||||
"ν } $gammaLike > n\\\' ;"
|
||||
"ν <> n ;"
|
||||
"Ν } $gammaLike <> N\' ;"
|
||||
"Ν } $gammaLike <> N\\\' ;"
|
||||
"Ν <> N ;"
|
||||
|
||||
"ξ <> x ;"
|
||||
|
@ -252,8 +252,8 @@ translit_Greek_Latin {
|
|||
|
||||
// insert separator
|
||||
|
||||
"[Pp] { } ς > \' ;"
|
||||
"[Pp] { } σ > \' ;"
|
||||
"[Pp] { } ς > \\\' ;"
|
||||
"[Pp] { } σ > \\\' ;"
|
||||
|
||||
// Caron means exception
|
||||
|
||||
|
@ -269,7 +269,7 @@ translit_Greek_Latin {
|
|||
"ς <> s $caron;"
|
||||
"σ <> s ;"
|
||||
|
||||
"[Pp] { Σ <> \'S ;"
|
||||
"[Pp] { Σ <> \\\'S ;"
|
||||
"Σ <> S ;"
|
||||
|
||||
"τ <> t ;"
|
||||
|
@ -327,8 +327,8 @@ translit_Greek_Latin {
|
|||
|
||||
// delete any trailing ' marks used for roundtripping
|
||||
|
||||
"< [Ππ] { \' } [Ss] ;"
|
||||
"< [Νν] { \' } $egammaLike ;"
|
||||
"< [Ππ] { \\\' } [Ss] ;"
|
||||
"< [Νν] { \\\' } $egammaLike ;"
|
||||
|
||||
"::NFC (NFD) ;"
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: Transliterator_index.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
// Date: Wed Nov 7 10:34:20 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
|
@ -22,7 +22,7 @@ translit_index {
|
|||
RuleBasedTransliteratorIDs {
|
||||
// Copyright (c) 2001, International Business Machines Corporation and
|
||||
// others. All Rights Reserved.
|
||||
//
|
||||
//
|
||||
// TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic
|
||||
// system transliterators. It allows arbitrary mappings between
|
||||
// transliterator IDs and file names, and also allows the system to
|
||||
|
@ -31,29 +31,29 @@ translit_index {
|
|||
// "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these
|
||||
// are invisible to the user, but can be composed together by the
|
||||
// system to create visible transliterators.
|
||||
//
|
||||
//
|
||||
// Blank lines and lines beginning with '#' are ignored.
|
||||
//
|
||||
//
|
||||
// Lines in this file have one of the following forms (text not
|
||||
// enclosed by <> is literal):
|
||||
//
|
||||
//
|
||||
// <id>:file:<resource>:<encoding>:<direction>
|
||||
// <id>:internal:<resource>:<encoding>:<direction>
|
||||
// <id>:alias:<getInstanceArg>
|
||||
//
|
||||
//
|
||||
// <id> is the ID of the system transliterator being defined. These
|
||||
// are public IDs enumerated by Transliterator.getAvailableIDs(),
|
||||
// unless the second field is "internal".
|
||||
//
|
||||
//
|
||||
// <resource> is a ResourceReader resource name. Currently these refer
|
||||
// to file names under com/ibm/text/resources. This string is passed
|
||||
// directly to ResourceReader, together with <encoding>.
|
||||
//
|
||||
//
|
||||
// <encoding> is the character encoding to use when reading <resource>;
|
||||
// passed directly to ResourceReader. E.g., "UTF8".
|
||||
//
|
||||
//
|
||||
// <direction> is either "FORWARD" or "REVERSE".
|
||||
//
|
||||
//
|
||||
// <getInstanceArg> is a string to be passed directly to
|
||||
// Transliterator.getInstance(). The returned Transliterator object
|
||||
// then has its ID changed to <id> and is returned.
|
||||
|
@ -74,7 +74,7 @@ translit_index {
|
|||
{ "Greek-Latin/UNGEGN", "alias", "el-Latin/UNGEGN", "" },
|
||||
|
||||
{ "LowerLatin-Jamo", "internal", "translit_Latin_Jamo", "FORWARD" },
|
||||
{ "Latin-Jamo", "alias", "Any-Lower;LowerLatin-Jamo", "" },
|
||||
{ "Latin-Jamo", "alias", "[\\p{Latin}]Any-Lower;LowerLatin-Jamo", "" },
|
||||
{ "Jamo-Latin", "file", "translit_Latin_Jamo", "REVERSE" },
|
||||
|
||||
{ "Latin-Katakana", "file", "translit_Latin_Katakana", "FORWARD" },
|
||||
|
@ -100,8 +100,8 @@ translit_index {
|
|||
|
||||
// Compound rules
|
||||
|
||||
{ "Latin-Hangul", "alias", "[\p{Latin}];Latin-Jamo;[\u1100-\u11FF]NFC", "" },
|
||||
{ "Hangul-Latin", "alias", "[\uAC00-\uD7AF];NFD;Jamo-Latin", "" },
|
||||
{ "Latin-Hangul", "alias", "[\\p{Latin}];NFD;Latin-Jamo;NFC", "" },
|
||||
{ "Hangul-Latin", "alias", "[\\p{Hangul}];NFD;Jamo-Latin", "" },
|
||||
|
||||
// Inter-Indic composed rules
|
||||
{ "Latin-InterIndic", "internal", "translit_Latin_InterIndic", "FORWARD" },
|
||||
|
@ -127,15 +127,15 @@ translit_index {
|
|||
{ "InterIndic-Malayalam", "internal", "translit_InterIndic_Malayalam", "FORWARD" },
|
||||
|
||||
//Latin-Indic transliterators
|
||||
{ "Latin-Devanagari", "alias", "NFD;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Latin-Bengali", "alias", "NFD;Latin-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Latin-Gurmukhi", "alias", "NFD;Latin-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Latin-Gujarati", "alias", "NFD;Latin-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Latin-Oriya", "alias", "NFD;Latin-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Latin-Tamil", "alias", "NFD;Latin-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Latin-Telugu", "alias", "NFD;Latin-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Latin-Kannada", "alias", "NFD;Latin-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Latin-Malayalam", "alias", "NFD;Latin-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
{ "Latin-Devanagari", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Latin-Bengali", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Latin-Gurmukhi", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Latin-Gujarati", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Latin-Oriya", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Latin-Tamil", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Latin-Telugu", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Latin-Kannada", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Latin-Malayalam", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
//Indic-Latin transliterators
|
||||
{ "Devanagari-Latin", "alias", "NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Any_Accents.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
// Date: Wed Nov 7 09:34:03 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Any_Accents
|
||||
|
@ -18,8 +18,8 @@ translit_Any_Accents {
|
|||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Any_Accents.txt,v $
|
||||
// $Date: 2001/10/26 05:41:15 $
|
||||
// $Revision: 1.1 $
|
||||
// $Date: 2001/11/07 18:50:25 $
|
||||
// $Revision: 1.2 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
":: NFD (NFC) ;"
|
||||
|
@ -29,21 +29,21 @@ translit_Any_Accents {
|
|||
// define special conversion characters.
|
||||
// variants of this could use different characters, or set one or the other to null.
|
||||
|
||||
"$pre = \< ;"
|
||||
"$post = \> ;"
|
||||
"$pre = \\\< ;"
|
||||
"$post = \\\> ;"
|
||||
|
||||
// Provide keyboard equivalents for common diacritics used in transliteration
|
||||
|
||||
"$pre \` $post <> \u0300 ;" // COMBINING GRAVE ACCENT
|
||||
"$pre \' $post <> \u0301 ;" // COMBINING ACUTE ACCENT
|
||||
"$pre \^ $post <> \u0302 ;" // COMBINING CIRCUMFLEX ACCENT
|
||||
"$pre \~ $post <> \u0303 ;" // COMBINING TILDE
|
||||
"$pre \- $post <> \u0304 ;" // COMBINING MACRON
|
||||
"$pre \" $post <> \u0308 ;" // COMBINING DIAERESIS
|
||||
"$pre \* $post <> \u030A ;" // COMBINING RING ABOVE
|
||||
"$pre \, $post <> \u0327 ;" // COMBINING CEDILLA
|
||||
"$pre \\\` $post <> \u0300 ;" // COMBINING GRAVE ACCENT
|
||||
"$pre \\\' $post <> \u0301 ;" // COMBINING ACUTE ACCENT
|
||||
"$pre \\\^ $post <> \u0302 ;" // COMBINING CIRCUMFLEX ACCENT
|
||||
"$pre \\\~ $post <> \u0303 ;" // COMBINING TILDE
|
||||
"$pre \\\- $post <> \u0304 ;" // COMBINING MACRON
|
||||
"$pre \\\" $post <> \u0308 ;" // COMBINING DIAERESIS
|
||||
"$pre \\\* $post <> \u030A ;" // COMBINING RING ABOVE
|
||||
"$pre \\\, $post <> \u0327 ;" // COMBINING CEDILLA
|
||||
"$pre '/' $post <> \u0338 ;" // COMBINING LONG SOLIDUS OVERLAY
|
||||
"$pre \. $post <> \u0323 ;" // COMBINING DOT BELOW
|
||||
"$pre \\\. $post <> \u0323 ;" // COMBINING DOT BELOW
|
||||
|
||||
// Combine common characters
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Any_Publishing.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
// Date: Wed Nov 7 09:34:03 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Any_Publishing
|
||||
|
@ -18,8 +18,8 @@ translit_Any_Publishing {
|
|||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Any_Publishing.txt,v $
|
||||
// $Date: 2001/10/26 05:41:16 $
|
||||
// $Revision: 1.1 $
|
||||
// $Date: 2001/11/07 18:50:25 $
|
||||
// $Revision: 1.2 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Test case
|
||||
|
@ -27,10 +27,10 @@ translit_Any_Publishing {
|
|||
|
||||
// Variables
|
||||
|
||||
"$single = \' ;"
|
||||
"$single = \\\' ;"
|
||||
"$space = ' ' ;"
|
||||
"$double = \" ;"
|
||||
"$back = \` ;"
|
||||
"$double = \\\" ;"
|
||||
"$back = \\\` ;"
|
||||
"$tab = '\u0008' ;"
|
||||
"$makeRight = [[:Z:][:Ps:][:Pi:]$] ;"
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Fullwidth_Halfwidth.utf8.txt
|
||||
// Date: Thu Oct 25 22:17:21 2001
|
||||
// Source: ../../text/resources/Transliterator_Fullwidth_Halfwidth.txt
|
||||
// Date: Wed Nov 7 09:34:03 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Fullwidth_Halfwidth
|
||||
|
@ -58,7 +58,7 @@ translit_Fullwidth_Halfwidth {
|
|||
// single character
|
||||
|
||||
"!<>'!';" // from FULLWIDTH EXCLAMATION MARK
|
||||
""<>'\"';" // from FULLWIDTH QUOTATION MARK
|
||||
""<>'\\\"';" // from FULLWIDTH QUOTATION MARK
|
||||
"#<>'#';" // from FULLWIDTH NUMBER SIGN
|
||||
"$<>'$';" // from FULLWIDTH DOLLAR SIGN
|
||||
"%<>'%';" // from FULLWIDTH PERCENT SIGN
|
||||
|
|
|
@ -4,9 +4,9 @@
|
|||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpicurules.bat
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: ../../text/resources/Transliterator_Greek_Latin.txt
|
||||
// Date: Mon Nov 5 12:28:37 2001
|
||||
// Date: Wed Nov 7 09:34:04 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Greek_Latin
|
||||
|
@ -18,8 +18,8 @@ translit_Greek_Latin {
|
|||
// Corporation and others. All Rights Reserved.
|
||||
//--------------------------------------------------------------------
|
||||
// $Source: /xsrl/Nsvn/icu/icu/source/data/translit/Attic/t_Grek_Latn.txt,v $
|
||||
// $Date: 2001/11/05 20:39:12 $
|
||||
// $Revision: 1.2 $
|
||||
// $Date: 2001/11/07 18:50:25 $
|
||||
// $Revision: 1.3 $
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
// Rules are predicated on running NFD first, and NFC afterwards
|
||||
|
@ -65,13 +65,13 @@ translit_Greek_Latin {
|
|||
|
||||
"$caron = \u030C;"
|
||||
|
||||
"$afterLetter = [:^L:] [\'[:M:]]* ;"
|
||||
"$beforeLetter = [\'[:M:]]* [:^L:] ;"
|
||||
"$afterLetter = [:^L:] [\\\'[:M:]]* ;"
|
||||
"$beforeLetter = [\\\'[:M:]]* [:^L:] ;"
|
||||
|
||||
// Fix punctuation
|
||||
|
||||
"\; <> \? ;"
|
||||
"· <> \: ;"
|
||||
"\\\; <> \\\? ;"
|
||||
"· <> \\\: ;"
|
||||
|
||||
// CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
|
||||
|
||||
|
@ -230,9 +230,9 @@ translit_Greek_Latin {
|
|||
"μ <> m ;"
|
||||
"Μ <> M ;"
|
||||
|
||||
"ν } $gammaLike > n\' ;"
|
||||
"ν } $gammaLike > n\\\' ;"
|
||||
"ν <> n ;"
|
||||
"Ν } $gammaLike <> N\' ;"
|
||||
"Ν } $gammaLike <> N\\\' ;"
|
||||
"Ν <> N ;"
|
||||
|
||||
"ξ <> x ;"
|
||||
|
@ -252,8 +252,8 @@ translit_Greek_Latin {
|
|||
|
||||
// insert separator
|
||||
|
||||
"[Pp] { } ς > \' ;"
|
||||
"[Pp] { } σ > \' ;"
|
||||
"[Pp] { } ς > \\\' ;"
|
||||
"[Pp] { } σ > \\\' ;"
|
||||
|
||||
// Caron means exception
|
||||
|
||||
|
@ -269,7 +269,7 @@ translit_Greek_Latin {
|
|||
"ς <> s $caron;"
|
||||
"σ <> s ;"
|
||||
|
||||
"[Pp] { Σ <> \'S ;"
|
||||
"[Pp] { Σ <> \\\'S ;"
|
||||
"Σ <> S ;"
|
||||
|
||||
"τ <> t ;"
|
||||
|
@ -327,8 +327,8 @@ translit_Greek_Latin {
|
|||
|
||||
// delete any trailing ' marks used for roundtripping
|
||||
|
||||
"< [Ππ] { \' } [Ss] ;"
|
||||
"< [Νν] { \' } $egammaLike ;"
|
||||
"< [Ππ] { \\\' } [Ss] ;"
|
||||
"< [Νν] { \\\' } $egammaLike ;"
|
||||
|
||||
"::NFC (NFD) ;"
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
// THIS IS A MACHINE-GENERATED FILE
|
||||
// Tool: dumpICUrules.bat
|
||||
// Source: Transliterator_index.txt
|
||||
// Date: Thu Oct 25 22:17:22 2001
|
||||
// Date: Wed Nov 7 10:34:20 2001
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
|
@ -22,7 +22,7 @@ translit_index {
|
|||
RuleBasedTransliteratorIDs {
|
||||
// Copyright (c) 2001, International Business Machines Corporation and
|
||||
// others. All Rights Reserved.
|
||||
//
|
||||
//
|
||||
// TRANSLITERATOR INDEX FILE. This file lists the non-algorithmic
|
||||
// system transliterators. It allows arbitrary mappings between
|
||||
// transliterator IDs and file names, and also allows the system to
|
||||
|
@ -31,29 +31,29 @@ translit_index {
|
|||
// "Latin-Jamo;Jamo-Hangul". Internal IDs may also be defined; these
|
||||
// are invisible to the user, but can be composed together by the
|
||||
// system to create visible transliterators.
|
||||
//
|
||||
//
|
||||
// Blank lines and lines beginning with '#' are ignored.
|
||||
//
|
||||
//
|
||||
// Lines in this file have one of the following forms (text not
|
||||
// enclosed by <> is literal):
|
||||
//
|
||||
//
|
||||
// <id>:file:<resource>:<encoding>:<direction>
|
||||
// <id>:internal:<resource>:<encoding>:<direction>
|
||||
// <id>:alias:<getInstanceArg>
|
||||
//
|
||||
//
|
||||
// <id> is the ID of the system transliterator being defined. These
|
||||
// are public IDs enumerated by Transliterator.getAvailableIDs(),
|
||||
// unless the second field is "internal".
|
||||
//
|
||||
//
|
||||
// <resource> is a ResourceReader resource name. Currently these refer
|
||||
// to file names under com/ibm/text/resources. This string is passed
|
||||
// directly to ResourceReader, together with <encoding>.
|
||||
//
|
||||
//
|
||||
// <encoding> is the character encoding to use when reading <resource>;
|
||||
// passed directly to ResourceReader. E.g., "UTF8".
|
||||
//
|
||||
//
|
||||
// <direction> is either "FORWARD" or "REVERSE".
|
||||
//
|
||||
//
|
||||
// <getInstanceArg> is a string to be passed directly to
|
||||
// Transliterator.getInstance(). The returned Transliterator object
|
||||
// then has its ID changed to <id> and is returned.
|
||||
|
@ -74,7 +74,7 @@ translit_index {
|
|||
{ "Greek-Latin/UNGEGN", "alias", "el-Latin/UNGEGN", "" },
|
||||
|
||||
{ "LowerLatin-Jamo", "internal", "translit_Latin_Jamo", "FORWARD" },
|
||||
{ "Latin-Jamo", "alias", "Any-Lower;LowerLatin-Jamo", "" },
|
||||
{ "Latin-Jamo", "alias", "[\\p{Latin}]Any-Lower;LowerLatin-Jamo", "" },
|
||||
{ "Jamo-Latin", "file", "translit_Latin_Jamo", "REVERSE" },
|
||||
|
||||
{ "Latin-Katakana", "file", "translit_Latin_Katakana", "FORWARD" },
|
||||
|
@ -100,8 +100,8 @@ translit_index {
|
|||
|
||||
// Compound rules
|
||||
|
||||
{ "Latin-Hangul", "alias", "[\p{Latin}];Latin-Jamo;[\u1100-\u11FF]NFC", "" },
|
||||
{ "Hangul-Latin", "alias", "[\uAC00-\uD7AF];NFD;Jamo-Latin", "" },
|
||||
{ "Latin-Hangul", "alias", "[\\p{Latin}];NFD;Latin-Jamo;NFC", "" },
|
||||
{ "Hangul-Latin", "alias", "[\\p{Hangul}];NFD;Jamo-Latin", "" },
|
||||
|
||||
// Inter-Indic composed rules
|
||||
{ "Latin-InterIndic", "internal", "translit_Latin_InterIndic", "FORWARD" },
|
||||
|
@ -127,15 +127,15 @@ translit_index {
|
|||
{ "InterIndic-Malayalam", "internal", "translit_InterIndic_Malayalam", "FORWARD" },
|
||||
|
||||
//Latin-Indic transliterators
|
||||
{ "Latin-Devanagari", "alias", "NFD;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Latin-Bengali", "alias", "NFD;Latin-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Latin-Gurmukhi", "alias", "NFD;Latin-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Latin-Gujarati", "alias", "NFD;Latin-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Latin-Oriya", "alias", "NFD;Latin-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Latin-Tamil", "alias", "NFD;Latin-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Latin-Telugu", "alias", "NFD;Latin-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Latin-Kannada", "alias", "NFD;Latin-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Latin-Malayalam", "alias", "NFD;Latin-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
{ "Latin-Devanagari", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Devanagari;NFC", "" },
|
||||
{ "Latin-Bengali", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Bengali;NFC", "" },
|
||||
{ "Latin-Gurmukhi", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Gurmukhi;NFC", "" },
|
||||
{ "Latin-Gujarati", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Gujarati;NFC", "" },
|
||||
{ "Latin-Oriya", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Oriya;NFC", "" },
|
||||
{ "Latin-Tamil", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Tamil;NFC", "" },
|
||||
{ "Latin-Telugu", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Telugu;NFC", "" },
|
||||
{ "Latin-Kannada", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Kannada;NFC", "" },
|
||||
{ "Latin-Malayalam", "alias", "NFD;[\\p{Latin}]Lower;Latin-InterIndic;InterIndic-Malayalam;NFC", "" },
|
||||
|
||||
//Indic-Latin transliterators
|
||||
{ "Devanagari-Latin", "alias", "NFD;Devanagari-InterIndic;InterIndic-Latin;NFC", "" },
|
||||
|
|
Loading…
Add table
Reference in a new issue