mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-11 08:01:32 +00:00
ICU-1830 fixed the diphthongs ending with upsilon;
also changed to file for UNGEGN X-SVN-Rev: 9276
This commit is contained in:
parent
105a570db4
commit
075d5c81a4
2 changed files with 248 additions and 2 deletions
|
@ -3,8 +3,8 @@
|
|||
# Corporation and others. All Rights Reserved.
|
||||
#--------------------------------------------------------------------
|
||||
# $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Greek_Latin.txt,v $
|
||||
# $Date: 2002/03/14 23:28:34 $
|
||||
# $Revision: 1.20 $
|
||||
# $Date: 2002/07/21 08:39:23 $
|
||||
# $Revision: 1.21 $
|
||||
#--------------------------------------------------------------------
|
||||
|
||||
# Rules are predicated on running NFD first, and NFC afterwards
|
||||
|
@ -64,8 +64,12 @@ $beforeLetter = [[:M:]\']* [:L:] ;
|
|||
$beforeLower = $accent * $lower ;
|
||||
|
||||
$notLetter = [^[:L:][:M:]] ;
|
||||
$under = ̱;
|
||||
|
||||
# Fix punctuation
|
||||
# preserve original
|
||||
\: <> \: $under ;
|
||||
\? <> \? $under ;
|
||||
|
||||
\; <> \? ;
|
||||
· <> \: ;
|
||||
|
|
|
@ -0,0 +1,242 @@
|
|||
# Rules are predicated on running NFD first, and NFC afterwards
|
||||
# MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN
|
||||
# WARNING: need to add accents to both filters ###
|
||||
# :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩ] ;
|
||||
|
||||
# :: [\\u0000-\\u007F \· [:Greek:] [:nonspacing mark:]] ;
|
||||
::NFD (NFC) ;
|
||||
|
||||
# For modern Greek.
|
||||
|
||||
# Useful variables
|
||||
|
||||
$lower = [[:latin:][:greek:] & [:Ll:]] ;
|
||||
$upper = [[:latin:][:greek:] & [:Lu:]] ;
|
||||
$accent = [:M:] ;
|
||||
|
||||
$macron = ̄ ;
|
||||
$ddot = ̈ ;
|
||||
|
||||
$lcgvowel = [αεηιουω] ;
|
||||
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
|
||||
$gvowel = [$lcgvowel $ucgvowel] ;
|
||||
$lcgvowelC = [$lcgvowel $accent] ;
|
||||
|
||||
$evowel = [aeiouyAEIOUY];
|
||||
$vowel = [ $evowel $gvowel] ;
|
||||
|
||||
$beforeLower = $accent * $lower ;
|
||||
|
||||
$gammaLike = [ΓΚΞΧγκξχϰ] ;
|
||||
$egammaLike = [GKXCgkxc] ;
|
||||
$smooth = ̓ ;
|
||||
$rough = ̔ ;
|
||||
$iotasub = ͅ ;
|
||||
|
||||
$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;
|
||||
|
||||
$under = ̱;
|
||||
|
||||
$caron = ̌;
|
||||
|
||||
$afterLetter = [:L:] [\'[:M:]]* ;
|
||||
$beforeLetter = [\'[:M:]]* [:L:] ;
|
||||
|
||||
# Fix punctuation
|
||||
|
||||
# preserve original
|
||||
\: <> \: $under ;
|
||||
\? <> \? $under ;
|
||||
|
||||
\; <> \? ;
|
||||
· <> \: ;
|
||||
|
||||
# Fix any ancient characters that creep in
|
||||
|
||||
͂ > ́ ;
|
||||
̂ > ́ ;
|
||||
̀ > ́ ;
|
||||
$smooth > ;
|
||||
$rough > ;
|
||||
$iotasub > ;
|
||||
ͺ > ;
|
||||
|
||||
# These rules must come before the general ones below so that they are not masked
|
||||
|
||||
η <> i $under ;
|
||||
Η <> I $under ;
|
||||
|
||||
Ψ } $beforeLower <> Ps ;
|
||||
Ψ <> PS ;
|
||||
ψ <> ps ;
|
||||
|
||||
ω <> o $under ;
|
||||
Ω <> O $under;
|
||||
|
||||
# at beginning or end of word, convert mp to b
|
||||
|
||||
[^[:L:][:M:]] { μπ > b ;
|
||||
μπ } [^[:L:][:M:]] > b ;
|
||||
[^[:L:][:M:]] { [Μμ][Ππ] > B ;
|
||||
[Μμ][Ππ] } [^[:L:][:M:]] > B ;
|
||||
|
||||
μπ < b ;
|
||||
Μπ < B } $beforeLower ;
|
||||
ΜΠ < B ;
|
||||
|
||||
# handle diphthongs ending with upsilon
|
||||
|
||||
ου <> ou ;
|
||||
ΟΥ <> OU ;
|
||||
Ου <> Ou ;
|
||||
οΥ <> oU ;
|
||||
|
||||
$fmaker = [aeiAEI] $under ? ;
|
||||
$shiftForwardVowels = [:Mn:] ;
|
||||
|
||||
$fmaker { υ ( $shiftForwardVowels )* } $softener > $1 v $under ;
|
||||
υ $1 < ( $shiftForwardVowels )* v $under ;
|
||||
|
||||
$fmaker { υ ( $shiftForwardVowels )* } > $1 f $under;
|
||||
υ $1 < ( $shiftForwardVowels )* f $under ;
|
||||
|
||||
$fmaker { Υ } $softener <> V $under ;
|
||||
$fmaker { Υ <> U $under ;
|
||||
|
||||
υ <> y ;
|
||||
Υ <> Y ;
|
||||
|
||||
# NORMAL
|
||||
|
||||
α <> a ;
|
||||
Α <> A ;
|
||||
|
||||
β <> v ;
|
||||
Β <> V ;
|
||||
|
||||
γ } $gammaLike <> n } $egammaLike ;
|
||||
γ <> g ;
|
||||
Γ } $gammaLike <> N } $egammaLike ;
|
||||
Γ <> G ;
|
||||
|
||||
δ <> d ;
|
||||
Δ <> D ;
|
||||
|
||||
ε <> e ;
|
||||
Ε <> E ;
|
||||
|
||||
ζ <> z ;
|
||||
Ζ <> Z ;
|
||||
|
||||
θ <> th ;
|
||||
Θ } $beforeLower <> Th ;
|
||||
Θ <> TH ;
|
||||
|
||||
ι <> i ;
|
||||
Ι <> I ;
|
||||
|
||||
κ <> k ;
|
||||
Κ <> K ;
|
||||
|
||||
λ <> l ;
|
||||
Λ <> L ;
|
||||
|
||||
μ <> m ;
|
||||
Μ <> M ;
|
||||
|
||||
ν } $gammaLike > n\' ;
|
||||
ν <> n ;
|
||||
Ν } $gammaLike <> N\' ;
|
||||
Ν <> N ;
|
||||
|
||||
ξ <> x ;
|
||||
Ξ <> X ;
|
||||
|
||||
ο <> o ;
|
||||
Ο <> O ;
|
||||
|
||||
π <> p ;
|
||||
Π <> P ;
|
||||
|
||||
ρ <> r ;
|
||||
Ρ <> R ;
|
||||
|
||||
[Pp] { } ς > \' ;
|
||||
[Pp] { } σ > \' ;
|
||||
|
||||
# A trailing $under marks the exception (a sigma form that does not match its position); $caron is not used by these rules
|
||||
|
||||
# before a letter, initial
|
||||
ς } $beforeLetter <> s $under } $beforeLetter;
|
||||
σ } $beforeLetter <> s } $beforeLetter;
|
||||
|
||||
# otherwise, after a letter = final
|
||||
$afterLetter { σ <> $afterLetter { s $under;
|
||||
$afterLetter { ς <> $afterLetter { s ;
|
||||
|
||||
# otherwise (isolated) = initial
|
||||
ς <> s $under;
|
||||
σ <> s ;
|
||||
|
||||
[Pp] { Σ <> \'S ;
|
||||
Σ <> S ;
|
||||
|
||||
τ <> t ;
|
||||
Τ <> T ;
|
||||
|
||||
φ <> f ;
|
||||
Φ <> F ;
|
||||
|
||||
χ <> ch ;
|
||||
Χ } $beforeLower <> Ch ;
|
||||
Χ <> CH ;
|
||||
|
||||
# Completeness for ASCII
# The rules in this group all use the one-way `<` operator, so they only
# apply in the Latin-to-Greek direction. Each one rewrites a single ASCII
# letter (h, c, j, q, u, w and their capitals) into other Latin letters.
# NOTE(review): the leading `|` looks like the ICU cursor marker, which would
# put the cursor before the replacement so that the rules above transliterate
# it again -- confirm against the ICU rule syntax documentation.
# The u/w rules with `} $vowel` only fire before a vowel and are listed
# before the unconditional u/w rules for the same letters.
|
||||
|
||||
# $ignore = [[:Mark:]''] * ;
|
||||
|
||||
| ch < h ;
|
||||
| k < c ;
|
||||
| i < j ;
|
||||
| k < q ;
|
||||
| b < u } $vowel ;
|
||||
| b < w } $vowel ;
|
||||
| y < u ;
|
||||
| y < w ;
|
||||
|
||||
| Ch < H ;
|
||||
| K < C ;
|
||||
| I < J ;
|
||||
| K < Q ;
|
||||
| B < W } $vowel ;
|
||||
| B < U } $vowel ;
|
||||
| Y < W ;
|
||||
| Y < U ;
|
||||
|
||||
# Completeness for Greek
|
||||
|
||||
ϐ > | β ;
|
||||
ϑ > | θ ;
|
||||
ϒ > | Υ ;
|
||||
ϕ > | φ ;
|
||||
ϖ > | π ;
|
||||
|
||||
ϰ > | κ ;
|
||||
ϱ > | ρ ;
|
||||
ϲ > | σ ;
|
||||
ϳ > j ;
|
||||
ϴ > | Θ ;
|
||||
ϵ > | ε ;
|
||||
µ > | μ ;
|
||||
|
||||
# delete any trailing ' marks used for roundtripping
|
||||
|
||||
< [Ππ] { \' } [Ss] ;
|
||||
< [Νν] { \' } $egammaLike ;
|
||||
|
||||
::NFC (NFD) ;
|
||||
|
||||
# MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD
|
||||
# :: ( [́̄̆̈':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǡǦ-ǭǰǴ-ǵǸ-ǻȀ-țȞ-ȟȦ-ȳ̱Ḁ-ẙẠ-ỹK-Å] ) ;
|
||||
|
||||
# :: ([\\u0000-\\u007F [:Latin:] [:nonspacing mark:]]) ;
|
Loading…
Add table
Reference in a new issue