mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-16 18:25:57 +00:00
ICU-11446 Spoof Checker data update: use MA table only.
X-SVN-Rev: 37072
This commit is contained in:
parent
74157ec338
commit
56459a99d9
5 changed files with 8253 additions and 6757 deletions
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 2008-2014 International Business Machines Corporation
|
||||
* Copyright (C) 2008-2015 International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
***************************************************************************
|
||||
*
|
||||
|
@ -1809,6 +1809,15 @@ public class SpoofChecker {
|
|||
* Using skeletons directly makes it possible to quickly check whether an identifier is confusable with any of some
|
||||
* large set of existing identifiers, by creating an efficiently searchable collection of the skeletons.
|
||||
*
|
||||
* Skeletons are computed using the algorithm and data described in Unicode UAX 39.
|
||||
* The latest proposed update, UAX 39 Version 8 draft 1, says "the tables SL, SA, and ML
|
||||
* were still problematic, and discouraged from use in [Unicode] 7.0.
|
||||
* They were thus removed from version 8.0"
|
||||
*
|
||||
* In light of this, the default mapping data included with ICU 55 uses the
|
||||
* Unicode 7 MA (Multi script Any case) table data for the other type options
|
||||
* (Single Script, Any Case), (Single Script, Lower Case) and (Multi Script, Lower Case).
|
||||
*
|
||||
* @param type
|
||||
* The type of skeleton, corresponding to which of the Unicode confusable data tables to use. The default
|
||||
* is Mixed-Script, Lowercase. Allowed options are SINGLE_SCRIPT_CONFUSABLE and ANY_CASE_CONFUSABLE. The
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:847e0ccaa347c084b4f8a52871942bd2493d12e2675e831ee206e86a176da7ac
|
||||
size 11876902
|
||||
oid sha256:0d15d27af09b6d207302e051d429e949e7b137054e0fc2c7db5be89b3a43424e
|
||||
size 11868952
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c986657e2b3c0c646d4f51cc0332cbede2c984deab3864a42749f7ee76a7a95f
|
||||
oid sha256:bba5e69e2602c2a121977ede7224e4126ce04905831fb048bef10cb59ec822f2
|
||||
size 90574
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -407,39 +407,42 @@ public class SpoofCheckerTest extends TestFmwk {
|
|||
|
||||
checkSkeleton(sc, MA, "\\u02b9identifier'", "'identifier'", testName);
|
||||
|
||||
checkSkeleton(sc, SL, "nochange", "\\u213C\\u2134\\U0001D41C\\u210E\\u237A\\u213C\\u210A\\u212E", testName);
|
||||
checkSkeleton(sc, SA, "nochange", "\\u213C\\u2134\\U0001D41C\\u210E\\u237A\\u213C\\u210A\\u212E", testName);
|
||||
checkSkeleton(sc, ML, "nochange", "\\u213C\\u2134\\U0001D41C\\u210E\\u237A\\u213C\\u210A\\u212E", testName);
|
||||
checkSkeleton(sc, SL, "nochange", "nochange", testName);
|
||||
checkSkeleton(sc, SA, "nochange", "nochange", testName);
|
||||
checkSkeleton(sc, ML, "nochange", "nochange", testName);
|
||||
checkSkeleton(sc, MA, "nochange", "nochange", testName);
|
||||
checkSkeleton(sc, MA, "love", "love", testName);
|
||||
checkSkeleton(sc, MA, "1ove", "love", testName); // Digit 1 to letter l
|
||||
checkSkeleton(sc, ML, "OOPS", "OOPS", testName);
|
||||
checkSkeleton(sc, ML, "00PS", "00PS", testName); // Digit 0 unchanged in lower case mode.
|
||||
checkSkeleton(sc, ML, "00PS", "OOPS", testName);
|
||||
checkSkeleton(sc, MA, "OOPS", "OOPS", testName);
|
||||
checkSkeleton(sc, MA, "00PS", "OOPS", testName); // Digit 0 to letter O in any case mode only
|
||||
checkSkeleton(sc, MA, "00PS", "OOPS", testName); // Digit 0 to letter O
|
||||
checkSkeleton(sc, SL, "\\u059c", "\\u0301", testName);
|
||||
checkSkeleton(sc, SL, "\\u2A74", "\\u003A\\u003A\\u003D", testName);
|
||||
checkSkeleton(sc, SL, "\\u247E", "\\u0028\\u0031\\u0031\\u0029", testName); // "(11)"
|
||||
checkSkeleton(sc, SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u0031\\u0644\\u2134", testName);
|
||||
checkSkeleton(sc, SL, "\\u247E", "(ll)", testName);
|
||||
checkSkeleton(sc, SL, "\\uFDFB", "\\u062C\\u0644\\u0020\\u062C\\u0644\\u006c\\u0644\\u006f", testName);
|
||||
|
||||
// This mapping exists in the ML and MA tables, does not exist in SL, SA
|
||||
// 0C83 mapping existed in the ML and MA tables, did not exist in SL, SA (Original Unicode 7)
|
||||
// mapping exists in all tables (ICU 55).
|
||||
// 0C83 ; 0983 ; ML # KANNADA SIGN VISARGA to
|
||||
checkSkeleton(sc, SL, "\\u0C83", "\\u0C83", testName);
|
||||
checkSkeleton(sc, SA, "\\u0C83", "\\u0C83", testName);
|
||||
checkSkeleton(sc, SL, "\\u0C83", "\\u0983", testName);
|
||||
checkSkeleton(sc, SA, "\\u0C83", "\\u0983", testName);
|
||||
checkSkeleton(sc, ML, "\\u0C83", "\\u0983", testName);
|
||||
checkSkeleton(sc, MA, "\\u0C83", "\\u0983", testName);
|
||||
|
||||
// 0391 mappings exist only in MA and SA tables.
|
||||
// 0391 mappings existed only in MA and SA tables (Original Unicode 7).
|
||||
// mappings exist in all tables (ICU 55)
|
||||
checkSkeleton(sc, MA, "\\u0391", "A", testName);
|
||||
checkSkeleton(sc, SA, "\\u0391", "\\U0001D400", testName);
|
||||
checkSkeleton(sc, ML, "\\u0391", "\\u0391", testName);
|
||||
checkSkeleton(sc, SL, "\\u0391", "\\u0391", testName);
|
||||
checkSkeleton(sc, SA, "\\u0391", "A", testName);
|
||||
checkSkeleton(sc, ML, "\\u0391", "A", testName);
|
||||
checkSkeleton(sc, SL, "\\u0391", "A", testName);
|
||||
|
||||
// 13CF Mappings in all four tables, different in MA.
|
||||
checkSkeleton(sc, ML, "\\u13CF", "\\U0001D41B", testName);
|
||||
// 13CF Mappings in all four tables, different in MA (Original Unicode 7).
|
||||
// Mapping same in all tables (ICU 55)
|
||||
checkSkeleton(sc, ML, "\\u13CF", "b", testName);
|
||||
checkSkeleton(sc, MA, "\\u13CF", "b", testName);
|
||||
checkSkeleton(sc, SL, "\\u13CF", "\\U0001D41B", testName);
|
||||
checkSkeleton(sc, SA, "\\u13CF", "\\U0001D41B", testName);
|
||||
checkSkeleton(sc, SL, "\\u13CF", "b", testName);
|
||||
checkSkeleton(sc, SA, "\\u13CF", "b", testName);
|
||||
|
||||
// 0022 ; 0027 0027 ;
|
||||
// all tables
|
||||
|
|
Loading…
Add table
Reference in a new issue