mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
no message
X-SVN-Rev: 14687
This commit is contained in:
parent
77c134dc38
commit
5c397b73b3
19 changed files with 1269 additions and 342 deletions
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA_Data.java,v $
|
||||
* $Date: 2002/07/14 22:07:00 $
|
||||
* $Revision: 1.1 $
|
||||
* $Date: 2004/03/11 19:03:19 $
|
||||
* $Revision: 1.2 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -27,7 +27,7 @@ import com.ibm.icu.text.UTF16;
|
|||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public class UCA_Data implements UCA_Types {
|
||||
static final boolean DEBUG = true;
|
||||
static final boolean DEBUG = false;
|
||||
static final boolean DEBUG_SHOW_ADD = false;
|
||||
|
||||
private Normalizer toD;
|
||||
|
|
|
@ -1,7 +1,4 @@
|
|||
# Correlated with Unicode 4.0
|
||||
# Note: The casing of block names is not normative.
|
||||
# For example, "Basic Latin" and "BASIC LATIN" are equivalent.
|
||||
#
|
||||
# Code points not explicitly listed in this file are given the value No_Block.
|
||||
#
|
||||
# Note: The casing of block names is not normative.
|
||||
# For example, "Basic Latin" and "BASIC LATIN" are equivalent.
|
||||
# Format:
|
||||
# Start Code..End Code; Block Name
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/BuildNames.java,v $
|
||||
* $Date: 2004/02/07 01:01:17 $
|
||||
* $Revision: 1.8 $
|
||||
* $Date: 2004/03/11 19:03:18 $
|
||||
* $Revision: 1.9 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -26,7 +26,7 @@ import com.ibm.text.utility.*;
|
|||
|
||||
public class BuildNames implements UCD_Types {
|
||||
|
||||
static final boolean DEBUG = true;
|
||||
static final boolean DEBUG = false;
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
collectWords();
|
||||
|
|
|
@ -59,7 +59,7 @@ public class CheckICU {
|
|||
toolFactory = ToolUnicodePropertySource.make("4.0.0");
|
||||
|
||||
String[] quickList = {
|
||||
"Canonical_Combining_Class",
|
||||
// "Canonical_Combining_Class",
|
||||
// "Script", "Bidi_Mirroring_Glyph", "Case_Folding",
|
||||
//"Numeric_Value"
|
||||
};
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/ConvertUCD.java,v $
|
||||
* $Date: 2004/02/12 08:23:17 $
|
||||
* $Revision: 1.14 $
|
||||
* $Date: 2004/03/11 19:03:18 $
|
||||
* $Revision: 1.15 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -305,6 +305,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
value.compact();
|
||||
}
|
||||
|
||||
/*
|
||||
UData ud;
|
||||
ud = getEntry(0x5e);
|
||||
System.out.println("SPOT-CHECK: 5e: " + ud);
|
||||
|
@ -320,6 +321,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
|
||||
ud = getEntry(0xFFFF);
|
||||
System.out.println("SPOT-CHECK: FFFF: " + ud);
|
||||
*/
|
||||
|
||||
writeJavaData();
|
||||
}
|
||||
|
@ -410,7 +412,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
|
||||
int count = Utility.split(line,';',parts);
|
||||
|
||||
if (parts[0].equals("2801")) {
|
||||
if (false && parts[0].equals("2801")) {
|
||||
System.out.println("debug?");
|
||||
}
|
||||
|
||||
|
@ -468,7 +470,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
if (end == 0) end = cpStart;
|
||||
|
||||
for (int j = cpStart; j <= end; ++j) {
|
||||
if (j != UCD.mapToRepresentative(j, false)) continue;
|
||||
if (j != UCD.mapToRepresentative(j, Integer.MAX_VALUE)) continue;
|
||||
if (skipLetters && getEntry(cpStart).isLetter()) continue;
|
||||
appendCharProperties(j, prop);
|
||||
}
|
||||
|
@ -490,7 +492,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
if (val.equals("")) continue; // skip empty values, they mean default
|
||||
|
||||
for (int cps = cpStart; cps <= cpTop; ++cps) {
|
||||
if (UCD.mapToRepresentative(cps, false) != cps) continue; // skip condensed ranges
|
||||
if (UCD.mapToRepresentative(cps, Integer.MAX_VALUE) != cps) continue; // skip condensed ranges
|
||||
|
||||
if (key.equals("binary")) {
|
||||
appendCharProperties(cps, val);
|
||||
|
@ -508,7 +510,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
if (type.equals("I")) {
|
||||
data.simpleCaseFolding = val;
|
||||
setBinaryProperty(cps, CaseFoldTurkishI);
|
||||
System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting "
|
||||
if (DEBUG) System.out.println("SPOT-CHECK: <" + parts[i-1] + "> Setting "
|
||||
+ Utility.hex(cps) + ": " + Utility.hex(val));
|
||||
}
|
||||
} else if (labels[0].equals("SpecialCasing") // special handling for special casing
|
||||
|
@ -658,7 +660,7 @@ public final class ConvertUCD implements UCD_Types {
|
|||
System.out.println("Warning: NULL name\r\n" + uData);
|
||||
System.out.println();
|
||||
}
|
||||
if (uData.codePoint == 0x2801) {
|
||||
if (false && uData.codePoint == 0x2801) {
|
||||
System.out.println("SPOT-CHECK: " + uData);
|
||||
}
|
||||
uData.writeBytes(dataOut);
|
||||
|
|
|
@ -1,16 +1,29 @@
|
|||
#
|
||||
# Unicode Character Database: Derived Property Data
|
||||
# This file shows when various code points were designated in Unicode
|
||||
# This file shows when various code points were first assigned in Unicode.
|
||||
#
|
||||
# Caution: When using the Age *property*, all assigned code points
|
||||
# in each version are included, not just the newly assigned code points.
|
||||
# For more information, see http://www.unicode.org/reports/tr18/
|
||||
#
|
||||
# Notes:
|
||||
# - The term 'designated' means that a previously reserved code point was specified
|
||||
# to be a noncharacter or surrogate, or assigned as a character,
|
||||
# control or format code.
|
||||
#
|
||||
# - The term 'assigned' means that a previously reserved code point was assigned
|
||||
# to be a character (graphic, format, control, or private-use);
|
||||
# a noncharacter code point; or a surrogate code point.
|
||||
# For more information, see The Unicode Standard Section 2.4
|
||||
#
|
||||
# - Versions are only tracked from 1.1 onwards, since version 1.0
|
||||
# predated changes required by the ISO 10646 merger.
|
||||
#
|
||||
# - The Hangul Syllables that were removed from 2.0 are not included in the 1.1 listing.
|
||||
#
|
||||
# - The supplementary private use code points and the non-character code points
|
||||
# were designated in version 2.0, but not specifically listed in the UCD
|
||||
# were assigned in version 2.0, but not specifically listed in the UCD
|
||||
# until versions 3.0 and 3.1 respectively.
|
||||
#
|
||||
# - Contiguous ranges are broken into separate lines where they would cross code point
|
||||
# types: graphic, format, control, private-use, surrogate, noncharacter
|
||||
#
|
||||
# For details on the contents of each version, see
|
||||
# http://www.unicode.org/versions/enumeratedversions.html.
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/DerivedProperty.java,v $
|
||||
* $Date: 2004/02/18 03:08:59 $
|
||||
* $Revision: 1.25 $
|
||||
* $Date: 2004/03/11 19:03:17 $
|
||||
* $Revision: 1.26 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -378,7 +378,7 @@ public final class DerivedProperty implements UCD_Types {
|
|||
shortName = "IDC";
|
||||
header = "# Derived Property: " + name
|
||||
+ "\r\n# Characters that can continue an identifier."
|
||||
+ "\r\n# Generated from: ID_Start + Mn+Mc+Nd+Pc"
|
||||
+ "\r\n# Generated from: ID_Start + Mn+Mc+Nd+Pc + Other_ID_Continue"
|
||||
+ "\r\n# NOTE: Cf characters should be filtered out.";
|
||||
}
|
||||
public boolean hasValue(int cp) {
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateData.java,v $
|
||||
* $Date: 2004/02/18 03:08:59 $
|
||||
* $Revision: 1.34 $
|
||||
* $Date: 2004/03/11 19:03:17 $
|
||||
* $Revision: 1.35 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -110,8 +110,8 @@ public class GenerateData implements UCD_Types {
|
|||
output.println("# Generated algorithmically from the Unicode Character Database");
|
||||
}
|
||||
output.println("# For documentation, see UCD.html");
|
||||
output.println("# Note: Unassigned and Noncharacter codepoints may be omitted");
|
||||
output.println("# if they have default property values.");
|
||||
//output.println("# Note: Unassigned and Noncharacter codepoints may be omitted");
|
||||
//output.println("# if they have default property values.");
|
||||
output.println(HORIZONTAL_LINE);
|
||||
output.println();
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load diff
355
tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.txt
Normal file
355
tools/unicodetools/com/ibm/text/UCD/MakeUnicodeFiles.txt
Normal file
|
@ -0,0 +1,355 @@
|
|||
Generate: DerivedCoreProperties
|
||||
DeltaVersion: 11
|
||||
|
||||
File: Blocks
|
||||
Property: Block
|
||||
Format: valueList
|
||||
|
||||
File: CaseFolding
|
||||
Property: SPECIAL
|
||||
|
||||
File: DerivedAge
|
||||
Property: Age
|
||||
Format: nameStyle=none noLabel skipValue=unassigned
|
||||
|
||||
Value: 1.1
|
||||
# Assigned as of Unicode 1.1.0 (June, 1993)
|
||||
# [excluding removed Hangul Syllables]
|
||||
|
||||
Value: 2.0
|
||||
# Newly assigned in Unicode 2.0.0 (July, 1996)
|
||||
|
||||
Value: 2.1
|
||||
# Newly assigned in Unicode 2.1.2 (May, 1998)
|
||||
|
||||
Value: 3.0
|
||||
# Newly assigned in Unicode 3.0.0 (September, 1999)
|
||||
|
||||
Value: 3.1
|
||||
# Newly assigned in Unicode 3.1.0 (March, 2001)
|
||||
|
||||
Value: 3.2
|
||||
# Newly assigned in Unicode 3.2.0 (March, 2002)
|
||||
|
||||
Value: 4.0
|
||||
# Newly assigned in Unicode 4.0.0 (April, 2003)
|
||||
|
||||
File: extracted/DerivedBidiClass
|
||||
Property: Bidi_Class
|
||||
# Bidi Class (listing UnicodeData.txt, field 4: see UCD.html)
|
||||
Format: valueStyle=short skipUnassigned=Left_To_Right
|
||||
|
||||
File: extracted/DerivedBinaryProperties
|
||||
Property: Bidi_Mirrored
|
||||
# Bidi_Mirrored (listing UnicodeData.txt, field 9: see UCD.html)
|
||||
|
||||
File: extracted/DerivedCombiningClass
|
||||
Property: Canonical_Combining_Class
|
||||
# Combining Class (listing UnicodeData.txt, field 3: see UCD.html)
|
||||
# All code points not explicitly listed in this file have the property
|
||||
# value: 0.
|
||||
Format: nameStyle=none valueStyle=short skipUnassigned=Not_Reordered
|
||||
|
||||
File: DerivedCoreProperties
|
||||
Property: Math
|
||||
# Derived Property: Math
|
||||
# Generated from: Sm + Other_Math
|
||||
|
||||
Property: Alphabetic
|
||||
# Derived Property: Alphabetic
|
||||
# Generated from: Lu+Ll+Lt+Lm+Lo+Nl + Other_Alphabetic
|
||||
|
||||
|
||||
Property: Lowercase
|
||||
# Derived Property: Lowercase
|
||||
# Generated from: Ll + Other_Lowercase
|
||||
|
||||
|
||||
Property: Uppercase
|
||||
# Derived Property: Uppercase
|
||||
# Generated from: Lu + Other_Uppercase
|
||||
|
||||
|
||||
Property: ID_Start
|
||||
# Derived Property: ID_Start
|
||||
# Characters that can start an identifier.
|
||||
# Generated from Lu+Ll+Lt+Lm+Lo+Nl+Other_ID_Start
|
||||
|
||||
|
||||
Property: ID_Continue
|
||||
# Derived Property: ID_Continue
|
||||
# Characters that can continue an identifier.
|
||||
# Generated from: ID_Start + Mn+Mc+Nd+Pc
|
||||
# NOTE: Cf characters should be filtered out.
|
||||
|
||||
|
||||
Property: XID_Start
|
||||
# Derived Property: XID_Start
|
||||
# ID_Start modified for closure under NFKx
|
||||
# Modified as described in UAX #15
|
||||
# NOTE: Does NOT remove the non-NFKx characters.
|
||||
# Merely ensures that if isIdentifier(string) then isIdentifier(NFKx(string))
|
||||
|
||||
|
||||
Property: XID_Continue
|
||||
# Derived Property: XID_Continue
|
||||
# Mod_ID_Continue modified for closure under NFKx
|
||||
# Modified as described in UAX #15
|
||||
# NOTE: Cf characters should be filtered out.
|
||||
# NOTE: Does NOT remove the non-NFKx characters.
|
||||
# Merely ensures that if isIdentifier(string) then isIdentifier(NFKx(string))
|
||||
|
||||
|
||||
Property: Default_Ignorable_Code_Point
|
||||
# Derived Property: Default_Ignorable_Code_Point
|
||||
# Generated from Other_Default_Ignorable_Code_Point + Cf + Cc + Cs - White_Space
|
||||
|
||||
|
||||
Property: Grapheme_Extend
|
||||
# Derived Property: Grapheme_Extend
|
||||
# Generated from: Me + Mn + Other_Grapheme_Extend
|
||||
# Note: depending on an application's interpretation of Co (private use),
|
||||
# they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither.
|
||||
|
||||
|
||||
Property: Grapheme_Base
|
||||
# Derived Property: Grapheme_Base
|
||||
# Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Extend
|
||||
# Note: depending on an application's interpretation of Co (private use),
|
||||
# they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither.
|
||||
|
||||
|
||||
File: extracted/DerivedDecompositionType
|
||||
Property: Decomposition_Type
|
||||
Format: skipValue=None
|
||||
# Decomposition_Type (from UnicodeData.txt, field 5: see UCD.html)
|
||||
|
||||
File: extracted/DerivedEastAsianWidth
|
||||
Property: East_Asian_Width
|
||||
Format: valueStyle=short skipUnassigned=Neutral
|
||||
# East_Asian_Width (listing EastAsianWidth.txt, field 1)
|
||||
|
||||
File: extracted/DerivedGeneralCategory
|
||||
Property: General_Category
|
||||
Format: valueStyle=short noLabel
|
||||
|
||||
File: extracted/DerivedJoiningGroup
|
||||
Property: Joining_Group
|
||||
# Joining Group (listing ArabicShaping.txt, field 3)
|
||||
Format: skipValue=No_Joining_Group
|
||||
|
||||
File: extracted/DerivedJoiningType
|
||||
Property: Joining_Type
|
||||
# Type T is derived, as described in ArabicShaping.txt
|
||||
Format: valueStyle=short skipValue=Non_Joining
|
||||
|
||||
File: extracted/DerivedLineBreak
|
||||
Property: Line_Break
|
||||
Format: valueStyle=short skipUnassigned=Unknown
|
||||
|
||||
File: DerivedNormalizationProps
|
||||
|
||||
Property: FC_NFKC_Closure
|
||||
# Derived Property: FC_NFKC_Closure
|
||||
# Generated from computing: b = NFKC(Fold(a)); c = NFKC(Fold(b));
|
||||
# Then if (c != b) add the mapping from a to c to the set of
|
||||
# mappings that constitute the FC_NFKC_Closure list
|
||||
# Uses the full case folding from CaseFolding.txt, without the T option.
|
||||
Format: nameStyle=short
|
||||
|
||||
|
||||
Property: Full_Composition_Exclusion
|
||||
# Derived Property: Full_Composition_Exclusion
|
||||
# Generated from: Composition Exclusions + Singletons + Non-Starter Decompositions
|
||||
|
||||
|
||||
Property: NFD_QuickCheck
|
||||
# Derived Property: NFD_QuickCheck
|
||||
# Generated from computing decomposibles
|
||||
Format: nameStyle=short valueStyle=short skipValue=Yes
|
||||
|
||||
|
||||
Property: NFC_QuickCheck
|
||||
# Derived Property: NFC_QuickCheck
|
||||
# Generated from computing decomposibles (and characters that may compose with previous ones)
|
||||
Format: nameStyle=short valueStyle=short skipValue=Yes
|
||||
|
||||
Property: NFKD_QuickCheck
|
||||
# Derived Property: NFKD_QuickCheck
|
||||
# Generated from computing decomposibles
|
||||
Format: nameStyle=short valueStyle=short skipValue=Yes
|
||||
|
||||
|
||||
Property: NFKC_QuickCheck
|
||||
# Derived Property: NFKC_QuickCheck
|
||||
# Generated from computing decomposibles (and characters that may compose with previous ones)
|
||||
Format: nameStyle=short valueStyle=short skipValue=Yes
|
||||
|
||||
Property: Expands_On_NFD
|
||||
# Derived Property: Expands_On_NFD
|
||||
# Generated according to UAX #15.
|
||||
# Characters whose normalized length is not one.
|
||||
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
|
||||
# The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
|
||||
|
||||
|
||||
Property: Expands_On_NFC
|
||||
# Derived Property: Expands_On_NFC
|
||||
# Generated according to UAX #15.
|
||||
# Characters whose normalized length is not one.
|
||||
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
|
||||
# The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
|
||||
|
||||
|
||||
Property: Expands_On_NFKD
|
||||
# Derived Property: Expands_On_NFKD
|
||||
# Generated according to UAX #15.
|
||||
# Characters whose normalized length is not one.
|
||||
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
|
||||
# The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
|
||||
|
||||
|
||||
Property: Expands_On_NFKC
|
||||
# Derived Property: Expands_On_NFKC
|
||||
# Generated according to UAX #15.
|
||||
# Characters whose normalized length is not one.
|
||||
# WARNING: Normalization of STRINGS must use the algorithm in UAX #15 because characters may interact.
|
||||
# The length of a normalized string is not necessarily the sum of the lengths of the normalized characters!
|
||||
|
||||
|
||||
File: extracted/DerivedNumericType
|
||||
Property: Numeric_Type
|
||||
# Numeric Type (from UnicodeData.txt, field 6/7/8 plus Unihan.txt: see UCD.html)
|
||||
Format: skipValue=None
|
||||
|
||||
File: extracted/DerivedNumericValues
|
||||
Property: Numeric_Value
|
||||
# Numeric Values (from UnicodeData.txt, field 6/7/8)
|
||||
# WARNING: Certain values, such as 0.16666667, are repeating fractions.
|
||||
# Although they are only printed with a limited number of decimal places
|
||||
# in this file, they should be expressed to the limits of the precision
|
||||
# available when used.
|
||||
Format: sortNumeric
|
||||
|
||||
File: HangulSyllableType
|
||||
Property: Hangul_Syllable_Type
|
||||
Format: valueStyle=short skipValue=Not_Applicable
|
||||
|
||||
File: NormalizationTest
|
||||
Property: SPECIAL
|
||||
|
||||
File: PropList
|
||||
|
||||
Property: White_Space
|
||||
|
||||
Property: Bidi_Control
|
||||
|
||||
Property: Join_Control
|
||||
|
||||
Property: Dash
|
||||
|
||||
Property: Hyphen
|
||||
|
||||
Property: Quotation_Mark
|
||||
|
||||
Property: Terminal_Punctuation
|
||||
|
||||
Property: Other_Math
|
||||
|
||||
Property: Hex_Digit
|
||||
|
||||
Property: ASCII_Hex_Digit
|
||||
|
||||
Property: Other_Alphabetic
|
||||
|
||||
Property: Ideographic
|
||||
|
||||
Property: Diacritic
|
||||
|
||||
Property: Extender
|
||||
|
||||
Property: Other_Lowercase
|
||||
|
||||
Property: Other_Uppercase
|
||||
|
||||
Property: Noncharacter_Code_Point
|
||||
|
||||
Property: Other_Grapheme_Extend
|
||||
|
||||
Property: Grapheme_Link
|
||||
|
||||
Property: IDS_Binary_Operator
|
||||
|
||||
Property: IDS_Trinary_Operator
|
||||
|
||||
Property: Radical
|
||||
|
||||
Property: Unified_Ideograph
|
||||
|
||||
Property: Other_Default_Ignorable_Code_Point
|
||||
|
||||
Property: Deprecated
|
||||
|
||||
Property: Soft_Dotted
|
||||
|
||||
Property: Logical_Order_Exception
|
||||
|
||||
Property: Other_ID_Start
|
||||
|
||||
Property: Other_ID_Continue
|
||||
|
||||
Property: STerm
|
||||
|
||||
Property: Variation_Selector
|
||||
|
||||
File: PropertyAliases
|
||||
Property: SPECIAL
|
||||
|
||||
File: PropertyValueAliases
|
||||
Property: SPECIAL
|
||||
|
||||
File: Scripts
|
||||
|
||||
Property: Script
|
||||
Format: nameStyle=none skipUnassigned=Common
|
||||
|
||||
File: SpecialCasing
|
||||
Property: SPECIAL
|
||||
|
||||
File: StandardizedVariants
|
||||
Property: SPECIAL
|
||||
|
||||
HackName: noBreak
|
||||
HackName: Arabic_Presentation_Forms-A
|
||||
HackName: Arabic_Presentation_Forms-B
|
||||
HackName: CJK_Symbols_and_Punctuation
|
||||
HackName: Combining_Diacritical_Marks_for_Symbols
|
||||
HackName: Enclosed_CJK_Letters_and_Months
|
||||
HackName: Greek_and_Coptic
|
||||
HackName: Halfwidth_and_Fullwidth_Forms
|
||||
HackName: Latin-1_Supplement
|
||||
HackName: Latin_Extended-A
|
||||
HackName: Latin_Extended-B
|
||||
HackName: Miscellaneous_Mathematical_Symbols-A
|
||||
HackName: Miscellaneous_Mathematical_Symbols-B
|
||||
HackName: Miscellaneous_Symbols_and_Arrows
|
||||
HackName: Superscripts_and_Subscripts
|
||||
HackName: Supplemental_Arrows-A
|
||||
HackName: Supplemental_Arrows-B
|
||||
HackName: Supplementary_Private_Use_Area-A
|
||||
HackName: Supplementary_Private_Use_Area-B
|
||||
HackName: Canadian-Aboriginal
|
||||
HackName: Old-Italic
|
||||
|
||||
FinalComments
|
||||
Note that PropertyAliases sorts by the long name, while PropertyValueAliases
|
||||
sorts by the short name
|
||||
ArabicShaping
|
||||
BidiMirroring
|
||||
CompositionExclusions
|
||||
EastAsianWidth
|
||||
LineBreak
|
||||
StandardizedVariants
|
||||
UnicodeData
|
||||
|
||||
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/MyFloatLister.java,v $
|
||||
* $Date: 2003/03/12 16:01:26 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2004/03/11 19:03:17 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -40,7 +40,7 @@ class MyFloatLister extends PropertyLister {
|
|||
public byte status(int cp) {
|
||||
//if ((cp & 0xFFF) == 0) System.out.println("# " + Utility.hex(cp));
|
||||
if (false && !ucdData.isRepresented(cp)) {
|
||||
if (ucdData.mapToRepresentative(cp, false) != cp) return PropertyLister.CONTINUE;
|
||||
if (ucdData.mapToRepresentative(cp, ucdData.getCompositeVersion()) != cp) return PropertyLister.CONTINUE;
|
||||
return PropertyLister.CONTINUE;
|
||||
}
|
||||
if (ucdData.getCategory(cp) == Cn) return PropertyLister.CONTINUE;
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
# This file contains aliases for properties used in the UCD.
|
||||
# These names can be used for XML formats of UCD data, for regular-expression
|
||||
# property tests, and other programmatic textual descriptions of Unicode data.
|
||||
# For information on which properties are normative, see UCD.html.
|
||||
#
|
||||
# The names may be translated in appropriate environments, and additional
|
||||
# aliases may be useful.
|
||||
#
|
||||
# FORMAT
|
||||
#
|
||||
# Each line has two or more fields, separated by semicolons.
|
||||
#
|
||||
# First Field: The first field is an abbreviated name for the property.
|
||||
#
|
||||
# Second Field: The second field is a long name
|
||||
#
|
||||
# The above are the preferred aliases. Other aliases may be listed in additional fields.
|
||||
#
|
||||
# Loose matching should be applied to all property names and property values, with
|
||||
# the exception of String Property values. With loose matching of property names and
|
||||
# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
|
||||
# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
|
||||
#
|
||||
# NOTE: Property value names are NOT unique across properties. For example:
|
||||
#
|
||||
# AL means Arabic Letter for the Bidi_Class property, and
|
||||
# AL means Alpha_Left for the Combining_Class property, and
|
||||
# AL means Alphabetic for the Line_Break property.
|
||||
#
|
||||
# In addition, some property names may be the same as some property value names.
|
||||
# For example:
|
||||
#
|
||||
# sc means the Script property, and
|
||||
# Sc means the General_Category property value Currency_Symbol (Sc)
|
||||
#
|
||||
# The combination of property value and property name is, however, unique.
|
||||
#
|
||||
# For more information, see UTS #18: Regular Expression Guidelines
|
||||
# ================================================
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
# This file contains aliases for property values used in the UCD.
|
||||
# These names can be used for XML formats of UCD data, for regular-expression
|
||||
# property tests, and other programmatic textual descriptions of Unicode data.
|
||||
# For information on which properties are normative, see UCD.html.
|
||||
#
|
||||
# The names may be translated in appropriate environments, and additional
|
||||
# aliases may be useful.
|
||||
#
|
||||
# FORMAT
|
||||
#
|
||||
# Each line describes a property value name.
|
||||
# This consists of three or more fields, separated by semicolons.
|
||||
#
|
||||
# First Field: The first field describes the property for which that
|
||||
# property value name is used.
|
||||
#
|
||||
# Second Field: The second field is an abbreviated name.
|
||||
# If there is no abbreviated name available, the field is marked with "n/a".
|
||||
#
|
||||
# Third Field: The third field is a long name.
|
||||
#
|
||||
# In the case of ccc, there are 4 fields. The second field is numeric, third
|
||||
# is abbreviated, and fourth is long.
|
||||
#
|
||||
# The above are the preferred aliases. Other aliases may be listed in additional fields.
|
||||
#
|
||||
# Loose matching should be applied to all property names and property values, with
|
||||
# the exception of String Property values. With loose matching of property names and
|
||||
# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property
|
||||
# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1".
|
||||
#
|
||||
# NOTE: Property value names are NOT unique across properties. For example:
|
||||
#
|
||||
# AL means Arabic Letter for the Bidi_Class property, and
|
||||
# AL means Alpha_Left for the Combining_Class property, and
|
||||
# AL means Alphabetic for the Line_Break property.
|
||||
#
|
||||
# In addition, some property names may be the same as some property value names.
|
||||
# For example:
|
||||
#
|
||||
# sc means the Script property, and
|
||||
# Sc means the General_Category property value Currency_Symbol (Sc)
|
||||
#
|
||||
# The combination of property value and property name is, however, unique.
|
||||
#
|
||||
# For more information, see UTS #18: Regular Expression Guidelines
|
||||
# ================================================
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
package com.ibm.text.UCD;
|
||||
|
||||
import java.text.NumberFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
|
@ -53,7 +54,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
if (codepoint == 0x1D100) {
|
||||
if (DEBUG && codepoint == 0x1D100) {
|
||||
System.out.println("here");
|
||||
}
|
||||
//if ((ODD_BALLS & ucd.getCategoryMask(codepoint)) != 0) return null;
|
||||
|
@ -82,10 +83,17 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
.setValues("<string>"));
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
NumberFormat nf = NumberFormat.getInstance();
|
||||
{
|
||||
nf.setGroupingUsed(false);
|
||||
nf.setMaximumFractionDigits(8);
|
||||
nf.setMinimumFractionDigits(1);
|
||||
}
|
||||
public String _getValue(int codepoint) {
|
||||
|
||||
double num = ucd.getNumericValue(codepoint);
|
||||
if (Double.isNaN(num)) return null;
|
||||
return Double.toString(num);
|
||||
return nf.format(num);
|
||||
}
|
||||
}.setMain("Numeric_Value", "nv", UnicodeProperty.NUMERIC, version));
|
||||
|
||||
|
@ -100,8 +108,9 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
public int getMaxWidth(boolean isShort) {
|
||||
return 14;
|
||||
}
|
||||
}.setMain("FC_NFKC_Closure", "FNC", UnicodeProperty.STRING, version)
|
||||
.addName("FC_NFKC"));
|
||||
}.setMain("FC_NFKC_Closure", "FC_NFKC", UnicodeProperty.STRING, version)
|
||||
//.addName("FNC")
|
||||
);
|
||||
|
||||
add(new UnicodeProperty.SimpleProperty() {
|
||||
public String _getValue(int codepoint) {
|
||||
|
@ -319,7 +328,7 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
case UCD_Types.COMBINING_CLASS>>8: temp = (ucd.getCombiningClassID_fromIndex((short)i, style)); break;
|
||||
case UCD_Types.BIDI_CLASS>>8: temp = (ucd.getBidiClassID_fromIndex((byte)i, style)); break;
|
||||
case UCD_Types.DECOMPOSITION_TYPE>>8: temp = (ucd.getDecompositionTypeID_fromIndex((byte)i, style));
|
||||
check = temp != null;
|
||||
//check = temp != null;
|
||||
break;
|
||||
case UCD_Types.NUMERIC_TYPE>>8: temp = (ucd.getNumericTypeID_fromIndex((byte)i, style));
|
||||
titlecase = true;
|
||||
|
@ -389,7 +398,10 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
case UCD_Types.EAST_ASIAN_WIDTH>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_EAST_ASIAN_WIDTH, UCD_Names.EAST_ASIAN_WIDTH, result);
|
||||
case UCD_Types.LINE_BREAK>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, result);
|
||||
lookup(valueAlias, UCD_Names.LONG_LINE_BREAK, UCD_Names.LINE_BREAK, result);
|
||||
if (valueAlias.equals("Inseparable")) addUnique("Inseperable", result);
|
||||
// Inseparable; Inseperable
|
||||
return result;
|
||||
case UCD_Types.JOINING_TYPE>>8:
|
||||
return lookup(valueAlias, UCD_Names.LONG_JOINING_TYPE, UCD_Names.JOINING_TYPE, result);
|
||||
case UCD_Types.JOINING_GROUP>>8:
|
||||
|
@ -445,10 +457,13 @@ public class ToolUnicodePropertySource extends UnicodeProperty.Factory {
|
|||
if (isType(BINARY_MASK)) {
|
||||
return up.hasValue(codepoint) ? "True" : "False";
|
||||
}
|
||||
return "<unknown>";
|
||||
throw new IllegalArgumentException("Failed to find value for " + Utility.hex(codepoint));
|
||||
}
|
||||
|
||||
public String getAge(int codePoint) {
|
||||
if (codePoint == 0xF0000) {
|
||||
System.out.println("debug point");
|
||||
}
|
||||
if (needAgeCache) {
|
||||
for (int i = UCD_Types.AGE11; i < UCD_Types.LIMIT_AGE; ++i) {
|
||||
ucdCache[i] = UCD.make(UCD_Names.AGE_VERSIONS[i]);
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD.java,v $
|
||||
* $Date: 2004/02/18 03:09:01 $
|
||||
* $Revision: 1.32 $
|
||||
* $Date: 2004/03/11 19:03:16 $
|
||||
* $Revision: 1.33 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -86,7 +86,6 @@ public final class UCD implements UCD_Types {
|
|||
*/
|
||||
public boolean isAllocated(int codePoint) {
|
||||
if (getCategory(codePoint) != Cn) return true;
|
||||
if (compositeVersion >= 0x20000 && codePoint >= 0xF0000 && codePoint <= 0x10FFFD) return true;
|
||||
if (isNoncharacter(codePoint)) return true;
|
||||
return false;
|
||||
}
|
||||
|
@ -94,11 +93,9 @@ public final class UCD implements UCD_Types {
|
|||
public boolean isNoncharacter(int codePoint) {
|
||||
if ((codePoint & 0xFFFE) == 0xFFFE) {
|
||||
if (compositeVersion < 0x20000 && codePoint > 0xFFFF) return false;
|
||||
// major < 2
|
||||
return true;
|
||||
}
|
||||
if (codePoint >= 0xFDD0 && codePoint <= 0xFDEF && compositeVersion >= 0x30100) return true;
|
||||
// major >= 3 && minor >= 1
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -113,8 +110,9 @@ public final class UCD implements UCD_Types {
|
|||
* Is the code point a PUA character (fast check)
|
||||
*/
|
||||
public boolean isPUA(int codePoint) {
|
||||
return (codePoint >= 0xE000 && codePoint < 0xF900
|
||||
|| codePoint >= 0xF0000 && codePoint < 0xFFFFE
|
||||
if (codePoint >= 0xE000 && codePoint < 0xF900) return true;
|
||||
if (compositeVersion < 0x20000) return false;
|
||||
return (codePoint >= 0xF0000 && codePoint < 0xFFFFE
|
||||
|| codePoint >= 0x100000 && codePoint < 0x10FFFE);
|
||||
}
|
||||
|
||||
|
@ -353,7 +351,7 @@ public final class UCD implements UCD_Types {
|
|||
return combiningClassSet.get(0xFF & value);
|
||||
}
|
||||
|
||||
static UnicodeSet BIDI_R_SET, BIDI_AL_SET;
|
||||
static UnicodeSet BIDI_R_SET, BIDI_AL_SET, BIDI_BN_SET;
|
||||
|
||||
/**
|
||||
* Get the bidi class
|
||||
|
@ -424,10 +422,17 @@ public final class UCD implements UCD_Types {
|
|||
BIDI_R_SET.removeAll(noncharacters);
|
||||
BIDI_AL_SET.removeAll(noncharacters);
|
||||
|
||||
|
||||
BIDI_BN_SET = new UnicodeSet();
|
||||
if (compositeVersion >= 0x40001) {
|
||||
BIDI_BN_SET.addAll(noncharacters);
|
||||
UnicodeSet DefaultIg = DerivedProperty.make(DefaultIgnorable, this).getSet();
|
||||
System.out.println("DefaultIg: " + DefaultIg);
|
||||
BIDI_BN_SET.addAll(DefaultIg);
|
||||
}
|
||||
|
||||
System.out.println("BIDI_R_SET: " + BIDI_R_SET);
|
||||
System.out.println("BIDI_AL_SET: " + BIDI_AL_SET);
|
||||
System.out.println("BIDI_BN_SET: " + BIDI_BN_SET);
|
||||
|
||||
if (BIDI_R_SET.containsSome(BIDI_AL_SET)) {
|
||||
throw new ChainException("BIDI values for Cf characters overlap!!", null);
|
||||
|
@ -435,6 +440,9 @@ public final class UCD implements UCD_Types {
|
|||
|
||||
}
|
||||
|
||||
if (BIDI_BN_SET.contains(codePoint)) {
|
||||
return BIDI_BN;
|
||||
}
|
||||
if (BIDI_R_SET.contains(codePoint)) {
|
||||
return BIDI_R;
|
||||
}
|
||||
|
@ -1012,7 +1020,7 @@ public final class UCD implements UCD_Types {
|
|||
}
|
||||
|
||||
public static String getScriptID_fromIndex(byte prop, byte length) {
|
||||
return prop < 0 || prop >= UCD_Names.JOINING_GROUP.length ? null
|
||||
return prop < 0 || prop >= UCD_Names.SCRIPT.length ? null
|
||||
: (length == SHORT) ? UCD_Names.SCRIPT[prop] : UCD_Names.LONG_SCRIPT[prop];
|
||||
}
|
||||
|
||||
|
@ -1043,7 +1051,7 @@ public final class UCD implements UCD_Types {
|
|||
: style == SHORT ? UCD_Names.SHORT_BP[bit] : UCD_Names.BP[bit];
|
||||
}
|
||||
|
||||
public static int mapToRepresentative(int ch, boolean lessThan20105) {
|
||||
public static int mapToRepresentative(int ch, int rCompositeVersion) {
|
||||
if (ch <= 0xFFFD) {
|
||||
//if (ch <= 0x2800) return ch;
|
||||
//if (ch <= 0x28FF) return 0x2800; // braille
|
||||
|
@ -1061,7 +1069,7 @@ public final class UCD implements UCD_Types {
|
|||
if (ch <= 0xDFFF) return 0xDC00;
|
||||
if (ch <= 0xE000) return ch; // Private Use
|
||||
if (ch <= 0xF8FF) return 0xE000;
|
||||
if (lessThan20105) {
|
||||
if (rCompositeVersion < 0x20105) {
|
||||
if (ch <= 0xF900) return ch; // CJK Compatibility Ideograp
|
||||
if (ch <= 0xFA2D) return 0xF900;
|
||||
}
|
||||
|
@ -1069,14 +1077,20 @@ public final class UCD implements UCD_Types {
|
|||
if (ch <= 0xFDEF) return 0xFFFF;
|
||||
} else {
|
||||
if ((ch & 0xFFFE) == 0xFFFE) return 0xFFFF; // Noncharacter
|
||||
|
||||
if (ch <= 0x20000) return ch; // Extension B
|
||||
if (ch <= 0x2A6D6) return 0x20000;
|
||||
//if (ch <= 0x2F800) return ch;
|
||||
//if (ch <= 0x2FA1D) return 0x2F800; // compat ideographs
|
||||
if (ch <= 0xF0000) return ch; // Plane 15 Private Use
|
||||
if (ch < 0xF0000) return ch; // Plane 15 Private Use
|
||||
if (rCompositeVersion >= 0x20000) {
|
||||
return 0xE000;
|
||||
}
|
||||
/*
|
||||
if (ch <= 0xFFFFD) return 0xF0000; // Plane 16 Private Use
|
||||
if (ch <= 0x100000) return ch; // Plane 15 Private Use
|
||||
if (ch <= 0x10FFFD) return 0x100000; // Plane 16 Private Use
|
||||
*/
|
||||
}
|
||||
return ch;
|
||||
}
|
||||
|
@ -1106,6 +1120,7 @@ public final class UCD implements UCD_Types {
|
|||
byte cat = getCategory(cp);
|
||||
if (cat == Mn || cat == Mc || cat == Nd || cat == Pc) return true;
|
||||
if (getBinaryProperty(cp, Other_ID_Start)) return true;
|
||||
if (getBinaryProperty(cp, Other_ID_Continue)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1189,7 +1204,7 @@ to guarantee identifier closure.
|
|||
if (codePoint >= 0x2800 && codePoint <= 0x28FF) return true;
|
||||
if (codePoint >= 0x2F800 && codePoint <= 0x2FA1D) return true;
|
||||
|
||||
int rangeStart = mapToRepresentative(codePoint, compositeVersion < 0x020105);
|
||||
int rangeStart = mapToRepresentative(codePoint, compositeVersion);
|
||||
switch (rangeStart) {
|
||||
default:
|
||||
return getRaw(codePoint) == null;
|
||||
|
@ -1247,7 +1262,7 @@ to guarantee identifier closure.
|
|||
|
||||
// do range stuff
|
||||
String constructedName = null;
|
||||
int rangeStart = mapToRepresentative(codePoint, compositeVersion < 0x020105);
|
||||
int rangeStart = mapToRepresentative(codePoint, compositeVersion);
|
||||
boolean isHangul = false;
|
||||
boolean isRemapped = false;
|
||||
switch (rangeStart) {
|
||||
|
@ -1297,7 +1312,7 @@ to guarantee identifier closure.
|
|||
case 0xE000: // Private Use
|
||||
case 0xF0000: // Private Use
|
||||
case 0x100000: // Private Use
|
||||
if (fixStrings) constructedName = "<private use area-" + Utility.hex(codePoint, 4) + ">";
|
||||
if (fixStrings) constructedName = "<private-use-" + Utility.hex(codePoint, 4) + ">";
|
||||
isRemapped = true;
|
||||
break;
|
||||
case 0xD800: // Surrogate
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Names.java,v $
|
||||
* $Date: 2004/02/18 03:09:01 $
|
||||
* $Revision: 1.26 $
|
||||
* $Date: 2004/03/11 19:03:16 $
|
||||
* $Revision: 1.27 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -152,7 +152,8 @@ final class UCD_Names implements UCD_Types {
|
|||
"Logical_Order_Exception",
|
||||
"Other_ID_Start",
|
||||
"STerm",
|
||||
"Variation_Selector"
|
||||
"Variation_Selector",
|
||||
"Other_ID_Continue",
|
||||
};
|
||||
|
||||
static final String[] SHORT_BP = {
|
||||
|
@ -189,7 +190,8 @@ final class UCD_Names implements UCD_Types {
|
|||
"LOE",
|
||||
"OIDS",
|
||||
"STerm",
|
||||
"VS"
|
||||
"VS",
|
||||
"OIDC"
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -262,7 +264,7 @@ final class UCD_Names implements UCD_Types {
|
|||
"Unknown", "OpenPunctuation", "ClosePunctuation", "Quotation",
|
||||
"Glue", "Nonstarter", "Exclamation", "BreakSymbols",
|
||||
"InfixNumeric", "PrefixNumeric", "PostfixNumeric",
|
||||
"Numeric", "Alphabetic", "Ideographic", "Inseperable", "Hyphen",
|
||||
"Numeric", "Alphabetic", "Ideographic", "Inseparable", "Hyphen",
|
||||
"CombiningMark", "BreakBefore", "BreakAfter", "Space",
|
||||
"MandatoryBreak", "CarriageReturn", "LineFeed", "ContingentBreak",
|
||||
"ComplexContext", "Ambiguous", "BreakBoth", "Surrogate", "ZWSpace",
|
||||
|
@ -327,7 +329,8 @@ final class UCD_Names implements UCD_Types {
|
|||
"SHAVIAN",
|
||||
"OSMANYA",
|
||||
"CYPRIOT",
|
||||
"BRAILLE",
|
||||
"BRAILLE",
|
||||
"KATAKANA_OR_HIRAGANA",
|
||||
|
||||
};
|
||||
|
||||
|
@ -395,6 +398,7 @@ final class UCD_Names implements UCD_Types {
|
|||
"Osma",
|
||||
"Cprt",
|
||||
"Brai",
|
||||
"Hrkt",
|
||||
|
||||
};
|
||||
|
||||
|
@ -643,11 +647,13 @@ final class UCD_Names implements UCD_Types {
|
|||
case 9: s = style < LONG ? "VR" : "Virama"; break;
|
||||
case 200: s = style < LONG ? "ATBL" : "AttachedBelowLeft"; break;
|
||||
case 202: s = style < LONG ? "ATB" : "AttachedBelow"; break;
|
||||
/*
|
||||
case 204: s = style < LONG ? "ATBR" : "AttachedBelowRight"; break;
|
||||
case 208: s = style < LONG ? "ATL" : "AttachedLeft"; break;
|
||||
case 210: s = style < LONG ? "ATR" : "AttachedRight"; break;
|
||||
case 212: s = style < LONG ? "ATAL" : "AttachedAboveLeft"; break;
|
||||
case 214: s = style < LONG ? "ATA" : "AttachedAbove"; break;
|
||||
case 214: s = style < LONG ? "ATA" : "AttachedAbove"; break;
|
||||
*/
|
||||
case 216: s = style < LONG ? "ATAR" : "AttachedAboveRight"; break;
|
||||
case 218: s = style < LONG ? "BL" : "BelowLeft"; break;
|
||||
case 220: s = style < LONG ? "B" : "Below"; break;
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/UCD_Types.java,v $
|
||||
* $Date: 2004/02/18 03:09:01 $
|
||||
* $Revision: 1.27 $
|
||||
* $Date: 2004/03/11 19:03:16 $
|
||||
* $Revision: 1.28 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -15,9 +15,7 @@ package com.ibm.text.UCD;
|
|||
|
||||
public interface UCD_Types {
|
||||
|
||||
public static final int dVersion = 6; // change to fix the generated file D version. If less than zero, no "d"
|
||||
static final byte BINARY_FORMAT = 14; // bumped if binary format of UCD changes. Forces rebuild
|
||||
|
||||
static final byte BINARY_FORMAT = 15; // bumped if binary format of UCD changes. Forces rebuild
|
||||
|
||||
public static final String BASE_DIR = "C:\\DATA\\";
|
||||
public static final String UCD_DIR = BASE_DIR + "UCD\\";
|
||||
|
@ -213,9 +211,10 @@ public interface UCD_Types {
|
|||
Soft_Dotted = 29,
|
||||
Logical_Order_Exception = 30,
|
||||
Other_ID_Start = 31,
|
||||
Sentence_Terminal = 32,
|
||||
Variation_Selector = 33,
|
||||
LIMIT_BINARY_PROPERTIES = 34;
|
||||
Sentence_Terminal = 32,
|
||||
Variation_Selector = 33,
|
||||
Other_ID_Continue = 34,
|
||||
LIMIT_BINARY_PROPERTIES = 35;
|
||||
|
||||
/*
|
||||
static final int
|
||||
|
@ -383,7 +382,8 @@ public interface UCD_Types {
|
|||
OSMANYA = 51,
|
||||
CYPRIOT = 52,
|
||||
BRAILLE = 53,
|
||||
LIMIT_SCRIPT = 54;
|
||||
KATAKANA_OR_HIRAGANA = 54,
|
||||
LIMIT_SCRIPT = 55;
|
||||
|
||||
static final int
|
||||
UNKNOWN = 0,
|
||||
|
|
|
@ -7,6 +7,7 @@ import java.io.PrintWriter;
|
|||
|
||||
import com.ibm.text.UCD.Default;
|
||||
import com.ibm.text.UCD.GenerateData;
|
||||
import com.ibm.text.UCD.MakeUnicodeFiles;
|
||||
import com.ibm.text.UCD.UCD_Types;
|
||||
|
||||
public class UnicodeDataFile {
|
||||
|
@ -26,16 +27,23 @@ public class UnicodeDataFile {
|
|||
|
||||
result.out.println("# " + filename + UnicodeDataFile.getFileSuffix(false));
|
||||
result.out.println(generateDateLine());
|
||||
result.out.println("#");
|
||||
result.out.println("#");
|
||||
result.out.println("# Unicode Character Database");
|
||||
result.out.println("# Copyright (c) 1991-2004 Unicode, Inc.");
|
||||
result.out.println(
|
||||
"# For terms of use, see http://www.unicode.org/terms_of_use.html");
|
||||
result.out.println("# For documentation, see UCD.html");
|
||||
try {
|
||||
Utility.appendFile(filename + "Header.txt", Utility.LATIN1, result.out);
|
||||
} catch (FileNotFoundException e) {
|
||||
/*
|
||||
result.out.println("# Unicode Character Database: Derived Property Data");
|
||||
result.out.println("# Generated algorithmically from the Unicode Character Database");
|
||||
result.out.println("# For documentation, see UCD.html");
|
||||
result.out.println("# Note: Unassigned and Noncharacter codepoints may be omitted");
|
||||
result.out.println("# if they have default property values.");
|
||||
result.out.println("# ================================================");
|
||||
*/
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -51,14 +59,20 @@ public class UnicodeDataFile {
|
|||
}
|
||||
|
||||
public static String getHTMLFileSuffix(boolean withDVersion) {
|
||||
return "-" + Default.ucd().getVersion()
|
||||
+ ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "")
|
||||
return "-"
|
||||
+ Default.ucd().getVersion()
|
||||
+ ((withDVersion && MakeUnicodeFiles.dVersion >= 0)
|
||||
? ("d" + MakeUnicodeFiles.dVersion)
|
||||
: "")
|
||||
+ ".html";
|
||||
}
|
||||
|
||||
public static String getFileSuffix(boolean withDVersion) {
|
||||
return "-" + Default.ucd().getVersion()
|
||||
+ ((withDVersion && UCD_Types.dVersion >= 0) ? ("d" + UCD_Types.dVersion) : "")
|
||||
return "-"
|
||||
+ Default.ucd().getVersion()
|
||||
+ ((withDVersion && MakeUnicodeFiles.dVersion >= 0)
|
||||
? ("d" + MakeUnicodeFiles.dVersion)
|
||||
: "")
|
||||
+ ".txt";
|
||||
}
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/Utility.java,v $
|
||||
* $Date: 2004/02/18 03:09:02 $
|
||||
* $Revision: 1.40 $
|
||||
* $Date: 2004/03/11 19:03:16 $
|
||||
* $Revision: 1.41 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -725,8 +725,7 @@ public final class Utility implements UCD_Types { // COMMON UTILITIES
|
|||
public static PrintWriter openPrintWriter(String directory, String filename, Encoding options) throws IOException {
|
||||
File file = new File(directory + filename);
|
||||
Utility.fixDot();
|
||||
System.out.print("Creating File: " + file);
|
||||
System.out.println("\t" + file.getCanonicalPath());
|
||||
System.out.println("Creating File: " + file.getCanonicalPath());
|
||||
File parent = new File(file.getParent());
|
||||
//System.out.println("Creating File: "+ parent);
|
||||
parent.mkdirs();
|
||||
|
|
Loading…
Add table
Reference in a new issue