mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-1970 add properties: joining group, joining type, line break
X-SVN-Rev: 9043
This commit is contained in:
parent
8c4e52bd51
commit
ecfab592b4
3 changed files with 14335 additions and 14 deletions
226
icu4c/source/data/unidata/ArabicShaping.txt
Normal file
226
icu4c/source/data/unidata/ArabicShaping.txt
Normal file
|
@ -0,0 +1,226 @@
|
|||
# ArabicShaping-3.2.0.txt
|
||||
#
|
||||
# This file is a normative contributory data file in the
|
||||
# Unicode Character Database.
|
||||
#
|
||||
# This file defines the shaping classes for Arabic and Syriac
|
||||
# positional shaping, repeating in machine readable form the
|
||||
# information printed in Tables 8-6, 8-7, 8-8, 8-10, 8-11, and
|
||||
# 8-13 of The Unicode Standard, Version 3.0, plus additions
|
||||
# for Unicode 3.1 and Unicode 3.2.
|
||||
#
|
||||
# See sections 8.2 and 8.3 of The Unicode Standard, Version 3.0
|
||||
# for more information.
|
||||
#
|
||||
# Each line contains four fields, separated by a semicolon.
|
||||
#
|
||||
# The first field gives the code point, in 4-digit hexadecimal
|
||||
# form, of an Arabic or Syriac character.
|
||||
# The second field gives a short schematic name for that character,
|
||||
# abbreviated from the normative Unicode character name.
|
||||
# The third field defines the joining type: R right-joining,
|
||||
# D dual-joining, U non-joining
|
||||
# The fourth field defines the joining group.
|
||||
#
|
||||
#
|
||||
# Note: Characters of joining type T and most characters of
|
||||
# joining type U are not explicitly listed in this file.
|
||||
#
|
||||
# Characters of joining type T can be derived by the following formula:
|
||||
# T = Mn + Cf - ZWNJ - ZWJ
|
||||
# where Mn and Cf are the general category values. In other words,
|
||||
# any non-spacing mark or any format control character, except
|
||||
# U+200C ZERO WIDTH NON-JOINER (joining type U) and U+200D ZERO WIDTH
|
||||
# JOINER (joining type C).
|
||||
#
|
||||
# For an explicit listing of characters of joining type T, see
|
||||
# the derived property file DerivedJoiningType.txt.
|
||||
#
|
||||
# There are currently no characters of type L defined in Unicode.
|
||||
#
|
||||
# Joining type U includes all characters which are neither joining
|
||||
# type T, nor explicitly marked in this file as types R, L, D, or C.
|
||||
#
|
||||
# #############################################################
|
||||
|
||||
# Unicode; Schematic Name; Joining Type; Joining Group
|
||||
|
||||
# Arabic characters
|
||||
|
||||
0621; HAMZA; U; <no shaping>
|
||||
0622; MADDA ON ALEF; R; ALEF
|
||||
0623; HAMZA ON ALEF; R; ALEF
|
||||
0624; HAMZA ON WAW; R; WAW
|
||||
0625; HAMZA UNDER ALEF; R; ALEF
|
||||
0626; HAMZA ON YEH; D; YEH
|
||||
0627; ALEF; R; ALEF
|
||||
0628; BEH; D; BEH
|
||||
0629; TEH MARBUTA; R; TEH MARBUTA
|
||||
062A; TEH; D; BEH
|
||||
062B; THEH; D; BEH
|
||||
062C; JEEM; D; HAH
|
||||
062D; HAH; D; HAH
|
||||
062E; KHAH; D; HAH
|
||||
062F; DAL; R; DAL
|
||||
0630; THAL; R; DAL
|
||||
0631; REH; R; REH
|
||||
0632; ZAIN; R; REH
|
||||
0633; SEEN; D; SEEN
|
||||
0634; SHEEN; D; SEEN
|
||||
0635; SAD; D; SAD
|
||||
0636; DAD; D; SAD
|
||||
0637; TAH; D; TAH
|
||||
0638; ZAH; D; TAH
|
||||
0639; AIN; D; AIN
|
||||
063A; GHAIN; D; AIN
|
||||
0640; TATWEEL; C; <no shaping>
|
||||
0641; FEH; D; FEH
|
||||
0642; QAF; D; QAF
|
||||
0643; KAF; D; KAF
|
||||
0644; LAM; D; LAM
|
||||
0645; MEEM; D; MEEM
|
||||
0646; NOON; D; NOON
|
||||
0647; HEH; D; HEH
|
||||
0648; WAW; R; WAW
|
||||
0649; ALEF MAKSURA; D; YEH
|
||||
064A; YEH; D; YEH
|
||||
066E; DOTLESS BEH; D; BEH
|
||||
066F; DOTLESS QAF; D; QAF
|
||||
0671; HAMZAT WASL ON ALEF; R; ALEF
|
||||
0672; WAVY HAMZA ON ALEF; R; ALEF
|
||||
0673; WAVY HAMZA UNDER ALEF; R; ALEF
|
||||
0674; HIGH HAMZA; U; <no shaping>
|
||||
0675; HIGH HAMZA ALEF; R; ALEF
|
||||
0676; HIGH HAMZA WAW; R; WAW
|
||||
0677; HIGH HAMZA WAW WITH DAMMA; R; WAW
|
||||
0678; HIGH HAMZA YEH; D; YEH
|
||||
0679; TEH WITH SMALL TAH; D; BEH
|
||||
067A; TEH WITH 2 DOTS VERTICAL ABOVE; D; BEH
|
||||
067B; BEH WITH 2 DOTS VERTICAL BELOW; D; BEH
|
||||
067C; TEH WITH RING; D; BEH
|
||||
067D; TEH WITH 3 DOTS ABOVE DOWNWARD; D; BEH
|
||||
067E; TEH WITH 3 DOTS BELOW; D; BEH
|
||||
067F; TEH WITH 4 DOTS ABOVE; D; BEH
|
||||
0680; BEH WITH 4 DOTS BELOW; D; BEH
|
||||
0681; HAMZA ON HAH; D; HAH
|
||||
0682; HAH WITH 2 DOTS VERTICAL ABOVE; D; HAH
|
||||
0683; HAH WITH MIDDLE 2 DOTS; D; HAH
|
||||
0684; HAH WITH MIDDLE 2 DOTS VERTICAL; D; HAH
|
||||
0685; HAH WITH 3 DOTS ABOVE; D; HAH
|
||||
0686; HAH WITH MIDDLE 3 DOTS DOWNWARD; D; HAH
|
||||
0687; HAH WITH MIDDLE 4 DOTS; D; HAH
|
||||
0688; DAL WITH SMALL TAH; R; DAL
|
||||
0689; DAL WITH RING; R; DAL
|
||||
068A; DAL WITH DOT BELOW; R; DAL
|
||||
068B; DAL WITH DOT BELOW AND SMALL TAH; R; DAL
|
||||
068C; DAL WITH 2 DOTS ABOVE; R; DAL
|
||||
068D; DAL WITH 2 DOTS BELOW; R; DAL
|
||||
068E; DAL WITH 3 DOTS ABOVE; R; DAL
|
||||
068F; DAL WITH 3 DOTS ABOVE DOWNWARD; R; DAL
|
||||
0690; DAL WITH 4 DOTS ABOVE; R; DAL
|
||||
0691; REH WITH SMALL TAH; R; REH
|
||||
0692; REH WITH SMALL V; R; REH
|
||||
0693; REH WITH RING; R; REH
|
||||
0694; REH WITH DOT BELOW; R; REH
|
||||
0695; REH WITH SMALL V BELOW; R; REH
|
||||
0696; REH WITH DOT BELOW AND DOT ABOVE; R; REH
|
||||
0697; REH WITH 2 DOTS ABOVE; R; REH
|
||||
0698; REH WITH 3 DOTS ABOVE; R; REH
|
||||
0699; REH WITH 4 DOTS ABOVE; R; REH
|
||||
069A; SEEN WITH DOT BELOW AND DOT ABOVE; D; SEEN
|
||||
069B; SEEN WITH 3 DOTS BELOW; D; SEEN
|
||||
069C; SEEN WITH 3 DOTS BELOW AND 3 DOTS ABOVE; D; SEEN
|
||||
069D; SAD WITH 2 DOTS BELOW; D; SAD
|
||||
069E; SAD WITH 3 DOTS ABOVE; D; SAD
|
||||
069F; TAH WITH 3 DOTS ABOVE; D; TAH
|
||||
06A0; AIN WITH 3 DOTS ABOVE; D; AIN
|
||||
06A1; DOTLESS FEH; D; FEH
|
||||
06A2; FEH WITH DOT MOVED BELOW; D; FEH
|
||||
06A3; FEH WITH DOT BELOW; D; FEH
|
||||
06A4; FEH WITH 3 DOTS ABOVE; D; FEH
|
||||
06A5; FEH WITH 3 DOTS BELOW; D; FEH
|
||||
06A6; FEH WITH 4 DOTS ABOVE; D; FEH
|
||||
06A7; QAF WITH DOT ABOVE; D; QAF
|
||||
06A8; QAF WITH 3 DOTS ABOVE; D; QAF
|
||||
06A9; OPEN KAF; D; GAF
|
||||
06AA; SWASH KAF; D; SWASH KAF
|
||||
06AB; KAF WITH RING; D; GAF
|
||||
06AC; KAF WITH DOT ABOVE; D; KAF
|
||||
06AD; KAF WITH 3 DOTS ABOVE; D; KAF
|
||||
06AE; KAF WITH 3 DOTS BELOW; D; KAF
|
||||
06AF; GAF; D; GAF
|
||||
06B0; GAF WITH RING; D; GAF
|
||||
06B1; GAF WITH 2 DOTS ABOVE; D; GAF
|
||||
06B2; GAF WITH 2 DOTS BELOW; D; GAF
|
||||
06B3; GAF WITH 2 DOTS VERTICAL BELOW; D; GAF
|
||||
06B4; GAF WITH 3 DOTS ABOVE; D; GAF
|
||||
06B5; LAM WITH SMALL V; D; LAM
|
||||
06B6; LAM WITH DOT ABOVE; D; LAM
|
||||
06B7; LAM WITH 3 DOTS ABOVE; D; LAM
|
||||
06B8; LAM WITH 3 DOTS BELOW; D; LAM
|
||||
06B9; NOON WITH DOT BELOW; D; NOON
|
||||
06BA; DOTLESS NOON; D; NOON
|
||||
06BB; DOTLESS NOON WITH SMALL TAH; D; NOON
|
||||
06BC; NOON WITH RING; D; NOON
|
||||
06BD; NOON WITH 3 DOTS ABOVE; D; NOON
|
||||
06BE; KNOTTED HEH; D; KNOTTED HEH
|
||||
06BF; HAH WITH MIDDLE 3 DOTS DOWNWARD AND DOT ABOVE; D; HAH
|
||||
06C0; HAMZA ON HEH; R; TEH MARBUTA
|
||||
06C1; HEH GOAL; D; HEH GOAL
|
||||
06C2; HAMZA ON HEH GOAL; R; HAMZA ON HEH GOAL
|
||||
06C3; TEH MARBUTA GOAL; R; HAMZA ON HEH GOAL
|
||||
06C4; WAW WITH RING; R; WAW
|
||||
06C5; WAW WITH BAR; R; WAW
|
||||
06C6; WAW WITH SMALL V; R; WAW
|
||||
06C7; WAW WITH DAMMA; R; WAW
|
||||
06C8; WAW WITH ALEF ABOVE; R; WAW
|
||||
06C9; WAW WITH INVERTED SMALL V; R; WAW
|
||||
06CA; WAW WITH 2 DOTS ABOVE; R; WAW
|
||||
06CB; WAW WITH 3 DOTS ABOVE; R; WAW
|
||||
06CC; DOTLESS YEH; D; YEH
|
||||
06CD; YEH WITH TAIL; R; YEH WITH TAIL
|
||||
06CE; YEH WITH SMALL V; D; YEH
|
||||
06CF; WAW WITH DOT ABOVE; R; WAW
|
||||
06D0; YEH WITH 2 DOTS VERTICAL BELOW; D; YEH
|
||||
06D1; YEH WITH 3 DOTS BELOW; D; YEH
|
||||
06D2; YEH BARREE; R; YEH BARREE
|
||||
06D3; HAMZA ON YEH BARREE; R; YEH BARREE
|
||||
06D5; AE; R; TEH MARBUTA
|
||||
06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN
|
||||
06FB; DAD WITH DOT BELOW; D; SAD
|
||||
06FC; GHAIN WITH DOT BELOW; D; AIN
|
||||
|
||||
# Syriac characters
|
||||
|
||||
0710; ALAPH; R; ALAPH
|
||||
0712; BETH; D; BETH
|
||||
0713; GAMAL; D; GAMAL
|
||||
0714; GAMAL GARSHUNI; D; GAMAL
|
||||
0715; DALATH; R; DALATH RISH
|
||||
0716; DOTLESS DALATH RISH; R; DALATH RISH
|
||||
0717; HE; R; HE
|
||||
0718; WAW; R; SYRIAC WAW
|
||||
0719; ZAIN; R; ZAIN
|
||||
071A; HETH; D; HETH
|
||||
071B; TETH; D; TETH
|
||||
071C; TETH GARSHUNI; D; TETH
|
||||
071D; YUDH; D; YUDH
|
||||
071E; YUDH HE; R; YUDH HE
|
||||
071F; KAPH; D; KAPH
|
||||
0720; LAMADH; D; LAMADH
|
||||
0721; MIM; D; MIM
|
||||
0722; NUN; D; NUN
|
||||
0723; SEMKATH; D; SEMKATH
|
||||
0724; FINAL SEMKATH; D; FINAL SEMKATH
|
||||
0725; E; D; E
|
||||
0726; PE; D; PE
|
||||
0727; REVERSED PE; D; REVERSED PE
|
||||
0728; SADHE; R; SADHE
|
||||
0729; QAPH; D; QAPH
|
||||
072A; RISH; R; DALATH RISH
|
||||
072B; SHIN; D; SHIN
|
||||
072C; TAW; R; TAW
|
||||
|
||||
# Other
|
||||
|
||||
200D; ZERO WIDTH JOINER; C; <no shaping>
|
13889
icu4c/source/data/unidata/LineBreak.txt
Normal file
13889
icu4c/source/data/unidata/LineBreak.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -34,7 +34,7 @@
|
|||
/* data --------------------------------------------------------------------- */
|
||||
|
||||
static UNewTrie *trie;
|
||||
static uint32_t *pv;
|
||||
uint32_t *pv;
|
||||
static int32_t pvCount;
|
||||
|
||||
static uint32_t prevStart=0, prevLimit=0, prevValue=0;
|
||||
|
@ -47,6 +47,11 @@ parseTwoFieldFile(char *filename, char *basename,
|
|||
UParseLineFn *lineFn,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
parseArabicShaping(char *filename, char *basename,
|
||||
const char *suffix,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
ageLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
|
@ -77,6 +82,11 @@ eaWidthLineFn(void *context,
|
|||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
lineBreakLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
U_CFUNC void
|
||||
|
@ -87,7 +97,6 @@ initAdditionalProperties() {
|
|||
U_CFUNC void
|
||||
generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pErrorCode) {
|
||||
char *basename;
|
||||
UErrorCode errorCode;
|
||||
|
||||
basename=filename+uprv_strlen(filename);
|
||||
|
||||
|
@ -118,18 +127,22 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
|
|||
|
||||
parseTwoFieldFile(filename, basename, "DerivedCoreProperties", suffix, derivedPropListLineFn, pErrorCode);
|
||||
|
||||
parseTwoFieldFile(filename, basename, "LineBreak", suffix, lineBreakLineFn, pErrorCode);
|
||||
|
||||
parseArabicShaping(filename, basename, suffix, pErrorCode);
|
||||
|
||||
/*
|
||||
* Preset East Asian Width defaults:
|
||||
* N for all
|
||||
* A for Private Use
|
||||
* W for plane 2
|
||||
*/
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if( !upvec_setValue(pv, 0, 0x110000, 0, (uint32_t)(U_EA_NEUTRAL<<UPROPS_EA_WIDTH_SHIFT), UPROPS_EA_WIDTH_MASK, pErrorCode) ||
|
||||
!upvec_setValue(pv, 0xe000, 0xf900, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_WIDTH_SHIFT), UPROPS_EA_WIDTH_MASK, pErrorCode) ||
|
||||
!upvec_setValue(pv, 0xf0000, 0xffffe, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_WIDTH_SHIFT), UPROPS_EA_WIDTH_MASK, pErrorCode) ||
|
||||
!upvec_setValue(pv, 0x100000, 0x10fffe, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_WIDTH_SHIFT), UPROPS_EA_WIDTH_MASK, pErrorCode) ||
|
||||
!upvec_setValue(pv, 0x20000, 0x2fffe, 0, (uint32_t)(U_EA_WIDE<<UPROPS_EA_WIDTH_SHIFT), UPROPS_EA_WIDTH_MASK, pErrorCode)
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
if( !upvec_setValue(pv, 0, 0x110000, 0, (uint32_t)(U_EA_NEUTRAL<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode) ||
|
||||
!upvec_setValue(pv, 0xe000, 0xf900, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode) ||
|
||||
!upvec_setValue(pv, 0xf0000, 0xffffe, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode) ||
|
||||
!upvec_setValue(pv, 0x100000, 0x10fffe, 0, (uint32_t)(U_EA_AMBIGUOUS<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode) ||
|
||||
!upvec_setValue(pv, 0x20000, 0x2fffe, 0, (uint32_t)(U_EA_WIDE<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode)
|
||||
) {
|
||||
fprintf(stderr, "genprops: unable to set default East Asian Widths: %s\n", u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
|
@ -138,7 +151,7 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
|
|||
/* parse EastAsianWidth.txt */
|
||||
parseTwoFieldFile(filename, basename, "EastAsianWidth", suffix, eaWidthLineFn, pErrorCode);
|
||||
/* set last range */
|
||||
if(!upvec_setValue(pv, prevStart, prevLimit, 0, (uint32_t)(prevValue<<UPROPS_EA_WIDTH_SHIFT), UPROPS_EA_WIDTH_MASK, pErrorCode)) {
|
||||
if(!upvec_setValue(pv, prevStart, prevLimit, 0, (uint32_t)(prevValue<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to set East Asian Width: %s\n", u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
|
@ -164,13 +177,16 @@ parseTwoFieldFile(char *filename, char *basename,
|
|||
UErrorCode *pErrorCode) {
|
||||
char *fields[2][2];
|
||||
|
||||
writeUCDFilename(basename, ucdFile, suffix);
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
writeUCDFilename(basename, ucdFile, suffix);
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 2, lineFn, NULL, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "error parsing %s.txt: %s\n", ucdFile, u_errorName(*pErrorCode));
|
||||
}
|
||||
}
|
||||
|
||||
/* DerivedAge.txt ----------------------------------------------------------- */
|
||||
|
@ -522,7 +538,7 @@ derivedPropListLineFn(void *context,
|
|||
|
||||
/* keep this list in sync with UEAWidthCode in uprops.h or uchar.h */
|
||||
static const char *const
|
||||
eaNames[U_EA_TOP]={
|
||||
eaNames[U_EA_COUNT]={
|
||||
"N", /* Non-East Asian Neutral, default for unassigned code points */
|
||||
"A", /* Ambiguous, default for Private Use code points */
|
||||
"H", /* Half-width */
|
||||
|
@ -546,7 +562,7 @@ eaWidthLineFn(void *context,
|
|||
++limit;
|
||||
|
||||
/* parse binary property name */
|
||||
i=getTokenIndex(eaNames, U_EA_TOP, fields[1][0]);
|
||||
i=getTokenIndex(eaNames, U_EA_COUNT, fields[1][0]);
|
||||
if(i<0) {
|
||||
fprintf(stderr, "genprops error: unknown width name \"%s\" in EastAsianWidth.txt\n", fields[1][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
|
@ -557,7 +573,7 @@ eaWidthLineFn(void *context,
|
|||
if(prevLimit==start && (uint32_t)i==prevValue) {
|
||||
prevLimit=limit;
|
||||
} else {
|
||||
if(!upvec_setValue(pv, prevStart, prevLimit, 0, (uint32_t)(prevValue<<UPROPS_EA_WIDTH_SHIFT), UPROPS_EA_WIDTH_MASK, pErrorCode)) {
|
||||
if(!upvec_setValue(pv, prevStart, prevLimit, 0, (uint32_t)(prevValue<<UPROPS_EA_SHIFT), UPROPS_EA_MASK, pErrorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to set East Asian Width: %s\n", u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
|
@ -567,6 +583,192 @@ eaWidthLineFn(void *context,
|
|||
}
|
||||
}
|
||||
|
||||
/* LineBreak.txt ------------------------------------------------------------ */
|
||||
|
||||
/* LineBreak.txt line break names in the order of the parallel ULineBreak constants */
|
||||
static const char *const
|
||||
lbNames[U_LB_COUNT]={
|
||||
"XX",
|
||||
"AI",
|
||||
"AL",
|
||||
"B2",
|
||||
"BA",
|
||||
"BB",
|
||||
"BK",
|
||||
"CB",
|
||||
"CL",
|
||||
"CM",
|
||||
"CR",
|
||||
"EX",
|
||||
"GL",
|
||||
"HY",
|
||||
"ID",
|
||||
"IN",
|
||||
"IS",
|
||||
"LF",
|
||||
"NS",
|
||||
"NU",
|
||||
"OP",
|
||||
"PO",
|
||||
"PR",
|
||||
"QU",
|
||||
"SA",
|
||||
"SG",
|
||||
"SP",
|
||||
"SY",
|
||||
"ZW"
|
||||
};
|
||||
|
||||
static void
|
||||
lineBreakLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
uint32_t start, limit;
|
||||
int32_t i;
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops: syntax error in LineBreak.txt field 0 at %s\n", fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
++limit;
|
||||
|
||||
/* parse block name */
|
||||
i=getTokenIndex(lbNames, U_LB_COUNT, fields[1][0]);
|
||||
if(i<0) {
|
||||
fprintf(stderr, "genprops error: unknown line break name \"%s\" in LineBreak.txt\n", fields[1][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
if(!upvec_setValue(pv, start, limit, 2, (uint32_t)i<<UPROPS_LB_SHIFT, UPROPS_LB_MASK, pErrorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to set line break code: %s\n", u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
/* ArabicShaping.txt -------------------------------------------------------- */
|
||||
|
||||
/* Joining Type/Joining Group names in the order of the parallel UJoiningType/UJoiningGroup constants */
|
||||
static const char *const
|
||||
jtNames[U_JT_COUNT]={
|
||||
"U",
|
||||
"C",
|
||||
"D",
|
||||
"L",
|
||||
"R",
|
||||
"T"
|
||||
};
|
||||
|
||||
static const char *const
|
||||
jgNames[U_JG_COUNT]={
|
||||
"<no shaping>",
|
||||
"AIN",
|
||||
"ALAPH",
|
||||
"ALEF",
|
||||
"BEH",
|
||||
"BETH",
|
||||
"DAL",
|
||||
"DALATH RISH",
|
||||
"E",
|
||||
"FEH",
|
||||
"FINAL SEMKATH",
|
||||
"GAF",
|
||||
"GAMAL",
|
||||
"HAH",
|
||||
"HAMZA ON HEH GOAL",
|
||||
"HE",
|
||||
"HEH",
|
||||
"HEH GOAL",
|
||||
"HETH",
|
||||
"KAF",
|
||||
"KAPH",
|
||||
"KNOTTED HEH",
|
||||
"LAM",
|
||||
"LAMADH",
|
||||
"MEEM",
|
||||
"MIM",
|
||||
"NOON",
|
||||
"NUN",
|
||||
"PE",
|
||||
"QAF",
|
||||
"QAPH",
|
||||
"REH",
|
||||
"REVERSED PE",
|
||||
"SAD",
|
||||
"SADHE",
|
||||
"SEEN",
|
||||
"SEMKATH",
|
||||
"SHIN",
|
||||
"SWASH KAF",
|
||||
"SYRIAC WAW",
|
||||
"TAH",
|
||||
"TAW",
|
||||
"TEH MARBUTA",
|
||||
"TETH",
|
||||
"WAW",
|
||||
"YEH",
|
||||
"YEH BARREE",
|
||||
"YEH WITH TAIL",
|
||||
"YUDH",
|
||||
"YUDH HE",
|
||||
"ZAIN"
|
||||
};
|
||||
|
||||
static void
|
||||
arabicShapingLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
uint32_t start, limit;
|
||||
int32_t jt, jg;
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "genprops: syntax error in ArabicShaping.txt field 0 at %s\n", fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
++limit;
|
||||
|
||||
/* parse joining type */
|
||||
jt=getTokenIndex(jtNames, U_JT_COUNT, fields[2][0]);
|
||||
if(jt<0) {
|
||||
fprintf(stderr, "genprops error: unknown joining type in \"%s\" in ArabicShaping.txt\n", fields[2][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* parse joining group */
|
||||
jg=getTokenIndex(jgNames, U_JG_COUNT, fields[3][0]);
|
||||
if(jg<0) {
|
||||
fprintf(stderr, "genprops error: unknown joining group in \"%s\" in ArabicShaping.txt\n", fields[3][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
if(!upvec_setValue(pv, start, limit, 2, ((uint32_t)jt<<UPROPS_JT_SHIFT)|((uint32_t)jg<<UPROPS_JG_SHIFT), UPROPS_JT_MASK|UPROPS_JG_MASK, pErrorCode)) {
|
||||
fprintf(stderr, "genprops error: unable to set joining type/group code: %s\n", u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
parseArabicShaping(char *filename, char *basename,
|
||||
const char *suffix,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *fields[4][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
writeUCDFilename(basename, "ArabicShaping", suffix);
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 4, arabicShapingLineFn, NULL, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "error parsing ArabicShaping.txt: %s\n", u_errorName(*pErrorCode));
|
||||
}
|
||||
}
|
||||
|
||||
/* data serialization ------------------------------------------------------- */
|
||||
|
||||
U_CFUNC int32_t
|
||||
|
@ -593,6 +795,10 @@ writeAdditionalData(uint8_t *p, int32_t capacity, int32_t indexes[UPROPS_INDEX_C
|
|||
indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]=UPROPS_VECTOR_WORDS;
|
||||
indexes[UPROPS_RESERVED_INDEX]=
|
||||
indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]+pvCount;
|
||||
|
||||
indexes[UPROPS_MAX_VALUES_INDEX]=
|
||||
(((int32_t)UBLOCK_COUNT-1)<<UPROPS_BLOCK_SHIFT)|
|
||||
((int32_t)USCRIPT_CODE_LIMIT-1);
|
||||
}
|
||||
|
||||
if(p!=NULL && (pvCount*4)<=capacity) {
|
||||
|
|
Loading…
Add table
Reference in a new issue