mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-16 10:17:23 +00:00
ICU-8972 add parser for ppucd.txt, preparsed UCD data with simple syntax
X-SVN-Rev: 31132
This commit is contained in:
parent
fcc0c5bce6
commit
78fc8e2b0b
4 changed files with 605 additions and 1 deletions
|
@ -57,7 +57,7 @@ OBJECTS = filestrm.o package.o pkgitems.o swapimpl.o toolutil.o unewdata.o \
|
|||
denseranges.o \
|
||||
ucm.o ucmstate.o uoptions.o uparse.o \
|
||||
ucbuf.o xmlparser.o writesrc.o \
|
||||
pkg_icu.o pkg_genc.o pkg_gencmn.o flagparser.o filetools.o \
|
||||
pkg_icu.o pkg_genc.o pkg_gencmn.o ppucd.o flagparser.o filetools.o \
|
||||
udbgutil.o dbgutil.o ucln_tu.o
|
||||
|
||||
STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
|
||||
|
|
438
icu4c/source/tools/toolutil/ppucd.cpp
Normal file
438
icu4c/source/tools/toolutil/ppucd.cpp
Normal file
|
@ -0,0 +1,438 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: ppucd.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2011dec11
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "charstr.h"
|
||||
#include "cstring.h"
|
||||
#include "ppucd.h"
|
||||
#include "uassert.h"
|
||||
#include "uparse.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
// Out-of-line virtual destructor; there is nothing to release.
PropertyNames::~PropertyNames() {
}
|
||||
|
||||
// Default implementation: forwards the property name lookup to u_getPropertyEnum()
// and returns its result as an int32_t.
int32_t
PropertyNames::getPropertyEnum(const char *name) const {
    const int32_t property=u_getPropertyEnum(name);
    return property;
}
|
||||
|
||||
// Default implementation: forwards the property value name lookup
// to u_getPropertyValueEnum() for the given property.
int32_t
PropertyNames::getPropertyValueEnum(int32_t property, const char *name) const {
    const UProperty which=(UProperty)property;
    const int32_t value=u_getPropertyValueEnum(which, name);
    return value;
}
|
||||
|
||||
// Starts with an invalid range (U_SENTINEL), no mirroring glyph,
// no string-valued properties, and all binary/integer properties and the age zeroed.
UniProps::UniProps()
        : start(U_SENTINEL), end(U_SENTINEL),
          bmg(U_SENTINEL),
          numericValue(NULL),
          name(NULL), uni1Name(NULL), nameAlias(NULL) {
    memset(age, 0, sizeof(age));
    memset(intProps, 0, sizeof(intProps));
    memset(binProps, 0, sizeof(binProps));
}
|
||||
|
||||
// Out-of-line destructor; the members clean up after themselves.
UniProps::~UniProps() {
}
|
||||
|
||||
// Out-of-class definition of the static constant; its value is set in the class declaration.
const int32_t PreparsedUCD::kNumLineBuffers;
|
||||
|
||||
// Opens the preparsed UCD file for reading.
// A NULL or empty filename, or "-", selects stdin.
// If the file cannot be opened, prints a message to stderr and
// sets errorCode to U_FILE_ACCESS_ERROR.
// Does nothing beyond member initialization if errorCode is already a failure.
PreparsedUCD::PreparsedUCD(const char *filename, UErrorCode &errorCode)
        : icuPnames(new PropertyNames()), pnames(icuPnames),
          file(NULL),
          defaultLineIndex(-1), blockLineIndex(-1), lineIndex(0),
          lineNumber(0),
          lineType(NO_LINE),
          fieldLimit(NULL), lineLimit(NULL) {
    // Initialize the remaining members before any early return, so that the object
    // is in a defined state (for example for getUnicodeVersion()) even when
    // construction fails.
    memset(ucdVersion, 0, sizeof(ucdVersion));
    lines[0][0]=0;

    if(U_FAILURE(errorCode)) { return; }

    if(filename==NULL || *filename==0 || (*filename=='-' && filename[1]==0)) {
        filename=NULL;
        file=stdin;
    } else {
        file=fopen(filename, "r");
    }
    if(file==NULL) {
        perror("error opening preparsed UCD");
        fprintf(stderr, "error opening preparsed UCD file %s\n", filename);
        errorCode=U_FILE_ACCESS_ERROR;
        return;
    }
}
|
||||
|
||||
// Closes the input file (unless it is stdin) and deletes the owned default PropertyNames.
PreparsedUCD::~PreparsedUCD() {
    // file is still NULL if the constructor failed before or while opening it;
    // fclose(NULL) is undefined behavior, so skip it in that case.
    if(file!=NULL && file!=stdin) {
        fclose(file);
    }
    delete icuPnames;
}
|
||||
|
||||
// First-field keywords, in the same order as the LineType values.
// NO_LINE and EMPTY_LINE have no keyword.
// The keyword for DEFAULT_LINE is "defaults", matching the line syntax
// documented for that LineType ("defaults;0000..10FFFF;...").
static const char *const lineTypeStrings[]={
    NULL,
    NULL,
    "ucd",
    "property",
    "binary",
    "value",
    "defaults",
    "block",
    "cp",
    "algnamesrange"
};
|
||||
|
||||
// Reads the next line into a free line buffer, strips the line ending and
// trailing white space, splits the line into fields by replacing each ';' with NUL,
// and determines the line type from the first field.
// Returns NO_LINE at the end of the file or on error
// (U_FILE_ACCESS_ERROR for read errors, U_BUFFER_OVERFLOW_ERROR for an over-long line,
// U_PARSE_ERROR for an unknown line type).
// Lines starting with '#' and lines with only white space yield EMPTY_LINE.
// A "ucd" line also sets the Unicode version from its second field.
PreparsedUCD::LineType
PreparsedUCD::readLine(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return NO_LINE; }
    // Select the next available line buffer:
    // Skip buffers that still hold the current default and block lines.
    while(!isLineBufferAvailable(lineIndex)) {
        if(++lineIndex==kNumLineBuffers) {
            lineIndex=0;
        }
    }
    char *line=lines[lineIndex];
    *line=0;
    lineLimit=fieldLimit=line;
    lineType=NO_LINE;
    char *result=fgets(line, sizeof(lines[0]), file);
    if(result==NULL) {
        if(ferror(file)) {
            perror("error reading preparsed UCD");
            fprintf(stderr, "error reading preparsed UCD before line %ld\n", (long)lineNumber);
            errorCode=U_FILE_ACCESS_ERROR;
        }
        return NO_LINE;
    }
    ++lineNumber;
    char *limit=strchr(line, 0);
    // Detect a line that does not fit into the buffer rather than
    // silently treating its remainder as a separate line.
    if((size_t)(limit-line)==sizeof(lines[0])-1 && *(limit-1)!='\n') {
        // The buffer is full without a line ending.
        // That is fine only if this is the last line of the file.
        int next=getc(file);
        if(next!=EOF) {
            fprintf(stderr,
                    "error in preparsed UCD: line %ld is too long (more than %ld bytes)\n",
                    (long)lineNumber, (long)(sizeof(lines[0])-1));
            errorCode=U_BUFFER_OVERFLOW_ERROR;
            return NO_LINE;
        }
    }
    if(*line=='#') {
        // Comment line: a single field that spans the whole line,
        // so that nextField() returns NULL.
        lineLimit=fieldLimit=limit;
        return lineType=EMPTY_LINE;
    }
    // Remove trailing \r\n.
    char c;
    while(line<limit && ((c=*(limit-1))=='\n' || c=='\r')) { --limit; }
    // Remove trailing white space.
    while(line<limit && ((c=*(limit-1))==' ' || c=='\t')) { --limit; }
    *limit=0;
    lineLimit=limit;
    if(line==limit) {
        fieldLimit=limit;
        return lineType=EMPTY_LINE;
    }
    // Split by ';'.
    char *semi=line;
    while((semi=strchr(semi, ';'))!=NULL) { *semi++=0; }
    fieldLimit=strchr(line, 0);
    // Determine the line type.
    int32_t type;
    for(type=EMPTY_LINE+1;; ++type) {
        if(type==LINE_TYPE_COUNT) {
            fprintf(stderr,
                    "error in preparsed UCD: unknown line type (first field) '%s' on line %ld\n",
                    line, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
            return NO_LINE;
        }
        if(0==strcmp(line, lineTypeStrings[type])) {
            break;
        }
    }
    lineType=(LineType)type;
    if(lineType==UNICODE_VERSION_LINE && fieldLimit<lineLimit) {
        // The second field is the version string.
        u_versionFromString(ucdVersion, fieldLimit+1);
    }
    return lineType;
}
|
||||
|
||||
const char *
|
||||
PreparsedUCD::firstField() {
|
||||
char *field=lines[lineIndex];
|
||||
fieldLimit=strchr(field, 0);
|
||||
return field;
|
||||
}
|
||||
|
||||
const char *
|
||||
PreparsedUCD::nextField() {
|
||||
if(fieldLimit==lineLimit) { return NULL; }
|
||||
char *field=fieldLimit+1;
|
||||
fieldLimit=strchr(field, 0);
|
||||
return field;
|
||||
}
|
||||
|
||||
// Parses the code point range and the property fields of the current
// default/block/cp line into the matching UniProps object and returns it.
// newValues is cleared first and then receives the properties that parseProperty()
// records for this line.
// Returns NULL with errorCode set if the current line has no property values
// (U_ILLEGAL_ARGUMENT_ERROR) or if the line is malformed (U_PARSE_ERROR).
const UniProps *
PreparsedUCD::getProps(UnicodeSet &newValues, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return NULL; }
    newValues.clear();
    if(!lineHasPropertyValues()) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    // Rewind to the line type field, then fetch the range field that follows it.
    firstField();
    const char *rangeField=nextField();
    if(rangeField==NULL) {
        // No range field after the type.
        fprintf(stderr,
                "error in preparsed UCD: missing default/block/cp range field "
                "(no second field) on line %ld\n",
                (long)lineNumber);
        errorCode=U_PARSE_ERROR;
        return NULL;
    }
    UChar32 start, end;
    if(!parseCodePointRange(rangeField, start, end, errorCode)) { return NULL; }

    UniProps *target;
    if(lineType==DEFAULT_LINE) {
        // Only one defaults line is allowed, and it must cover all of Unicode.
        if(defaultLineIndex>=0) {
            fprintf(stderr,
                    "error in preparsed UCD: second line with default properties on line %ld\n",
                    (long)lineNumber);
            errorCode=U_PARSE_ERROR;
            return NULL;
        }
        if(!(start==0 && end==0x10ffff)) {
            fprintf(stderr,
                    "error in preparsed UCD: default range must be 0..10FFFF, not '%s' on line %ld\n",
                    rangeField, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
            return NULL;
        }
        target=&defaultProps;
        // Reserve this line buffer for the default properties.
        defaultLineIndex=lineIndex;
    } else if(lineType==BLOCK_LINE) {
        // Block inherits default properties.
        blockProps=defaultProps;
        target=&blockProps;
        // Reserve this line buffer for the block properties.
        blockLineIndex=lineIndex;
    } else if(lineType==CP_LINE) {
        const UBool insideBlock=blockProps.start<=start && end<=blockProps.end;
        const UBool outsideBlock=start>blockProps.end || end<blockProps.start;
        if(insideBlock) {
            // Code point range fully inside the last block inherits the block properties.
            cpProps=blockProps;
        } else if(outsideBlock) {
            // Code point range fully outside the last block inherits the default properties.
            cpProps=defaultProps;
        } else {
            // Code point range partially overlapping with the last block is illegal.
            fprintf(stderr,
                    "error in preparsed UCD: cp range %s on line %ld only "
                    "partially overlaps with block range %04lX..%04lX\n",
                    rangeField, (long)lineNumber, (long)blockProps.start, (long)blockProps.end);
            errorCode=U_PARSE_ERROR;
            return NULL;
        }
        target=&cpProps;
    } else {
        // Will not occur because of the lineHasPropertyValues() check above.
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    target->start=start;
    target->end=end;
    // Each remaining field is one property assignment.
    for(const char *propField=nextField(); propField!=NULL; propField=nextField()) {
        if(!parseProperty(*target, propField, newValues, errorCode)) { return NULL; }
    }
    return target;
}
|
||||
|
||||
static const struct {
|
||||
const char *name;
|
||||
int32_t prop;
|
||||
} ppucdProperties[]={
|
||||
{ "Name_Alias", PPUCD_NAME_ALIAS }
|
||||
};
|
||||
|
||||
// Returns TRUE for "ok to continue parsing fields".
|
||||
UBool
|
||||
PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
|
||||
UErrorCode &errorCode) {
|
||||
CharString pBuffer;
|
||||
const char *p=field;
|
||||
const char *v=strchr(p, '=');
|
||||
int binaryValue;
|
||||
if(*p=='-') {
|
||||
if(v!=NULL) {
|
||||
fprintf(stderr,
|
||||
"error in preparsed UCD: mix of binary-property-no and "
|
||||
"enum-property syntax '%s' on line %ld\n",
|
||||
field, (long)lineNumber);
|
||||
errorCode=U_PARSE_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
binaryValue=0;
|
||||
++p;
|
||||
} else if(v==NULL) {
|
||||
binaryValue=1;
|
||||
} else {
|
||||
binaryValue=-1;
|
||||
// Copy out the property name rather than modifying the field (writing a NUL).
|
||||
pBuffer.append(p, (int32_t)(v-p), errorCode);
|
||||
p=pBuffer.data();
|
||||
++v;
|
||||
}
|
||||
int32_t prop=pnames->getPropertyEnum(p);
|
||||
if(prop<0) {
|
||||
for(int32_t i=0;; ++i) {
|
||||
if(i==LENGTHOF(ppucdProperties)) {
|
||||
// Ignore unknown property names.
|
||||
return TRUE;
|
||||
}
|
||||
if(0==uprv_stricmp(p, ppucdProperties[i].name)) {
|
||||
prop=ppucdProperties[i].prop;
|
||||
U_ASSERT(prop>=0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(prop<UCHAR_BINARY_LIMIT) {
|
||||
if(binaryValue>=0) {
|
||||
props.binProps[prop]=(UBool)binaryValue;
|
||||
} else {
|
||||
// No binary value for a binary property.
|
||||
fprintf(stderr,
|
||||
"error in preparsed UCD: enum-property syntax '%s' "
|
||||
"for binary property on line %ld\n",
|
||||
field, (long)lineNumber);
|
||||
errorCode=U_PARSE_ERROR;
|
||||
}
|
||||
} else if(binaryValue>=0) {
|
||||
// Binary value for a non-binary property.
|
||||
fprintf(stderr,
|
||||
"error in preparsed UCD: binary-property syntax '%s' "
|
||||
"for non-binary property on line %ld\n",
|
||||
field, (long)lineNumber);
|
||||
errorCode=U_PARSE_ERROR;
|
||||
} else if(prop<UCHAR_INT_LIMIT) {
|
||||
int32_t value;
|
||||
if((value=pnames->getPropertyValueEnum(prop, v))==UCHAR_INVALID_CODE) {
|
||||
fprintf(stderr,
|
||||
"error in preparsed UCD: '%s' is not a valid value on line %ld\n",
|
||||
field, (long)lineNumber);
|
||||
errorCode=U_PARSE_ERROR;
|
||||
} else {
|
||||
props.intProps[prop]=value;
|
||||
}
|
||||
} else {
|
||||
switch(prop) {
|
||||
case UCHAR_NUMERIC_VALUE:
|
||||
props.numericValue=v;
|
||||
break;
|
||||
case UCHAR_NAME:
|
||||
props.name=v;
|
||||
break;
|
||||
case UCHAR_UNICODE_1_NAME:
|
||||
props.uni1Name=v;
|
||||
break;
|
||||
case UCHAR_AGE:
|
||||
u_versionFromString(props.age, v); // Writes 0.0.0.0 if v is not numeric.
|
||||
break;
|
||||
case UCHAR_BIDI_MIRRORING_GLYPH:
|
||||
props.bmg=parseCodePoint(v, errorCode);
|
||||
break;
|
||||
case UCHAR_CASE_FOLDING:
|
||||
parseString(v, props.cf, errorCode);
|
||||
break;
|
||||
case PPUCD_NAME_ALIAS:
|
||||
props.nameAlias=v;
|
||||
break;
|
||||
default:
|
||||
// Ignore unhandled properties.
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
newValues.add((UChar32)prop);
|
||||
return TRUE;
|
||||
} else {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
// Parses the code point range (second field) of the current algnamesrange line.
// Sets U_ILLEGAL_ARGUMENT_ERROR if the current line is not an algnamesrange line,
// and U_PARSE_ERROR if the range field is missing.
// Returns TRUE if start and end were set, otherwise FALSE.
UBool
PreparsedUCD::getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
    // This function returns a UBool, so report failure as FALSE (not NULL).
    if(U_FAILURE(errorCode)) { return FALSE; }
    if(lineType!=ALG_NAMES_RANGE_LINE) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }
    // Rewind to the line type field, then fetch the range field that follows it.
    firstField();
    const char *field=nextField();
    if(field==NULL) {
        // No range field after the type.
        fprintf(stderr,
                "error in preparsed UCD: missing algnamesrange range field "
                "(no second field) on line %ld\n",
                (long)lineNumber);
        errorCode=U_PARSE_ERROR;
        return FALSE;
    }
    return parseCodePointRange(field, start, end, errorCode);
}
|
||||
|
||||
// Parses s as a hexadecimal code point.
// The whole string must be consumed and the value must be below 0x110000;
// otherwise prints a message to stderr, sets U_PARSE_ERROR and returns U_SENTINEL.
UChar32
PreparsedUCD::parseCodePoint(const char *s, UErrorCode &errorCode) {
    char *end;
    // Keep the full width of the strtoul result for the range check:
    // Truncating to 32 bits first could make an out-of-range value
    // wrap around into the valid code point range.
    unsigned long value=uprv_strtoul(s, &end, 16);
    if(end<=s || *end!=0 || value>=0x110000) {
        fprintf(stderr,
                "error in preparsed UCD: '%s' is not a valid code point on line %ld\n",
                s, (long)lineNumber);
        errorCode=U_PARSE_ERROR;
        return U_SENTINEL;
    }
    return (UChar32)value;
}
|
||||
|
||||
// Parses s with u_parseCodePointRange().
// On success sets start and end and returns TRUE.
// If errorCode indicates a failure afterwards, prints a message to stderr,
// leaves start and end untouched and returns FALSE.
UBool
PreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
    uint32_t first, last;
    u_parseCodePointRange(s, &first, &last, &errorCode);
    if(U_SUCCESS(errorCode)) {
        start=(UChar32)first;
        end=(UChar32)last;
        return TRUE;
    }
    fprintf(stderr,
            "error in preparsed UCD: '%s' is not a valid code point range on line %ld\n",
            s, (long)lineNumber);
    return FALSE;
}
|
||||
|
||||
void
|
||||
PreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) {
|
||||
UChar *buffer=uni.getBuffer(-1);
|
||||
int32_t length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
|
||||
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
errorCode=U_ZERO_ERROR;
|
||||
uni.releaseBuffer(0);
|
||||
buffer=uni.getBuffer(length);
|
||||
length=u_parseString(s, buffer, uni.getCapacity(), NULL, &errorCode);
|
||||
}
|
||||
uni.releaseBuffer(length);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr,
|
||||
"error in preparsed UCD: '%s' is not a valid Unicode string on line %ld\n",
|
||||
s, (long)lineNumber);
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
164
icu4c/source/tools/toolutil/ppucd.h
Normal file
164
icu4c/source/tools/toolutil/ppucd.h
Normal file
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: ppucd.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2011dec11
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __PPUCD_H__
|
||||
#define __PPUCD_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/** Additions to the uchar.h enum UProperty. */
|
||||
enum {
|
||||
/** Name_Alias */
|
||||
PPUCD_NAME_ALIAS=UCHAR_OTHER_PROPERTY_LIMIT
|
||||
};
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class U_TOOLUTIL_API PropertyNames {
|
||||
public:
|
||||
virtual ~PropertyNames();
|
||||
virtual int32_t getPropertyEnum(const char *name) const;
|
||||
virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const;
|
||||
};
|
||||
|
||||
struct U_TOOLUTIL_API UniProps {
|
||||
UniProps();
|
||||
~UniProps();
|
||||
|
||||
UChar32 start, end;
|
||||
UBool binProps[UCHAR_BINARY_LIMIT];
|
||||
int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START];
|
||||
UVersionInfo age;
|
||||
UChar32 bmg;
|
||||
const char *numericValue;
|
||||
const char *name;
|
||||
const char *uni1Name;
|
||||
const char *nameAlias;
|
||||
UnicodeString cf;
|
||||
};
|
||||
|
||||
class U_TOOLUTIL_API PreparsedUCD {
|
||||
public:
|
||||
enum LineType {
|
||||
/** No line, end of file. */
|
||||
NO_LINE,
|
||||
/** Empty line. (Might contain a comment.) */
|
||||
EMPTY_LINE,
|
||||
|
||||
/** ucd;6.1.0 */
|
||||
UNICODE_VERSION_LINE,
|
||||
|
||||
/** property;Binary;Alpha;Alphabetic */
|
||||
PROPERTY_LINE,
|
||||
/** binary;N;No;F;False */
|
||||
BINARY_LINE,
|
||||
/** value;gc;Zs;Space_Separator */
|
||||
VALUE_LINE,
|
||||
|
||||
/** defaults;0000..10FFFF;age=NA;bc=L;... */
|
||||
DEFAULT_LINE,
|
||||
/** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */
|
||||
BLOCK_LINE,
|
||||
/** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */
|
||||
CP_LINE,
|
||||
|
||||
/** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */
|
||||
ALG_NAMES_RANGE_LINE,
|
||||
|
||||
LINE_TYPE_COUNT
|
||||
};
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
* Prepare this object for a new, empty package.
|
||||
*/
|
||||
PreparsedUCD(const char *filename, UErrorCode &errorCode);
|
||||
|
||||
/** Destructor. */
|
||||
~PreparsedUCD();
|
||||
|
||||
/** Sets (aliases) a non-standard PropertyNames implementation. Caller retains ownership. */
|
||||
void setPropertyNames(const PropertyNames *pn) { pnames=pn; }
|
||||
|
||||
/**
|
||||
* Reads a line from the preparsed UCD file.
|
||||
* Splits the line by replacing each ';' with a NUL.
|
||||
*/
|
||||
LineType readLine(UErrorCode &errorCode);
|
||||
|
||||
/** Returns the line's next field, or NULL. */
|
||||
const char *nextField();
|
||||
|
||||
/** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */
|
||||
const UVersionInfo &getUnicodeVersion() const { return ucdVersion; }
|
||||
|
||||
/** Returns TRUE if the current line has property values. */
|
||||
UBool lineHasPropertyValues() const { return DEFAULT_LINE<=lineType && lineType<=CP_LINE; }
|
||||
|
||||
/**
|
||||
* Parses properties from the current line.
|
||||
* Clears newValues and sets UProperty codes for property values mentioned
|
||||
* on the current line (as opposed to being inherited).
|
||||
* Returns a pointer to the filled-in UniProps, or NULL if something went wrong.
|
||||
* The returned UniProps are usable until the next line of the same type is read.
|
||||
*/
|
||||
const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Returns the code point range for the current algnamesrange line.
|
||||
* Calls & parses nextField().
|
||||
* Further nextField() calls will yield the range's type & prefix string.
|
||||
* Returns U_SUCCESS(errorCode).
|
||||
*/
|
||||
UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode);
|
||||
|
||||
private:
|
||||
UBool isLineBufferAvailable(int32_t i) {
|
||||
return defaultLineIndex!=i && blockLineIndex!=i;
|
||||
}
|
||||
|
||||
/** Resets the field iterator and returns the line's first field (the line type field). */
|
||||
const char *firstField();
|
||||
|
||||
UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
|
||||
UErrorCode &errorCode);
|
||||
UChar32 parseCodePoint(const char *s, UErrorCode &errorCode);
|
||||
UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode);
|
||||
void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode);
|
||||
|
||||
static const int32_t kNumLineBuffers=3;
|
||||
|
||||
PropertyNames *icuPnames; // owned
|
||||
const PropertyNames *pnames; // aliased
|
||||
FILE *file;
|
||||
int32_t defaultLineIndex, blockLineIndex, lineIndex;
|
||||
int32_t lineNumber;
|
||||
LineType lineType;
|
||||
char *fieldLimit;
|
||||
char *lineLimit;
|
||||
|
||||
UVersionInfo ucdVersion;
|
||||
UniProps defaultProps, blockProps, cpProps;
|
||||
// Multiple lines so that default and block properties can maintain pointers
|
||||
// into their line buffers.
|
||||
char lines[kNumLineBuffers][4096];
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __PPUCD_H__
|
|
@ -265,6 +265,7 @@
|
|||
</ClCompile>
|
||||
<ClCompile Include="pkg_icu.cpp" />
|
||||
<ClCompile Include="pkgitems.cpp" />
|
||||
<ClCompile Include="ppucd.cpp" />
|
||||
<ClCompile Include="swapimpl.cpp" />
|
||||
<ClCompile Include="toolutil.cpp">
|
||||
<DisableLanguageExtensions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</DisableLanguageExtensions>
|
||||
|
@ -299,6 +300,7 @@
|
|||
<ClInclude Include="pkg_gencmn.h" />
|
||||
<ClInclude Include="pkg_icu.h" />
|
||||
<ClInclude Include="pkg_imp.h" />
|
||||
<ClInclude Include="ppucd.h" />
|
||||
<ClInclude Include="swapimpl.h" />
|
||||
<ClInclude Include="toolutil.h" />
|
||||
<ClInclude Include="ucbuf.h" />
|
||||
|
|
Loading…
Add table
Reference in a new issue