mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-3969 initial code for gencase, generate ucase.icu containing only and all case mapping properties
X-SVN-Rev: 16214
This commit is contained in:
parent
f415ea5e5a
commit
d5631e445c
7 changed files with 2118 additions and 0 deletions
100
icu4c/source/tools/gencase/Makefile.in
Normal file
100
icu4c/source/tools/gencase/Makefile.in
Normal file
|
@ -0,0 +1,100 @@
|
|||
## Makefile.in for ICU - tools/gencase
|
||||
## Copyright (c) 1999-2004, International Business Machines Corporation and
|
||||
## others. All Rights Reserved.
|
||||
## Steven R. Loomis
|
||||
|
||||
## Source directory information
|
||||
srcdir = @srcdir@
|
||||
top_srcdir = @top_srcdir@
|
||||
|
||||
top_builddir = ../..
|
||||
|
||||
include $(top_builddir)/icudefs.mk
|
||||
|
||||
##
|
||||
|
||||
TARGET_STUB_NAME = gencase
|
||||
|
||||
SECTION = 8
|
||||
|
||||
MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
|
||||
|
||||
## Build directory information
|
||||
subdir = tools/gencase
|
||||
|
||||
## Extra files to remove for 'make clean'
|
||||
CLEANFILES = *~ $(DEPS) $(MAN_FILES)
|
||||
|
||||
## Target information
|
||||
TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
|
||||
|
||||
CPPFLAGS += -I$(top_builddir)/common -I$(top_srcdir)/common -I$(srcdir)/../toolutil
|
||||
LIBS = $(LIBICUTOOLUTIL) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
|
||||
|
||||
OBJECTS = gencase.o store.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
## List of phony targets
|
||||
.PHONY : all all-local install install-local clean clean-local \
|
||||
distclean distclean-local dist dist-local check \
|
||||
check-local install-man
|
||||
|
||||
## Clear suffix list
|
||||
.SUFFIXES :
|
||||
|
||||
## List of standard targets
|
||||
all: all-local
|
||||
install: install-local
|
||||
clean: clean-local
|
||||
distclean : distclean-local
|
||||
dist: dist-local
|
||||
check: all check-local
|
||||
|
||||
all-local: $(TARGET) $(MAN_FILES)
|
||||
|
||||
install-local: all-local install-man
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
|
||||
$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
|
||||
|
||||
# man page
|
||||
install-man: $(MAN_FILES)
|
||||
$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
$(INSTALL_DATA) $< $(DESTDIR)$(mandir)/man$(SECTION)
|
||||
|
||||
%.$(SECTION): $(srcdir)/%.$(SECTION).in
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
# build postscript and pdf formats
|
||||
#$(TARGET).ps: $(TARGET).$(SECTION)
|
||||
# groff -man < $< > $@
|
||||
|
||||
#$(TARGET).pdf: $(TARGET).ps
|
||||
# ps2pdf $< $@
|
||||
|
||||
dist-local:
|
||||
|
||||
clean-local:
|
||||
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
|
||||
$(RMV) $(TARGET) $(OBJECTS)
|
||||
|
||||
distclean-local: clean-local
|
||||
$(RMV) Makefile
|
||||
|
||||
check-local: all-local
|
||||
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
$(TARGET) : $(OBJECTS)
|
||||
$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
|
||||
|
||||
ifeq (,$(MAKECMDGOALS))
|
||||
-include $(DEPS)
|
||||
else
|
||||
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
|
||||
-include $(DEPS)
|
||||
endif
|
||||
endif
|
126
icu4c/source/tools/gencase/gencase.8.in
Normal file
126
icu4c/source/tools/gencase/gencase.8.in
Normal file
|
@ -0,0 +1,126 @@
|
|||
.\" Hey, Emacs! This is -*-nroff-*- you know...
|
||||
.\"
|
||||
.\" genprops.8: manual page for the genprops utility
|
||||
.\"
|
||||
.\" Copyright (C) 2000-2001 IBM, Inc. and others.
|
||||
.\"
|
||||
.TH GENCASE 8 "16 January 2001" "ICU MANPAGE" "ICU @VERSION@ Manual"
|
||||
.SH NAME
|
||||
.B gencase
|
||||
\- compile case mapping properties from the Unicode Character Database
|
||||
.SH SYNOPSIS
|
||||
.B gencase
|
||||
[
|
||||
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
|
||||
]
|
||||
[
|
||||
.BR "\-v\fP, \fB\-\-verbose"
|
||||
]
|
||||
[
|
||||
.BI "\-u\fP, \fB\-\-unicode" " version"
|
||||
]
|
||||
[
|
||||
.BI "\-c\fP, \fB\-\-copyright"
|
||||
]
|
||||
[
|
||||
.BI "\-s\fP, \fB\-\-sourcedir" " source"
|
||||
]
|
||||
[
|
||||
.BI "\-d\fP, \fB\-\-destdir" " destination"
|
||||
]
|
||||
[
|
||||
.I suffix
|
||||
]
|
||||
.SH DESCRIPTION
|
||||
.B gencase
|
||||
reads some of the Unicode Character Database files and compiles their
|
||||
information into a binary form.
|
||||
The resulting file,
|
||||
.BR ucase.icu ,
|
||||
can then be read directly by ICU, or used by
|
||||
.BR pkgdata (8)
|
||||
for incorporation into a larger archive or library.
|
||||
.LP
|
||||
The files read by
|
||||
.B gencase
|
||||
are described in the
|
||||
.B FILES
|
||||
section. If
|
||||
.I suffix
|
||||
is passed on the command line, the names of these files will actually
|
||||
be changed to include a dash followed by
|
||||
.I suffix
|
||||
in their basename. For example, the file
|
||||
.B UnicodeData.txt
|
||||
would be looked for under the name
|
||||
.BR UnicodeData\-\fIsuffix\fP.txt .
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
|
||||
Print help about usage and exit.
|
||||
.TP
|
||||
.BR "\-v\fP, \fB\-\-verbose"
|
||||
Display extra informative messages during execution.
|
||||
.TP
|
||||
.BI "\-u\fP, \fB\-\-unicode" " version"
|
||||
Specify which
|
||||
.I version
|
||||
of Unicode the Unicode Character Database refers to.
|
||||
Defaults to
|
||||
.BR 3.0.0 .
|
||||
.TP
|
||||
.BI "\-c\fP, \fB\-\-copyright"
|
||||
Include a copyright notice into the binary data.
|
||||
.TP
|
||||
.BI "\-s\fP, \fB\-\-sourcedir" " source"
|
||||
Set the source directory to
|
||||
.IR source .
|
||||
The default source directory is specified by the environment variable
|
||||
.BR ICU_DATA .
|
||||
.TP
|
||||
.BI "\-d\fP, \fB\-\-destdir" " destination"
|
||||
Set the destination directory to
|
||||
.IR destination .
|
||||
The default destination directory is specified by the environment variable
|
||||
.BR ICU_DATA .
|
||||
.SH ENVIRONMENT
|
||||
.TP 10
|
||||
.B ICU_DATA
|
||||
Specifies the directory containing ICU data. Defaults to
|
||||
.BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ .
|
||||
Some tools in ICU depend on the presence of the trailing slash. It is thus
|
||||
important to make sure that it is present if
|
||||
.B ICU_DATA
|
||||
is set.
|
||||
.SH FILES
|
||||
The following files are read by
|
||||
.B gencase
|
||||
and are looked for in the
|
||||
.I source
|
||||
directory.
|
||||
.TP 20
|
||||
.B UnicodeData.txt
|
||||
The main file in the Unicode Character Database. Contains character
|
||||
properties, combining classes information, decompositions, names,
|
||||
etc.\|.\|..
|
||||
.TP
|
||||
.B BidiMirroring.txt
|
||||
Properties for substituting characters in an implementation of
|
||||
bidirectional mirroring.
|
||||
.TP
|
||||
.B SpecialCasing.txt
|
||||
List of properties required for full case mapping.
|
||||
.TP
|
||||
.B CaseFolding.txt
|
||||
Mapping from characters to their case-folded forms. (Note: this file
|
||||
is derived from
|
||||
.B UnicodeData.txt
|
||||
and
|
||||
.B SpecialCasing.txt
|
||||
when generated by the Unicode Consortium.)
|
||||
.SH VERSION
|
||||
@VERSION@
|
||||
.SH COPYRIGHT
|
||||
Copyright (C) 2000-2002 IBM, Inc. and others.
|
||||
.SH SEE ALSO
|
||||
.BR pkgdata (8)
|
776
icu4c/source/tools/gencase/gencase.c
Normal file
776
icu4c/source/tools/gencase/gencase.c
Normal file
|
@ -0,0 +1,776 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: gencase.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004aug28
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* This program reads several of the Unicode character database text files,
|
||||
* parses them, and extracts the case mapping properties for each character.
|
||||
* It then writes a binary file containing the properties
|
||||
* that is designed to be used directly for random-access to
|
||||
* the properties of each Unicode character.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uset.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "unicode/uclean.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "uarrsort.h"
|
||||
#include "unewdata.h"
|
||||
#include "uoptions.h"
|
||||
#include "uparse.h"
|
||||
#include "uprops.h"
|
||||
#include "propsvec.h"
|
||||
#include "gencase.h"
|
||||
|
||||
#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
/* data --------------------------------------------------------------------- */
|
||||
|
||||
static UNewTrie *trie;
|
||||
uint32_t *pv;
|
||||
static int32_t pvCount;
|
||||
|
||||
UBool beVerbose=FALSE, haveCopyright=TRUE;
|
||||
|
||||
/*
|
||||
* Unicode set collecting the case-sensitive characters;
|
||||
* see uchar.h UCHAR_CASE_SENSITIVE.
|
||||
* Add code points from case mappings/foldings in
|
||||
* the root locale and with default options.
|
||||
*/
|
||||
static USet *caseSensitive;
|
||||
|
||||
/* prototypes --------------------------------------------------------------- */
|
||||
|
||||
static void
|
||||
parseSpecialCasing(const char *filename, UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
parseCaseFolding(const char *filename, UErrorCode *pErrorCode);
|
||||
|
||||
static void
|
||||
parseDB(const char *filename, UErrorCode *pErrorCode);
|
||||
|
||||
/* parse files with multiple binary properties ------------------------------ */
|
||||
|
||||
/* TODO: more common code, move functions to uparse.h|c */
|
||||
|
||||
/* TODO: similar to genprops/props2.c but not the same */
|
||||
|
||||
/*
 * One binary property that can be picked up from a UCD file:
 * the property name as it is spelled in the file, plus the word index,
 * value and mask that are handed to upvec_setValue() for every
 * code point range that carries the property.
 */
typedef struct Binary {
    const char *propName;
    int32_t vecWord;
    uint32_t vecValue;
    uint32_t vecMask;
} Binary;

/*
 * All binary properties that are read from one UCD file.
 * ucdFile is the base name of the file; writeUCDFilename() adds the
 * optional "-suffix" and the ".txt" extension.
 */
typedef struct Binaries {
    const char *ucdFile;
    const Binary *binaries;
    int32_t binariesCount;
} Binaries;
|
||||
|
||||
static const Binary
|
||||
propListNames[]={
|
||||
{ "Soft_Dotted", 0, UCASE_SOFT_DOTTED, UCASE_DOT_MASK }
|
||||
};
|
||||
|
||||
static const Binaries
|
||||
propListBinaries={
|
||||
"PropList", propListNames, LENGTHOF(propListNames)
|
||||
};
|
||||
|
||||
static const Binary
|
||||
derCorePropsNames[]={
|
||||
{ "Lowercase", 0, UCASE_LOWER, UCASE_TYPE_MASK },
|
||||
{ "Uppercase", 0, UCASE_UPPER, UCASE_TYPE_MASK }
|
||||
};
|
||||
|
||||
static const Binaries
|
||||
derCorePropsBinaries={
|
||||
"DerivedCoreProperties", derCorePropsNames, LENGTHOF(derCorePropsNames)
|
||||
};
|
||||
|
||||
static void U_CALLCONV
|
||||
binariesLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
const Binaries *bin;
|
||||
char *s;
|
||||
uint32_t start, limit;
|
||||
int32_t i;
|
||||
|
||||
bin=(const Binaries *)context;
|
||||
|
||||
u_parseCodePointRange(fields[0][0], &start, &limit, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "gencase: syntax error in %s.txt field 0 at %s\n", bin->ucdFile, fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
++limit;
|
||||
|
||||
/* parse binary property name */
|
||||
s=(char *)u_skipWhitespace(fields[1][0]);
|
||||
for(i=0;; ++i) {
|
||||
if(i==bin->binariesCount) {
|
||||
/* ignore unrecognized properties */
|
||||
return;
|
||||
}
|
||||
if(isToken(bin->binaries[i].propName, s)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(bin->binaries[i].vecMask==0) {
|
||||
fprintf(stderr, "gencase error: mask value %d==0 for %s %s\n",
|
||||
(int)bin->binaries[i].vecMask, bin->ucdFile, bin->binaries[i].propName);
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
|
||||
if(!upvec_setValue(pv, start, limit, bin->binaries[i].vecWord, bin->binaries[i].vecValue, bin->binaries[i].vecMask, pErrorCode)) {
|
||||
fprintf(stderr, "gencase error: unable to set %s, code: %s\n",
|
||||
bin->binaries[i].propName, u_errorName(*pErrorCode));
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
parseBinariesFile(char *filename, char *basename, const char *suffix,
|
||||
const Binaries *bin,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *fields[2][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
writeUCDFilename(basename, bin->ucdFile, suffix);
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 2, binariesLineFn, (void *)bin, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "error parsing %s.txt: %s\n", bin->ucdFile, u_errorName(*pErrorCode));
|
||||
}
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
enum
|
||||
{
|
||||
HELP_H,
|
||||
HELP_QUESTION_MARK,
|
||||
VERBOSE,
|
||||
COPYRIGHT,
|
||||
DESTDIR,
|
||||
SOURCEDIR,
|
||||
UNICODE_VERSION,
|
||||
ICUDATADIR
|
||||
};
|
||||
|
||||
/* Keep these values in sync with the above enums */
|
||||
static UOption options[]={
|
||||
UOPTION_HELP_H,
|
||||
UOPTION_HELP_QUESTION_MARK,
|
||||
UOPTION_VERBOSE,
|
||||
UOPTION_COPYRIGHT,
|
||||
UOPTION_DESTDIR,
|
||||
UOPTION_SOURCEDIR,
|
||||
{ "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },
|
||||
UOPTION_ICUDATADIR
|
||||
};
|
||||
|
||||
extern int
|
||||
main(int argc, char* argv[]) {
|
||||
char filename[300];
|
||||
const char *srcDir=NULL, *destDir=NULL, *suffix=NULL;
|
||||
char *basename=NULL;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
|
||||
U_MAIN_INIT_ARGS(argc, argv);
|
||||
|
||||
/* preset then read command line options */
|
||||
options[DESTDIR].value=u_getDataDirectory();
|
||||
options[SOURCEDIR].value="";
|
||||
options[UNICODE_VERSION].value="";
|
||||
options[ICUDATADIR].value=u_getDataDirectory();
|
||||
argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
|
||||
|
||||
/* error handling, printing usage message */
|
||||
if(argc<0) {
|
||||
fprintf(stderr,
|
||||
"error in command line argument \"%s\"\n",
|
||||
argv[-argc]);
|
||||
}
|
||||
if(argc<0 || options[HELP_H].doesOccur || options[HELP_QUESTION_MARK].doesOccur) {
|
||||
/*
|
||||
* Broken into chucks because the C89 standard says the minimum
|
||||
* required supported string length is 509 bytes.
|
||||
*/
|
||||
fprintf(stderr,
|
||||
"Usage: %s [-options] [suffix]\n"
|
||||
"\n"
|
||||
"read the UnicodeData.txt file and other Unicode properties files and\n"
|
||||
"create a binary file " UCASE_DATA_NAME "." UCASE_DATA_TYPE " with the character properties\n"
|
||||
"\n",
|
||||
argv[0]);
|
||||
fprintf(stderr,
|
||||
"Options:\n"
|
||||
"\t-h or -? or --help this usage text\n"
|
||||
"\t-v or --verbose verbose output\n"
|
||||
"\t-c or --copyright include a copyright notice\n"
|
||||
"\t-u or --unicode Unicode version, followed by the version like 3.0.0\n");
|
||||
fprintf(stderr,
|
||||
"\t-d or --destdir destination directory, followed by the path\n"
|
||||
"\t-s or --sourcedir source directory, followed by the path\n"
|
||||
"\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
|
||||
"\t followed by path, defaults to %s\n"
|
||||
"\tsuffix suffix that is to be appended with a '-'\n"
|
||||
"\t to the source file basenames before opening;\n"
|
||||
"\t 'gencase new' will read UnicodeData-new.txt etc.\n",
|
||||
u_getDataDirectory());
|
||||
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
/* get the options values */
|
||||
beVerbose=options[VERBOSE].doesOccur;
|
||||
haveCopyright=options[COPYRIGHT].doesOccur;
|
||||
srcDir=options[SOURCEDIR].value;
|
||||
destDir=options[DESTDIR].value;
|
||||
|
||||
if(argc>=2) {
|
||||
suffix=argv[1];
|
||||
} else {
|
||||
suffix=NULL;
|
||||
}
|
||||
|
||||
if(options[UNICODE_VERSION].doesOccur) {
|
||||
setUnicodeVersion(options[UNICODE_VERSION].value);
|
||||
}
|
||||
/* else use the default dataVersion in store.c */
|
||||
|
||||
if (options[ICUDATADIR].doesOccur) {
|
||||
u_setDataDirectory(options[ICUDATADIR].value);
|
||||
}
|
||||
|
||||
/* prepare the filename beginning with the source dir */
|
||||
uprv_strcpy(filename, srcDir);
|
||||
basename=filename+uprv_strlen(filename);
|
||||
if(basename>filename && *(basename-1)!=U_FILE_SEP_CHAR) {
|
||||
*basename++=U_FILE_SEP_CHAR;
|
||||
}
|
||||
|
||||
/* initialize */
|
||||
pv=upvec_open(1, 10000);
|
||||
caseSensitive=uset_open(1, 0); /* empty set (start>end) */
|
||||
|
||||
/* process SpecialCasing.txt */
|
||||
writeUCDFilename(basename, "SpecialCasing", suffix);
|
||||
parseSpecialCasing(filename, &errorCode);
|
||||
|
||||
/* process CaseFolding.txt */
|
||||
writeUCDFilename(basename, "CaseFolding", suffix);
|
||||
parseCaseFolding(filename, &errorCode);
|
||||
|
||||
/* process additional properties files */
|
||||
*basename=0;
|
||||
|
||||
parseBinariesFile(filename, basename, suffix, &propListBinaries, &errorCode);
|
||||
|
||||
parseBinariesFile(filename, basename, suffix, &derCorePropsBinaries, &errorCode);
|
||||
|
||||
/* process UnicodeData.txt */
|
||||
writeUCDFilename(basename, "UnicodeData", suffix);
|
||||
parseDB(filename, &errorCode);
|
||||
|
||||
/* process parsed data */
|
||||
makeCaseClosure();
|
||||
|
||||
makeExceptions();
|
||||
|
||||
if(U_SUCCESS(errorCode)) {
|
||||
/* write the properties data file */
|
||||
generateData(destDir);
|
||||
}
|
||||
|
||||
u_cleanup();
|
||||
return errorCode;
|
||||
}
|
||||
|
||||
/*
 * Write "<filename>[-<suffix>].txt" at basename.
 * The "-<suffix>" part is only written if suffix is not NULL.
 * The caller must provide enough room behind basename; no length is checked here.
 */
U_CFUNC void
writeUCDFilename(char *basename, const char *filename, const char *suffix) {
    char *out=basename;

    uprv_strcpy(out, filename);
    out+=uprv_strlen(filename);

    if(suffix!=NULL) {
        *out++='-';
        uprv_strcpy(out, suffix);
        out+=uprv_strlen(suffix);
    }

    uprv_strcpy(out, ".txt");
}
|
||||
|
||||
/* TODO: move to toolutil */
|
||||
U_CFUNC UBool
|
||||
isToken(const char *token, const char *s) {
|
||||
const char *z;
|
||||
int32_t j;
|
||||
|
||||
s=u_skipWhitespace(s);
|
||||
for(j=0;; ++j) {
|
||||
if(token[j]!=0) {
|
||||
if(s[j]!=token[j]) {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
z=u_skipWhitespace(s+j);
|
||||
if(*z==';' || *z==0) {
|
||||
return TRUE;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static void
|
||||
_set_addAll(USet *set, const UChar *s, int32_t length) {
|
||||
UChar32 c;
|
||||
int32_t i;
|
||||
|
||||
/* needs length>=0 */
|
||||
for(i=0; i<length; /* U16_NEXT advances i */) {
|
||||
U16_NEXT(s, i, length, c);
|
||||
uset_add(set, c);
|
||||
}
|
||||
}
|
||||
|
||||
/* parser for SpecialCasing.txt --------------------------------------------- */
|
||||
|
||||
#define MAX_SPECIAL_CASING_COUNT 500
|
||||
|
||||
static SpecialCasing specialCasings[MAX_SPECIAL_CASING_COUNT];
|
||||
static int32_t specialCasingCount=0;
|
||||
|
||||
static void U_CALLCONV
|
||||
specialCasingLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *end;
|
||||
|
||||
/* get code point */
|
||||
specialCasings[specialCasingCount].code=(UChar32)uprv_strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
|
||||
end=(char *)u_skipWhitespace(end);
|
||||
if(end<=fields[0][0] || end!=fields[0][1]) {
|
||||
fprintf(stderr, "gencase: syntax error in SpecialCasing.txt field 0 at %s\n", fields[0][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* is this a complex mapping? */
|
||||
if(*(end=(char *)u_skipWhitespace(fields[4][0]))!=0 && *end!=';' && *end!='#') {
|
||||
/* there is some condition text in the fifth field */
|
||||
specialCasings[specialCasingCount].isComplex=TRUE;
|
||||
|
||||
/* do not store any actual mappings for this */
|
||||
specialCasings[specialCasingCount].lowerCase[0]=0;
|
||||
specialCasings[specialCasingCount].upperCase[0]=0;
|
||||
specialCasings[specialCasingCount].titleCase[0]=0;
|
||||
} else {
|
||||
/* just set the "complex" flag and get the case mappings */
|
||||
specialCasings[specialCasingCount].isComplex=FALSE;
|
||||
specialCasings[specialCasingCount].lowerCase[0]=
|
||||
(UChar)u_parseString(fields[1][0], specialCasings[specialCasingCount].lowerCase+1, 31, NULL, pErrorCode);
|
||||
specialCasings[specialCasingCount].upperCase[0]=
|
||||
(UChar)u_parseString(fields[3][0], specialCasings[specialCasingCount].upperCase+1, 31, NULL, pErrorCode);
|
||||
specialCasings[specialCasingCount].titleCase[0]=
|
||||
(UChar)u_parseString(fields[2][0], specialCasings[specialCasingCount].titleCase+1, 31, NULL, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "gencase: error parsing special casing at %s\n", fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
|
||||
uset_add(caseSensitive, (UChar32)specialCasings[specialCasingCount].code);
|
||||
_set_addAll(caseSensitive, specialCasings[specialCasingCount].lowerCase+1, specialCasings[specialCasingCount].lowerCase[0]);
|
||||
_set_addAll(caseSensitive, specialCasings[specialCasingCount].upperCase+1, specialCasings[specialCasingCount].upperCase[0]);
|
||||
_set_addAll(caseSensitive, specialCasings[specialCasingCount].titleCase+1, specialCasings[specialCasingCount].titleCase[0]);
|
||||
}
|
||||
|
||||
if(++specialCasingCount==MAX_SPECIAL_CASING_COUNT) {
|
||||
fprintf(stderr, "gencase: too many special casing mappings\n");
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
exit(U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
static int32_t U_CALLCONV
|
||||
compareSpecialCasings(const void *context, const void *left, const void *right) {
|
||||
return ((const SpecialCasing *)left)->code-((const SpecialCasing *)right)->code;
|
||||
}
|
||||
|
||||
static void
|
||||
parseSpecialCasing(const char *filename, UErrorCode *pErrorCode) {
|
||||
char *fields[5][2];
|
||||
int32_t i, j;
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 5, specialCasingLineFn, NULL, pErrorCode);
|
||||
|
||||
/* sort the special casing entries by code point */
|
||||
if(specialCasingCount>0) {
|
||||
uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
|
||||
compareSpecialCasings, NULL, FALSE, pErrorCode);
|
||||
}
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* replace multiple entries for any code point by one "complex" one */
|
||||
j=0;
|
||||
for(i=1; i<specialCasingCount; ++i) {
|
||||
if(specialCasings[i-1].code==specialCasings[i].code) {
|
||||
/* there is a duplicate code point */
|
||||
specialCasings[i-1].code=0x7fffffff; /* remove this entry in the following sorting */
|
||||
specialCasings[i].isComplex=TRUE; /* make the following one complex */
|
||||
specialCasings[i].lowerCase[0]=0;
|
||||
specialCasings[i].upperCase[0]=0;
|
||||
specialCasings[i].titleCase[0]=0;
|
||||
++j;
|
||||
}
|
||||
}
|
||||
|
||||
/* if some entries just were removed, then re-sort */
|
||||
if(j>0) {
|
||||
uprv_sortArray(specialCasings, specialCasingCount, sizeof(SpecialCasing),
|
||||
compareSpecialCasings, NULL, FALSE, pErrorCode);
|
||||
specialCasingCount-=j;
|
||||
}
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add one complex mapping to caseSensitive that was filtered out above:
|
||||
* Greek final Sigma has a conditional mapping but not locale-sensitive,
|
||||
* and it is taken when lowercasing just U+03A3 alone.
|
||||
* 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
|
||||
*/
|
||||
uset_add(caseSensitive, 0x3c2);
|
||||
}
|
||||
|
||||
/* parser for CaseFolding.txt ----------------------------------------------- */
|
||||
|
||||
#define MAX_CASE_FOLDING_COUNT 2000
|
||||
|
||||
static CaseFolding caseFoldings[MAX_CASE_FOLDING_COUNT];
|
||||
static int32_t caseFoldingCount=0;
|
||||
|
||||
static void U_CALLCONV
|
||||
caseFoldingLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
char *end;
|
||||
static UChar32 prevCode=0;
|
||||
int32_t count;
|
||||
char status;
|
||||
|
||||
/* get code point */
|
||||
caseFoldings[caseFoldingCount].code=(UChar32)uprv_strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
|
||||
end=(char *)u_skipWhitespace(end);
|
||||
if(end<=fields[0][0] || end!=fields[0][1]) {
|
||||
fprintf(stderr, "gencase: syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* get the status of this mapping */
|
||||
caseFoldings[caseFoldingCount].status=status=*u_skipWhitespace(fields[1][0]);
|
||||
if(status!='L' && status!='E' && status!='C' && status!='S' && status!='F' && status!='I' && status!='T') {
|
||||
fprintf(stderr, "gencase: unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* ignore all case folding mappings that are the same as the UnicodeData.txt lowercase mappings */
|
||||
if(status=='L') {
|
||||
return;
|
||||
}
|
||||
|
||||
/* get the mapping */
|
||||
count=caseFoldings[caseFoldingCount].full[0]=
|
||||
(UChar)u_parseString(fields[2][0], caseFoldings[caseFoldingCount].full+1, 31, (uint32_t *)&caseFoldings[caseFoldingCount].simple, pErrorCode);
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
fprintf(stderr, "gencase: error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
|
||||
exit(*pErrorCode);
|
||||
}
|
||||
|
||||
/* there is a simple mapping only if there is exactly one code point (count is in UChars) */
|
||||
if(count==0 || count>2 || (count==2 && UTF_IS_SINGLE(caseFoldings[caseFoldingCount].full[1]))) {
|
||||
caseFoldings[caseFoldingCount].simple=0;
|
||||
}
|
||||
|
||||
/* update the case-sensitive set */
|
||||
if(status!='T') {
|
||||
uset_add(caseSensitive, (UChar32)caseFoldings[caseFoldingCount].code);
|
||||
_set_addAll(caseSensitive, caseFoldings[caseFoldingCount].full+1, caseFoldings[caseFoldingCount].full[0]);
|
||||
}
|
||||
|
||||
/* check the status */
|
||||
if(status=='S') {
|
||||
/* check if there was a full mapping for this code point before */
|
||||
if( caseFoldingCount>0 &&
|
||||
caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code &&
|
||||
caseFoldings[caseFoldingCount-1].status=='F'
|
||||
) {
|
||||
/* merge the two entries */
|
||||
caseFoldings[caseFoldingCount-1].simple=caseFoldings[caseFoldingCount].simple;
|
||||
return;
|
||||
}
|
||||
} else if(status=='F') {
|
||||
/* check if there was a simple mapping for this code point before */
|
||||
if( caseFoldingCount>0 &&
|
||||
caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code &&
|
||||
caseFoldings[caseFoldingCount-1].status=='S'
|
||||
) {
|
||||
/* merge the two entries */
|
||||
uprv_memcpy(caseFoldings[caseFoldingCount-1].full, caseFoldings[caseFoldingCount].full, 32*U_SIZEOF_UCHAR);
|
||||
return;
|
||||
}
|
||||
} else if(status=='I' || status=='T') {
|
||||
/* check if there was a default mapping for this code point before (remove it) */
|
||||
while(caseFoldingCount>0 &&
|
||||
caseFoldings[caseFoldingCount-1].code==caseFoldings[caseFoldingCount].code
|
||||
) {
|
||||
prevCode=0;
|
||||
--caseFoldingCount;
|
||||
}
|
||||
/* store only a marker for special handling for cases like dotless i */
|
||||
caseFoldings[caseFoldingCount].simple=0;
|
||||
caseFoldings[caseFoldingCount].full[0]=0;
|
||||
}
|
||||
|
||||
/* check that the code points (caseFoldings[caseFoldingCount].code) are in ascending order */
|
||||
if(caseFoldings[caseFoldingCount].code<=prevCode && caseFoldings[caseFoldingCount].code>0) {
|
||||
fprintf(stderr, "gencase: error - CaseFolding entries out of order, U+%04lx after U+%04lx\n",
|
||||
(unsigned long)caseFoldings[caseFoldingCount].code,
|
||||
(unsigned long)prevCode);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
prevCode=caseFoldings[caseFoldingCount].code;
|
||||
|
||||
if(++caseFoldingCount==MAX_CASE_FOLDING_COUNT) {
|
||||
fprintf(stderr, "gencase: too many case folding mappings\n");
|
||||
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
||||
exit(U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
parseCaseFolding(const char *filename, UErrorCode *pErrorCode) {
|
||||
char *fields[3][2];
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 3, caseFoldingLineFn, NULL, pErrorCode);
|
||||
}
|
||||
|
||||
/* parser for UnicodeData.txt ----------------------------------------------- */
|
||||
|
||||
static int32_t specialCasingIndex=0, caseFoldingIndex=0;
|
||||
|
||||
static void U_CALLCONV
|
||||
unicodeDataLineFn(void *context,
|
||||
char *fields[][2], int32_t fieldCount,
|
||||
UErrorCode *pErrorCode) {
|
||||
Props p;
|
||||
char *end;
|
||||
static UChar32 prevCode=0;
|
||||
UChar32 value;
|
||||
UBool something=FALSE;
|
||||
|
||||
/* reset the properties */
|
||||
uprv_memset(&p, 0, sizeof(Props));
|
||||
|
||||
/* get the character code, field 0 */
|
||||
p.code=(UChar32)uprv_strtoul(fields[0][0], &end, 16);
|
||||
if(end<=fields[0][0] || end!=fields[0][1]) {
|
||||
fprintf(stderr, "gencase: syntax error in field 0 at %s\n", fields[0][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* get general category, field 2 */
|
||||
if(isToken("Lt", fields[2][0])) {
|
||||
p.isTitle=TRUE;
|
||||
something=TRUE;
|
||||
}
|
||||
|
||||
/* get canonical combining class, field 3 */
|
||||
value=(UChar32)uprv_strtoul(fields[3][0], &end, 10);
|
||||
if(end<=fields[3][0] || end!=fields[3][1] || value>0xff) {
|
||||
fprintf(stderr, "gencase: syntax error in field 3 at %s\n", fields[0][0]);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
if(value>0) {
|
||||
p.cc=(uint8_t)value;
|
||||
something=TRUE;
|
||||
}
|
||||
|
||||
/* get uppercase mapping, field 12 */
|
||||
value=(UChar32)uprv_strtoul(fields[12][0], &end, 16);
|
||||
if(end!=fields[12][1]) {
|
||||
fprintf(stderr, "gencase: syntax error in field 12 at code 0x%lx\n",
|
||||
(unsigned long)p.code);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
if(value!=0 && value!=p.code) {
|
||||
p.upperCase=value;
|
||||
uset_add(caseSensitive, p.code);
|
||||
uset_add(caseSensitive, value);
|
||||
something=TRUE;
|
||||
}
|
||||
|
||||
/* get lowercase value, field 13 */
|
||||
value=(UChar32)uprv_strtoul(fields[13][0], &end, 16);
|
||||
if(end!=fields[13][1]) {
|
||||
fprintf(stderr, "gencase: syntax error in field 13 at code 0x%lx\n",
|
||||
(unsigned long)p.code);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
if(value!=0 && value!=p.code) {
|
||||
p.lowerCase=value;
|
||||
uset_add(caseSensitive, p.code);
|
||||
uset_add(caseSensitive, value);
|
||||
something=TRUE;
|
||||
}
|
||||
|
||||
/* get titlecase value, field 14 */
|
||||
value=(UChar32)uprv_strtoul(fields[14][0], &end, 16);
|
||||
if(end!=fields[14][1]) {
|
||||
fprintf(stderr, "gencase: syntax error in field 14 at code 0x%lx\n",
|
||||
(unsigned long)p.code);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
if(value!=0 && value!=p.code) {
|
||||
p.titleCase=value;
|
||||
uset_add(caseSensitive, p.code);
|
||||
uset_add(caseSensitive, value);
|
||||
something=TRUE;
|
||||
}
|
||||
|
||||
/* set additional properties from previously parsed files */
|
||||
if(specialCasingIndex<specialCasingCount && p.code==specialCasings[specialCasingIndex].code) {
|
||||
p.specialCasing=specialCasings+specialCasingIndex++;
|
||||
something=TRUE;
|
||||
} else {
|
||||
p.specialCasing=NULL;
|
||||
}
|
||||
if(caseFoldingIndex<caseFoldingCount && p.code==caseFoldings[caseFoldingIndex].code) {
|
||||
p.caseFolding=caseFoldings+caseFoldingIndex++;
|
||||
something=TRUE;
|
||||
|
||||
/* ignore "Common" mappings (simple==full) that map to the same code point as the regular lowercase mapping */
|
||||
if( p.caseFolding->status=='C' &&
|
||||
p.caseFolding->simple==p.lowerCase
|
||||
) {
|
||||
p.caseFolding=NULL;
|
||||
}
|
||||
} else {
|
||||
p.caseFolding=NULL;
|
||||
}
|
||||
|
||||
/* check for non-character code points */
|
||||
if((p.code&0xfffe)==0xfffe || (uint32_t)(p.code-0xfdd0)<0x20) {
|
||||
fprintf(stderr, "gencase: error - properties for non-character code point U+%04lx\n",
|
||||
(unsigned long)p.code);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* check that the code points (p.code) are in ascending order */
|
||||
if(p.code<=prevCode && p.code>0) {
|
||||
fprintf(stderr, "gencase: error - UnicodeData entries out of order, U+%04lx after U+%04lx\n",
|
||||
(unsigned long)p.code, (unsigned long)prevCode);
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/* properties for a single code point */
|
||||
if(something) {
|
||||
setProps(&p);
|
||||
}
|
||||
|
||||
prevCode=p.code;
|
||||
}
|
||||
|
||||
static void
|
||||
parseDB(const char *filename, UErrorCode *pErrorCode) {
|
||||
char *fields[15][2];
|
||||
UChar32 start, end;
|
||||
int32_t i;
|
||||
|
||||
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
u_parseDelimitedFile(filename, ';', fields, 15, unicodeDataLineFn, NULL, pErrorCode);
|
||||
|
||||
/* are all sub-properties consumed? */
|
||||
if(specialCasingIndex<specialCasingCount) {
|
||||
fprintf(stderr, "gencase: error - some code points in SpecialCasing.txt are missing from UnicodeData.txt\n");
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
if(caseFoldingIndex<caseFoldingCount) {
|
||||
fprintf(stderr, "gencase: error - some code points in CaseFolding.txt are missing from UnicodeData.txt\n");
|
||||
*pErrorCode=U_PARSE_ERROR;
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
if(U_FAILURE(*pErrorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
for(i=0;
|
||||
0==uset_getItem(caseSensitive, i, &start, &end, NULL, 0, pErrorCode) && U_SUCCESS(*pErrorCode);
|
||||
++i
|
||||
) {
|
||||
addCaseSensitive(start, end);
|
||||
}
|
||||
if(*pErrorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
|
||||
*pErrorCode=U_ZERO_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
194
icu4c/source/tools/gencase/gencase.dsp
Normal file
194
icu4c/source/tools/gencase/gencase.dsp
Normal file
|
@ -0,0 +1,194 @@
|
|||
# Microsoft Developer Studio Project File - Name="gencase" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
||||
# ** DO NOT EDIT **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Console Application" 0x0103
|
||||
|
||||
CFG=gencase - Win32 Debug
|
||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
||||
!MESSAGE use the Export Makefile command and run
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "gencase.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE You can specify a configuration when running NMAKE
|
||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "gencase.mak" CFG="gencase - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE Possible choices for configuration are:
|
||||
!MESSAGE
|
||||
!MESSAGE "gencase - Win32 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "gencase - Win32 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "gencase - Win64 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "gencase - Win64 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP AllowPerConfigDependencies 0
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "gencase - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
|
||||
# ADD CPP /nologo /G6 /MD /Za /W3 /GX /O2 /I "..\toolutil" /I "..\..\common" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:I386 /libpath:"..\toolutil\Release" /libpath:"..\..\..\lib\Release" /libpath:"..\..\..\lib"
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Release\gencase.exe
|
||||
InputPath=.\Release\gencase.exe
|
||||
InputName=gencase
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "gencase - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
|
||||
# ADD CPP /nologo /G6 /MDd /Za /W3 /Gm /GX /ZI /Od /I "..\toolutil" /I "..\..\common" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FR /FD /GZ /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
# ADD LINK32 icuucd.lib icutud.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\toolutil\Debug" /libpath:"..\..\..\lib\Debug" /libpath:"..\..\..\lib"
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Debug\gencase.exe
|
||||
InputPath=.\Debug\gencase.exe
|
||||
InputName=gencase
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "gencase - Win64 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
|
||||
# ADD CPP /nologo /MD /Za /W3 /GX /Zi /O2 /I "..\toolutil" /I "..\..\common" /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /QIA64_fmaopt /Wp64 /Zm600 /c
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64
|
||||
# ADD LINK32 icuuc.lib icutu.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\toolutil\Release" /libpath:"..\..\..\lib\Release" /libpath:"..\..\..\lib" /machine:IA64
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Release\gencase.exe
|
||||
InputPath=.\Release\gencase.exe
|
||||
InputName=gencase
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "gencase - Win64 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
|
||||
# ADD CPP /nologo /MDd /Za /W3 /Gm /GX /Zi /Od /I "..\toolutil" /I "..\..\common" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FR /FD /GZ /QIA64_fmaopt /Wp64 /Zm600 /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64
|
||||
# ADD LINK32 icuucd.lib icutud.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\toolutil\Debug" /libpath:"..\..\..\lib\Debug" /libpath:"..\..\..\lib" /machine:IA64
|
||||
# Begin Custom Build
|
||||
TargetPath=.\Debug\gencase.exe
|
||||
InputPath=.\Debug\gencase.exe
|
||||
InputName=gencase
|
||||
SOURCE="$(InputPath)"
|
||||
|
||||
"..\..\..\bin\$(InputName).exe" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(TargetPath) ..\..\..\bin
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "gencase - Win32 Release"
|
||||
# Name "gencase - Win32 Debug"
|
||||
# Name "gencase - Win64 Release"
|
||||
# Name "gencase - Win64 Debug"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\gencase.c
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\gencase.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\store.c
|
||||
# End Source File
|
||||
# End Target
|
||||
# End Project
|
187
icu4c/source/tools/gencase/gencase.h
Normal file
187
icu4c/source/tools/gencase/gencase.h
Normal file
|
@ -0,0 +1,187 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*   file name:  gencase.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004aug28
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __GENCASE_H__
|
||||
#define __GENCASE_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "utrie.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
/* file definitions --------------------------------------------------------- */
|
||||
|
||||
#define UCASE_DATA_NAME "ucase"
|
||||
#define UCASE_DATA_TYPE "icu"
|
||||
|
||||
/* format "cAsE" */
|
||||
#define UCASE_FMT_0 0x63
|
||||
#define UCASE_FMT_1 0x41
|
||||
#define UCASE_FMT_2 0x53
|
||||
#define UCASE_FMT_3 0x45
|
||||
|
||||
/* indexes into indexes[] */
|
||||
enum {
|
||||
UCASE_IX_INDEX_TOP,
|
||||
UCASE_IX_LENGTH,
|
||||
UCASE_IX_TRIE_SIZE,
|
||||
UCASE_IX_EXC_LENGTH,
|
||||
|
||||
UCASE_IX_TOP=16
|
||||
};
|
||||
|
||||
/* definitions for 16-bit case properties word ------------------------------ */
|
||||
|
||||
/* 2-bit constants for types of cased characters */
|
||||
#define UCASE_TYPE_MASK 3
|
||||
enum {
|
||||
UCASE_NONE,
|
||||
UCASE_LOWER,
|
||||
UCASE_UPPER,
|
||||
UCASE_TITLE
|
||||
};
|
||||
|
||||
#define UCASE_SENSITIVE 4
|
||||
#define UCASE_EXCEPTION 8
|
||||
|
||||
#define UCASE_DOT_MASK 0x30
|
||||
enum {
|
||||
UCASE_NO_DOT=0,
|
||||
UCASE_SOFT_DOTTED=0x10,
|
||||
UCASE_ABOVE=0x20, /* "above" accents with cc=230 */
|
||||
UCASE_OTHER_ACCENT=0x30 /* other character (0<cc!=230) */
|
||||
};
|
||||
|
||||
/* no exception: bits 15..6 are a 10-bit signed case mapping delta */
|
||||
#define UCASE_DELTA_SHIFT 6
|
||||
#define UCASE_DELTA_MASK 0xffc0
|
||||
#define UCASE_MAX_DELTA 0x1ff
|
||||
#define UCASE_MIN_DELTA (-UCASE_MAX_DELTA-1)
|
||||
|
||||
#define UCASE_GET_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT)
|
||||
|
||||
/* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */
|
||||
#define UCASE_EXC_SHIFT 4
|
||||
#define UCASE_EXC_MASK 0xfff0
|
||||
#define UCASE_MAX_EXCEPTIONS 0x1000
|
||||
|
||||
/* definitions for 16-bit main exceptions word ------------------------------ */
|
||||
|
||||
/* first 8 bits indicate values in optional slots */
|
||||
enum {
|
||||
UCASE_EXC_LOWER,
|
||||
UCASE_EXC_FOLD,
|
||||
UCASE_EXC_UPPER,
|
||||
UCASE_EXC_TITLE,
|
||||
UCASE_EXC_4, /* reserved */
|
||||
UCASE_EXC_5, /* reserved */
|
||||
UCASE_EXC_6, /* reserved */
|
||||
UCASE_EXC_FULL_MAPPINGS
|
||||
};
|
||||
|
||||
/* each slot is 2 uint16_t instead of 1 */
|
||||
#define UCASE_EXC_DOUBLE_SLOTS 0x100
|
||||
|
||||
/* reserved: exception bits 11..9 */
|
||||
|
||||
/* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */
|
||||
#define UCASE_EXC_DOT_SHIFT 8
|
||||
|
||||
/* normally stored in the main word, but pushed out for larger exception indexes */
|
||||
#define UCASE_EXC_DOT_MASK 0x3000
|
||||
enum {
|
||||
UCASE_EXC_NO_DOT=0,
|
||||
UCASE_EXC_SOFT_DOTTED=0x1000,
|
||||
UCASE_EXC_ABOVE=0x2000, /* "above" accents with cc=230 */
|
||||
UCASE_EXC_OTHER_ACCENT=0x3000 /* other character (0<cc!=230) */
|
||||
};
|
||||
|
||||
/* complex/conditional mappings */
|
||||
#define UCASE_EXC_CONDITIONAL_SPECIAL 0x4000
|
||||
#define UCASE_EXC_CONDITIONAL_FOLD 0x8000
|
||||
|
||||
/* definitions for lengths word for full case mappings */
|
||||
#define UCASE_FULL_LOWER 0xf
|
||||
#define UCASE_FULL_FOLDING 0xf0
|
||||
#define UCASE_FULL_UPPER 0xf00
|
||||
#define UCASE_FULL_TITLE 0xf000
|
||||
|
||||
/* gencase ------------------------------------------------------------------ */
|
||||
|
||||
#define UGENCASE_EXC_SHIFT 16
|
||||
#define UGENCASE_EXC_MASK 0xffff0000
|
||||
|
||||
/* special casing data */
|
||||
typedef struct {
|
||||
UChar32 code;
|
||||
UBool isComplex;
|
||||
UChar lowerCase[32], upperCase[32], titleCase[32];
|
||||
} SpecialCasing;
|
||||
|
||||
/* case folding data */
|
||||
typedef struct {
|
||||
UChar32 code, simple;
|
||||
char status;
|
||||
UChar full[32];
|
||||
} CaseFolding;
|
||||
|
||||
/* case mapping properties */
|
||||
typedef struct {
|
||||
UChar32 code, lowerCase, upperCase, titleCase;
|
||||
SpecialCasing *specialCasing;
|
||||
CaseFolding *caseFolding;
|
||||
uint8_t cc;
|
||||
UBool isTitle;
|
||||
} Props;
|
||||
|
||||
/* global flags */
|
||||
extern UBool beVerbose, haveCopyright;
|
||||
|
||||
/* properties vectors in gencase.c */
|
||||
extern uint32_t *pv;
|
||||
|
||||
/* prototypes */
|
||||
U_CFUNC void
|
||||
writeUCDFilename(char *basename, const char *filename, const char *suffix);
|
||||
|
||||
U_CFUNC UBool
|
||||
isToken(const char *token, const char *s);
|
||||
|
||||
extern void
|
||||
setUnicodeVersion(const char *v);
|
||||
|
||||
extern void
|
||||
setProps(Props *p);
|
||||
|
||||
U_CFUNC uint32_t U_EXPORT2
|
||||
getFoldedPropsValue(UNewTrie *trie, UChar32 start, int32_t offset);
|
||||
|
||||
extern void
|
||||
addCaseSensitive(UChar32 first, UChar32 last);
|
||||
|
||||
extern void
|
||||
makeCaseClosure(void);
|
||||
|
||||
extern void
|
||||
makeExceptions(void);
|
||||
|
||||
extern void
|
||||
generateData(const char *dataDir);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
#endif
|
||||
|
166
icu4c/source/tools/gencase/gencase.vcproj
Normal file
166
icu4c/source/tools/gencase/gencase.vcproj
Normal file
|
@ -0,0 +1,166 @@
|
|||
<?xml version="1.0" encoding="Windows-1252"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="7.10"
|
||||
Name="gencase"
|
||||
SccProjectName=""
|
||||
SccLocalPath="">
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"/>
|
||||
</Platforms>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory=".\Release"
|
||||
IntermediateDirectory=".\Release"
|
||||
ConfigurationType="1"
|
||||
UseOfMFC="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="FALSE"
|
||||
CharacterSet="2">
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
InlineFunctionExpansion="2"
|
||||
ImproveFloatingPointConsistency="TRUE"
|
||||
AdditionalIncludeDirectories="..\toolutil,..\..\common"
|
||||
PreprocessorDefinitions="WIN32,NDEBUG,_CONSOLE"
|
||||
StringPooling="TRUE"
|
||||
RuntimeLibrary="2"
|
||||
EnableFunctionLevelLinking="TRUE"
|
||||
DisableLanguageExtensions="TRUE"
|
||||
PrecompiledHeaderFile=".\Release/gencase.pch"
|
||||
AssemblerListingLocation=".\Release/"
|
||||
ObjectFile=".\Release/"
|
||||
ProgramDataBaseFileName=".\Release/"
|
||||
WarningLevel="3"
|
||||
SuppressStartupBanner="TRUE"
|
||||
CompileAs="0"/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="copy "$(TargetPath)" ..\..\..\bin
|
||||
"
|
||||
Outputs="..\..\..\bin\$(TargetFileName)"/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
OutputFile=".\Release/gencase.exe"
|
||||
LinkIncremental="1"
|
||||
SuppressStartupBanner="TRUE"
|
||||
ProgramDatabaseFile=".\Release/gencase.pdb"
|
||||
SubSystem="1"/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
TypeLibraryName=".\Release/gencase.tlb"/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="NDEBUG"
|
||||
Culture="1033"/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"/>
|
||||
<Tool
|
||||
Name="VCManagedWrapperGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory=".\Debug"
|
||||
IntermediateDirectory=".\Debug"
|
||||
ConfigurationType="1"
|
||||
UseOfMFC="0"
|
||||
ATLMinimizesCRunTimeLibraryUsage="FALSE"
|
||||
CharacterSet="2">
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
ImproveFloatingPointConsistency="TRUE"
|
||||
OptimizeForProcessor="2"
|
||||
AdditionalIncludeDirectories="..\toolutil,..\..\common"
|
||||
PreprocessorDefinitions="WIN32,_DEBUG,_CONSOLE"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="3"
|
||||
DisableLanguageExtensions="TRUE"
|
||||
PrecompiledHeaderFile=".\Debug/gencase.pch"
|
||||
AssemblerListingLocation=".\Debug/"
|
||||
ObjectFile=".\Debug/"
|
||||
ProgramDataBaseFileName=".\Debug/"
|
||||
BrowseInformation="1"
|
||||
WarningLevel="3"
|
||||
SuppressStartupBanner="TRUE"
|
||||
DebugInformationFormat="4"
|
||||
CompileAs="0"/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="copy "$(TargetPath)" ..\..\..\bin
|
||||
"
|
||||
Outputs="..\..\..\bin\$(TargetFileName)"/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
OutputFile=".\Debug/gencase.exe"
|
||||
LinkIncremental="2"
|
||||
SuppressStartupBanner="TRUE"
|
||||
GenerateDebugInformation="TRUE"
|
||||
ProgramDatabaseFile=".\Debug/gencase.pdb"
|
||||
SubSystem="1"/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
TypeLibraryName=".\Debug/gencase.tlb"/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
PreprocessorDefinitions="_DEBUG"
|
||||
Culture="1033"/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"/>
|
||||
<Tool
|
||||
Name="VCManagedWrapperGeneratorTool"/>
|
||||
<Tool
|
||||
Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="Source Files"
|
||||
Filter="c;cpp;rc">
|
||||
<File
|
||||
RelativePath=".\gencase.c">
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\store.c">
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Header Files"
|
||||
Filter="h">
|
||||
<File
|
||||
RelativePath=".\gencase.h">
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Resource Files"
|
||||
Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
569
icu4c/source/tools/gencase/store.c
Normal file
569
icu4c/source/tools/gencase/store.c
Normal file
|
@ -0,0 +1,569 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: store.c
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2004aug28
|
||||
* created by: Markus W. Scherer
|
||||
*
|
||||
* Store Unicode case mapping properties efficiently for
|
||||
* random access.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "filestrm.h"
|
||||
#include "utrie.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unewdata.h"
|
||||
#include "propsvec.h"
|
||||
#include "gencase.h"
|
||||
|
||||
/* Unicode case mapping properties file format ---------------------------------
|
||||
|
||||
The file format prepared and written here contains several data
|
||||
structures that store indexes or data.
|
||||
|
||||
Before the data contents described below, there are the headers required by
|
||||
the udata API for loading ICU data. Especially, a UDataInfo structure
|
||||
precedes the actual data. It contains platform properties values and the
|
||||
file format version.
|
||||
|
||||
The following is a description of format version 1.
|
||||
|
||||
The file contains the following structures:
|
||||
|
||||
const int32_t indexes[i0] with values i0, i1, ...:
|
||||
(see UCASE_IX_... constants for names of indexes)
|
||||
|
||||
i0 indexLength; -- length of indexes[] (UCASE_IX_TOP)
|
||||
i1 dataLength; -- length in bytes of the post-header data (incl. indexes[])
|
||||
i2 trieSize; -- size in bytes of the case mapping properties trie
|
||||
i3 exceptionsLength; -- length in uint16_t of the exceptions array
|
||||
|
||||
i4..indexes[i0] reservedIndexes; -- reserved values; 0 for now
|
||||
|
||||
|
||||
Serialized trie, see utrie.h;
|
||||
|
||||
const uint16_t exceptions[exceptionsLength];
|
||||
|
||||
|
||||
Trie data word:
|
||||
Bits
|
||||
if(exception) {
|
||||
15..4 unsigned exception index
|
||||
} else {
|
||||
if(not uncased) {
|
||||
15..6 signed delta to simple case mapping code point
|
||||
(add delta to input code point)
|
||||
}
|
||||
5..4 0 normal character with cc=0
|
||||
1 soft-dotted character
|
||||
2 cc=230
|
||||
3 other cc
|
||||
}
|
||||
3 exception
|
||||
2 case sensitive
|
||||
1..0 0 uncased
|
||||
1 lowercase
|
||||
2 uppercase
|
||||
3 titlecase
|
||||
|
||||
|
||||
Exceptions:
|
||||
A sub-array of the exceptions array is indexed by the exception index in a
|
||||
trie word.
|
||||
The sub-array consists of the following fields:
|
||||
uint16_t excWord;
|
||||
uint16_t optional values [];
|
||||
UTF-16 strings for full (string) mappings for lowercase, case folding, uppercase, titlecase
|
||||
|
||||
excWord: (see UCASE_EXC_...)
|
||||
Bits
|
||||
15 conditional case folding
|
||||
14 conditional special casing
|
||||
13..12 same as non-exception trie data bits 5..4
|
||||
moved here because the exception index needs more bits than the delta
|
||||
0 normal character with cc=0
|
||||
1 soft-dotted character
|
||||
2 cc=230
|
||||
3 other cc
|
||||
11.. 9 reserved
|
||||
8 if set, then for each optional-value slot there are 2 uint16_t values
|
||||
(high and low parts of 32-bit values)
|
||||
instead of single ones
|
||||
7.. 0 bits for which optional value is present
|
||||
|
||||
Optional-value slots:
|
||||
0 lowercase mapping (code point)
|
||||
1 case folding (code point)
|
||||
2 uppercase mapping (code point)
|
||||
3 titlecase mapping (code point)
|
||||
4..6 reserved
|
||||
7 there is at least one full (string) case mapping
|
||||
the length of each is encoded in a nibble of this optional value,
|
||||
and the strings follow this optional value in the same order:
|
||||
lower/fold/upper/title
|
||||
|
||||
For space saving, some values are not stored. Lookups are as follows:
|
||||
- If special casing is conditional, then no full lower/upper/title mapping
|
||||
strings are stored.
|
||||
- If case folding is conditional, then no simple or full case foldings are
|
||||
stored.
|
||||
- Fall back in this order:
|
||||
full (string) mapping -- if full mappings are used
|
||||
simple (code point) mapping of the same type
|
||||
simple fold->simple lower
|
||||
simple title->simple upper
|
||||
finally, the original code point (no mapping)
|
||||
|
||||
----------------------------------------------------------------------------- */
|
||||
|
||||
/* UDataInfo cf. udata.h */
|
||||
static UDataInfo dataInfo={
|
||||
sizeof(UDataInfo),
|
||||
0,
|
||||
|
||||
U_IS_BIG_ENDIAN,
|
||||
U_CHARSET_FAMILY,
|
||||
U_SIZEOF_UCHAR,
|
||||
0,
|
||||
|
||||
/* dataFormat="cAsE" */
|
||||
{ UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
|
||||
{ 1, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
|
||||
{ 4, 0, 1, 0 } /* dataVersion */
|
||||
};
|
||||
|
||||
enum {
|
||||
/* maximum number of exceptions expected */
|
||||
MAX_EXC_COUNT=1000
|
||||
};
|
||||
|
||||
/* exceptions values */
|
||||
static uint16_t exceptions[UCASE_MAX_EXCEPTIONS+100];
|
||||
static uint16_t exceptionsTop=0;
|
||||
static Props excProps[MAX_EXC_COUNT];
|
||||
static uint16_t exceptionsCount=0;
|
||||
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
extern void
|
||||
setUnicodeVersion(const char *v) {
|
||||
UVersionInfo version;
|
||||
u_versionFromString(version, v);
|
||||
uprv_memcpy(dataInfo.dataVersion, version, 4);
|
||||
}
|
||||
|
||||
/* store a character's properties ------------------------------------------- */
|
||||
|
||||
extern void
|
||||
setProps(Props *p) {
|
||||
UErrorCode errorCode;
|
||||
uint32_t value;
|
||||
int32_t delta;
|
||||
uint16_t count;
|
||||
|
||||
/* count the case mappings and other values competing for the value bit field */
|
||||
value=upvec_getValue(pv, p->code, 0);
|
||||
delta=0;
|
||||
count=0;
|
||||
|
||||
if(p->isTitle) {
|
||||
/* the Titlecase property is read late, from UnicodeData.txt */
|
||||
value|=UCASE_TITLE;
|
||||
}
|
||||
|
||||
if(p->upperCase!=0) {
|
||||
/* uppercase mapping as delta if the character is lowercase */
|
||||
if((value&UCASE_TYPE_MASK)==UCASE_LOWER) {
|
||||
delta=p->upperCase-p->code;
|
||||
} else {
|
||||
value|=UCASE_EXCEPTION;
|
||||
}
|
||||
}
|
||||
if(p->lowerCase!=0) {
|
||||
/* lowercase mapping as delta if the character is uppercase or titlecase */
|
||||
if((value&UCASE_TYPE_MASK)==UCASE_UPPER || (value&UCASE_TYPE_MASK)==UCASE_TITLE) {
|
||||
delta=p->lowerCase-p->code;
|
||||
} else {
|
||||
value|=UCASE_EXCEPTION;
|
||||
}
|
||||
}
|
||||
if(p->upperCase!=p->titleCase) {
|
||||
value|=UCASE_EXCEPTION;
|
||||
}
|
||||
if(p->specialCasing!=NULL) {
|
||||
value|=UCASE_EXCEPTION;
|
||||
}
|
||||
if(p->caseFolding!=NULL) {
|
||||
value|=UCASE_EXCEPTION;
|
||||
}
|
||||
|
||||
if(delta<UCASE_MIN_DELTA || UCASE_MAX_DELTA<delta) {
|
||||
value|=UCASE_EXCEPTION;
|
||||
}
|
||||
|
||||
if(p->cc!=0) {
|
||||
if(value&UCASE_DOT_MASK) {
|
||||
fprintf(stderr, "gencase: a soft-dotted character has cc!=0\n");
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
if(p->cc==230) {
|
||||
value|=UCASE_ABOVE;
|
||||
} else {
|
||||
value|=UCASE_OTHER_ACCENT;
|
||||
}
|
||||
}
|
||||
|
||||
/* handle exceptions */
|
||||
if(value&UCASE_EXCEPTION) {
|
||||
/* simply store exceptions for later processing and encoding */
|
||||
value|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT;
|
||||
uprv_memcpy(excProps+exceptionsCount, p, sizeof(*p));
|
||||
if(++exceptionsCount==MAX_EXC_COUNT) {
|
||||
fprintf(stderr, "gencase: too many exceptions\n");
|
||||
exit(U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
}
|
||||
} else {
|
||||
/* store the simple case mapping delta */
|
||||
value|=((uint32_t)delta<<UCASE_DELTA_SHIFT)&UCASE_DELTA_MASK;
|
||||
}
|
||||
|
||||
errorCode=U_ZERO_ERROR;
|
||||
if(!upvec_setValue(pv, p->code, p->code+1, 0, value, 0xffffffff, &errorCode)) {
|
||||
fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n",
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
extern void
|
||||
addCaseSensitive(UChar32 first, UChar32 last) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
if(!upvec_setValue(pv, first, last+1, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode)) {
|
||||
fprintf(stderr, "gencase error: unable to set UCASE_SENSITIVE, code: %s\n",
|
||||
u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Placeholder for computing the case closure; not implemented yet.
 * Declared with an explicit (void) parameter list to match the prototype
 * in gencase.h.
 */
extern void
makeCaseClosure(void) {
    /* TODO */
}
|
||||
|
||||
/* exceptions --------------------------------------------------------------- */
|
||||
|
||||
static UBool
|
||||
fullMappingEqualsSimple(const UChar *s, UChar32 simple, UChar32 c) {
|
||||
int32_t i, length;
|
||||
UChar32 full;
|
||||
|
||||
length=*s++;
|
||||
if(length==0 || length>U16_MAX_LENGTH) {
|
||||
return FALSE;
|
||||
}
|
||||
i=0;
|
||||
U16_NEXT(s, i, length, full);
|
||||
|
||||
if(simple==0) {
|
||||
simple=c; /* UCD has no simple mapping if it's the same as the code point itself */
|
||||
}
|
||||
return (UBool)(i==length && full==simple);
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
makeException(uint32_t value, Props *p) {
|
||||
uint32_t slots[8];
|
||||
uint32_t slotBits;
|
||||
uint16_t excWord, excIndex, excTop, i, count, length, fullLengths;
|
||||
UBool doubleSlots;
|
||||
|
||||
/* excIndex will be returned for storing in the trie word */
|
||||
excIndex=exceptionsTop;
|
||||
if(excIndex>=UCASE_MAX_EXCEPTIONS) {
|
||||
fprintf(stderr, "gencase error: too many exceptions words\n");
|
||||
exit(U_BUFFER_OVERFLOW_ERROR);
|
||||
}
|
||||
|
||||
excTop=excIndex+1; /* +1 for excWord which will be stored at excIndex */
|
||||
|
||||
/* copy and shift the soft-dotted bits */
|
||||
excWord=((uint16_t)value&UCASE_DOT_MASK)<<UCASE_EXC_DOT_SHIFT;
|
||||
|
||||
/* set the bits for conditional mappings */
|
||||
if(p->specialCasing!=NULL && p->specialCasing->isComplex) {
|
||||
excWord|=UCASE_EXC_CONDITIONAL_SPECIAL;
|
||||
p->specialCasing=NULL;
|
||||
}
|
||||
if(p->caseFolding!=NULL && p->caseFolding->simple==0 && p->caseFolding->full[0]==0) {
|
||||
excWord|=UCASE_EXC_CONDITIONAL_FOLD;
|
||||
p->caseFolding=NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note:
|
||||
* UCD stores no simple mappings when they are the same as the code point itself.
|
||||
* SpecialCasing and CaseFolding do store simple mappings even if they are
|
||||
* the same as the code point itself.
|
||||
* Comparisons between simple regular mappings and simple special/folding
|
||||
* mappings need to compensate for the difference by comparing with the
|
||||
* original code point if a simple UCD mapping is missing (0).
|
||||
*/
|
||||
|
||||
/* remove redundant data */
|
||||
if(p->specialCasing!=NULL) {
|
||||
/* do not store full mappings if they are the same as the simple ones */
|
||||
if(fullMappingEqualsSimple(p->specialCasing->lowerCase, p->lowerCase, p->code)) {
|
||||
p->specialCasing->lowerCase[0]=0;
|
||||
}
|
||||
if(fullMappingEqualsSimple(p->specialCasing->upperCase, p->upperCase, p->code)) {
|
||||
p->specialCasing->upperCase[0]=0;
|
||||
}
|
||||
if(fullMappingEqualsSimple(p->specialCasing->titleCase, p->titleCase, p->code)) {
|
||||
p->specialCasing->titleCase[0]=0;
|
||||
}
|
||||
}
|
||||
if( p->caseFolding!=NULL &&
|
||||
fullMappingEqualsSimple(p->caseFolding->full, p->caseFolding->simple, p->code)
|
||||
) {
|
||||
p->caseFolding->full[0]=0;
|
||||
}
|
||||
|
||||
/* write the optional slots */
|
||||
slotBits=0;
|
||||
count=0;
|
||||
|
||||
if(p->lowerCase!=0) {
|
||||
slots[count]=(uint32_t)p->lowerCase;
|
||||
slotBits|=slots[count];
|
||||
++count;
|
||||
excWord|=U_MASK(UCASE_EXC_LOWER);
|
||||
}
|
||||
if( p->caseFolding!=NULL &&
|
||||
(p->lowerCase!=0 ?
|
||||
p->caseFolding->simple!=p->lowerCase :
|
||||
p->caseFolding->simple!=p->code)
|
||||
) {
|
||||
slots[count]=(uint32_t)p->caseFolding->simple;
|
||||
slotBits|=slots[count];
|
||||
++count;
|
||||
excWord|=U_MASK(UCASE_EXC_FOLD);
|
||||
}
|
||||
if(p->upperCase!=0) {
|
||||
slots[count]=(uint32_t)p->upperCase;
|
||||
slotBits|=slots[count];
|
||||
++count;
|
||||
excWord|=U_MASK(UCASE_EXC_UPPER);
|
||||
}
|
||||
if(p->upperCase!=p->titleCase) {
|
||||
if(p->titleCase!=0) {
|
||||
slots[count]=(uint32_t)p->titleCase;
|
||||
} else {
|
||||
slots[count]=(uint32_t)p->code;
|
||||
}
|
||||
slotBits|=slots[count];
|
||||
++count;
|
||||
excWord|=U_MASK(UCASE_EXC_TITLE);
|
||||
}
|
||||
|
||||
/* lengths of full case mapping strings, stored in the last slot */
|
||||
fullLengths=0;
|
||||
if(p->specialCasing!=NULL) {
|
||||
fullLengths=p->specialCasing->lowerCase[0];
|
||||
fullLengths|=p->specialCasing->upperCase[0]<<8;
|
||||
fullLengths|=p->specialCasing->titleCase[0]<<12;
|
||||
}
|
||||
if(p->caseFolding!=NULL) {
|
||||
fullLengths|=p->caseFolding->full[0]<<4;
|
||||
}
|
||||
if(fullLengths!=0) {
|
||||
slots[count]=fullLengths;
|
||||
slotBits|=slots[count];
|
||||
++count;
|
||||
excWord|=U_MASK(UCASE_EXC_FULL_MAPPINGS);
|
||||
}
|
||||
|
||||
/* write slots */
|
||||
doubleSlots=(UBool)(slotBits>0xffff);
|
||||
if(!doubleSlots) {
|
||||
for(i=0; i<count; ++i) {
|
||||
exceptions[excTop++]=(uint16_t)slots[i];
|
||||
}
|
||||
} else {
|
||||
excWord|=UCASE_EXC_DOUBLE_SLOTS;
|
||||
for(i=0; i<count; ++i) {
|
||||
exceptions[excTop++]=(uint16_t)(slots[i]>>16);
|
||||
exceptions[excTop++]=(uint16_t)slots[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* write the full case mapping strings */
|
||||
if(p->specialCasing!=NULL) {
|
||||
length=(uint16_t)p->specialCasing->lowerCase[0];
|
||||
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->lowerCase+1, length);
|
||||
excTop+=length;
|
||||
}
|
||||
if(p->caseFolding!=NULL) {
|
||||
length=(uint16_t)p->caseFolding->full[0];
|
||||
u_memcpy((UChar *)exceptions+excTop, p->caseFolding->full+1, length);
|
||||
excTop+=length;
|
||||
}
|
||||
if(p->specialCasing!=NULL) {
|
||||
length=(uint16_t)p->specialCasing->upperCase[0];
|
||||
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->upperCase+1, length);
|
||||
excTop+=length;
|
||||
|
||||
length=(uint16_t)p->specialCasing->titleCase[0];
|
||||
u_memcpy((UChar *)exceptions+excTop, p->specialCasing->titleCase+1, length);
|
||||
excTop+=length;
|
||||
}
|
||||
|
||||
exceptionsTop=excTop;
|
||||
|
||||
/* write the main exceptions word */
|
||||
exceptions[excIndex]=excWord;
|
||||
|
||||
return excIndex;
|
||||
}
|
||||
|
||||
extern void
|
||||
makeExceptions() {
|
||||
uint32_t *row;
|
||||
uint32_t value;
|
||||
int32_t i;
|
||||
uint16_t excIndex;
|
||||
|
||||
i=0;
|
||||
while((row=upvec_getRow(pv, i, NULL, NULL))!=NULL) {
|
||||
value=*row;
|
||||
if(value&UCASE_EXCEPTION) {
|
||||
excIndex=makeException(value, excProps+(value>>UGENCASE_EXC_SHIFT));
|
||||
*row=(value&~(UGENCASE_EXC_MASK|UCASE_EXC_MASK))|(excIndex<<UCASE_EXC_SHIFT);
|
||||
}
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
/* generate output data ----------------------------------------------------- */
|
||||
|
||||
/* TODO: create/use default folding function?! */
|
||||
|
||||
/* folding value: just store the offset (16 bits) if there is any non-0 entry */
|
||||
U_CFUNC uint32_t U_EXPORT2
|
||||
getFoldedPropsValue(UNewTrie *trie, UChar32 start, int32_t offset) {
|
||||
uint32_t value;
|
||||
UChar32 limit;
|
||||
UBool inBlockZero;
|
||||
|
||||
limit=start+0x400;
|
||||
while(start<limit) {
|
||||
value=utrie_get32(trie, start, &inBlockZero);
|
||||
if(inBlockZero) {
|
||||
start+=UTRIE_DATA_BLOCK_LENGTH;
|
||||
} else if(value!=0) {
|
||||
return (uint32_t)(offset|0x8000);
|
||||
} else {
|
||||
++start;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern void
|
||||
generateData(const char *dataDir) {
|
||||
static int32_t indexes[UCASE_IX_TOP]={
|
||||
UCASE_IX_TOP
|
||||
};
|
||||
static uint8_t trieBlock[40000];
|
||||
|
||||
const uint32_t *row;
|
||||
UChar32 start, limit;
|
||||
int32_t i;
|
||||
|
||||
UNewDataMemory *pData;
|
||||
UNewTrie *pTrie;
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
int32_t trieSize;
|
||||
long dataLength;
|
||||
|
||||
pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE);
|
||||
if(pTrie==NULL) {
|
||||
fprintf(stderr, "gencase error: unable to create a UNewTrie\n");
|
||||
exit(U_MEMORY_ALLOCATION_ERROR);
|
||||
}
|
||||
|
||||
for(i=0; (row=upvec_getRow(pv, i, &start, &limit))!=NULL; ++i) {
|
||||
if(!utrie_setRange32(pTrie, start, limit, *row, TRUE)) {
|
||||
fprintf(stderr, "gencase error: unable to set trie value (overflow)\n");
|
||||
exit(U_BUFFER_OVERFLOW_ERROR);
|
||||
}
|
||||
}
|
||||
|
||||
trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), getFoldedPropsValue, TRUE, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize);
|
||||
exit(errorCode);
|
||||
}
|
||||
|
||||
indexes[UCASE_IX_EXC_LENGTH]=exceptionsTop;
|
||||
indexes[UCASE_IX_TRIE_SIZE]=trieSize;
|
||||
indexes[UCASE_IX_LENGTH]=(int32_t)sizeof(indexes)+trieSize+2*exceptionsTop;
|
||||
|
||||
if(beVerbose) {
|
||||
printf("trie size in bytes: %5d\n", (int)trieSize);
|
||||
printf("number of code points with exceptions: %5d\n", exceptionsCount);
|
||||
printf("size in bytes of exceptions: %5d\n", 2*exceptionsTop);
|
||||
printf("data size: %5d\n", (int)indexes[UCASE_IX_LENGTH]);
|
||||
}
|
||||
|
||||
/* write the data */
|
||||
pData=udata_create(dataDir, UCASE_DATA_TYPE, UCASE_DATA_NAME, &dataInfo,
|
||||
haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "gencase: unable to create data memory, %s\n", u_errorName(errorCode));
|
||||
exit(errorCode);
|
||||
}
|
||||
|
||||
udata_writeBlock(pData, indexes, sizeof(indexes));
|
||||
udata_writeBlock(pData, trieBlock, trieSize);
|
||||
udata_writeBlock(pData, exceptions, 2*exceptionsTop);
|
||||
|
||||
/* finish up */
|
||||
dataLength=udata_finish(pData, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
fprintf(stderr, "gencase: error %d writing the output file\n", errorCode);
|
||||
exit(errorCode);
|
||||
}
|
||||
|
||||
if(dataLength!=indexes[UCASE_IX_LENGTH]) {
|
||||
fprintf(stderr, "gencase: data length %ld != calculated size %d\n",
|
||||
dataLength, (int)indexes[UCASE_IX_LENGTH]);
|
||||
exit(U_INTERNAL_PROGRAM_ERROR);
|
||||
}
|
||||
|
||||
utrie_close(pTrie);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hey, Emacs, please set the following:
|
||||
*
|
||||
* Local Variables:
|
||||
* indent-tabs-mode: nil
|
||||
* End:
|
||||
*
|
||||
*/
|
Loading…
Add table
Reference in a new issue