mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-983 New project for the collation elements serializer
X-SVN-Rev: 5007
This commit is contained in:
parent
f622af73e1
commit
44364df079
4 changed files with 501 additions and 0 deletions
|
@ -174,6 +174,18 @@ Package=<4>
|
|||
|
||||
###############################################################################
|
||||
|
||||
Project: "dumpce"=..\tools\dumpce\dumpce.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
Package=<4>
|
||||
{{{
|
||||
}}}
|
||||
|
||||
###############################################################################
|
||||
|
||||
Project: "genccode"=..\tools\genccode\genccode.dsp - Package Owner=<4>
|
||||
|
||||
Package=<5>
|
||||
|
|
87
icu4c/source/tools/dumpce/Makefile.in
Normal file
87
icu4c/source/tools/dumpce/Makefile.in
Normal file
|
@ -0,0 +1,87 @@
|
|||
## Makefile.in for ICU - tools/dumpce
|
||||
## Copyright (c) 2001, International Business Machines Corporation and
|
||||
## others. All Rights Reserved.
|
||||
|
||||
## Source directory information
|
||||
srcdir = @srcdir@
|
||||
top_srcdir = @top_srcdir@
|
||||
|
||||
top_builddir = ../..
|
||||
|
||||
include $(top_builddir)/icudefs.mk
|
||||
|
||||
## Platform-specific setup
|
||||
include @platform_make_fragment@
|
||||
|
||||
## Build directory information
|
||||
subdir = tools/dumpce
|
||||
|
||||
## Extra files to remove for 'make clean'
|
||||
CLEANFILES = *~ $(DEPS)
|
||||
|
||||
## Target information
|
||||
TARGET = dumpce
|
||||
|
||||
DEFS = @DEFS@
|
||||
CPPFLAGS = @CPPFLAGS@ -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/toolutil
|
||||
CFLAGS = @CFLAGS@
|
||||
CXXFLAGS = @CXXFLAGS@
|
||||
ENABLE_RPATH = @ENABLE_RPATH@
|
||||
ifeq ($(ENABLE_RPATH),YES)
|
||||
RPATHLDFLAGS = $(LD_RPATH)$(LD_RPATH_PRE)$(libdir)
|
||||
endif
|
||||
LDFLAGS = @LDFLAGS@ $(RPATHLDFLAGS)
|
||||
INVOKE = $(LDLIBRARYPATH_ENVVAR)=$(top_builddir)/common:$(top_builddir)/i18n:$(top_builddir)/tools/toolutil:$$$(LDLIBRARYPATH_ENVVAR)
|
||||
LIBS = $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) @LIBS@ @LIB_M@
|
||||
|
||||
OBJECTS = dumpce.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
## List of phony targets
|
||||
.PHONY : all all-local install install-local clean clean-local \
|
||||
distclean distclean-local dist dist-local check check-local
|
||||
|
||||
## Clear suffix list
|
||||
.SUFFIXES :
|
||||
|
||||
## List of standard targets
|
||||
all: all-local
|
||||
install: install-local
|
||||
clean: clean-local
|
||||
distclean : distclean-local
|
||||
dist: dist-local
|
||||
check: all check-local
|
||||
|
||||
all-local: $(TARGET)
|
||||
|
||||
install-local:
|
||||
|
||||
dist-local:
|
||||
|
||||
clean-local:
|
||||
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
|
||||
$(RMV) $(OBJECTS) $(TARGET)
|
||||
|
||||
distclean-local: clean-local
|
||||
$(RMV) Makefile
|
||||
|
||||
check-local: all-local
|
||||
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
$(TARGET) : $(OBJECTS)
|
||||
$(LINK.cc) -o $@ $^ $(LIBS)
|
||||
|
||||
invoke:
|
||||
ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)
|
||||
|
||||
ifeq (,$(MAKECMDGOALS))
|
||||
-include $(DEPS)
|
||||
else
|
||||
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
|
||||
-include $(DEPS)
|
||||
endif
|
||||
endif
|
299
icu4c/source/tools/dumpce/dumpce.cpp
Normal file
299
icu4c/source/tools/dumpce/dumpce.cpp
Normal file
|
@ -0,0 +1,299 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (C) 2001 IBM, Inc. All Rights Reserved.
|
||||
*
|
||||
********************************************************************/
|
||||
/********************************************************************************
|
||||
*
|
||||
* File dumpce.cpp
|
||||
*
|
||||
* Modification History:
|
||||
* Name Date Description
|
||||
* synwee May 31 2001 Creation
|
||||
*
|
||||
*********************************************************************************
|
||||
*/
|
||||
|
||||
/**
|
||||
* This program outputs the collation elements used for a requested tailoring.
|
||||
*
|
||||
* Usage:
|
||||
* dumpce options...
|
||||
* -locale name ICU locale to use. Default is en_US
|
||||
 *      -outputfile file_name  Path for outputting the serialized collation
|
||||
* elements. Default standard output.
|
||||
*/
|
||||
#include <unicode/utypes.h>
|
||||
#include <unicode/ucol.h>
|
||||
#include <unicode/uloc.h>
|
||||
#include <unicode/ucoleitr.h>
|
||||
#include <unicode/uchar.h>
|
||||
#include <unicode/utf16.h>
|
||||
#include <stdio.h>
|
||||
#include "cmemory.h"
|
||||
#include "ucol_tok.h"
|
||||
#include "cstring.h"
|
||||
#include "uoptions.h"
|
||||
#include "ucol_imp.h"
|
||||
|
||||
/**
|
||||
* Command line option variables.
|
||||
* These global variables are set according to the options specified on the
|
||||
* command line by the user.
|
||||
*/
|
||||
static UOption options[]={
|
||||
UOPTION_HELP_H,
|
||||
UOPTION_HELP_QUESTION_MARK,
|
||||
{"locale", NULL, NULL, NULL, 'l', UOPT_REQUIRES_ARG, 0},
|
||||
{"serialize", NULL, NULL, NULL, 's', UOPT_NO_ARG, 0},
|
||||
{"outputfile", NULL, NULL, NULL, 'o', UOPT_OPTIONAL_ARG, 0},
|
||||
UOPTION_VERBOSE
|
||||
};
|
||||
|
||||
static UCollator *collator = 0;
|
||||
|
||||
/**
|
||||
* Writes the hexadecimal of a null-terminated array of codepoints into a
|
||||
* file
|
||||
* @param f UFILE instance to store
|
||||
* @param c codepoints array
|
||||
*/
|
||||
void serialize(FILE *f, const UChar *c)
|
||||
{
|
||||
UChar cp = *(c ++);
|
||||
|
||||
fprintf(f, " %04x", cp);
|
||||
|
||||
while (*c != 0) {
|
||||
cp = *(c ++);
|
||||
fprintf(f, " %04x", cp);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the hexadecimal of a non-null-terminated array of codepoints into a
|
||||
* file
|
||||
* @param f UFILE instance to store
|
||||
* @param c codepoints array
|
||||
* @param l codepoints array length
|
||||
*/
|
||||
void serialize(FILE *f, const UChar *c, int l)
|
||||
{
|
||||
int count = 1;
|
||||
UChar cp = *(c ++);
|
||||
|
||||
fprintf(f, " %04x", cp);
|
||||
|
||||
while (count < l) {
|
||||
cp = *(c ++);
|
||||
fprintf(f, " %04x", cp);
|
||||
count ++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the iterator to the argument string and outputs the collation elements.
|
||||
* @param f file output stream
|
||||
* @param iter collation element iterator
|
||||
*/
|
||||
void serialize(FILE *f, UCollationElements *iter) {
|
||||
UChar *codepoint = iter->iteratordata_.string;
|
||||
|
||||
if (iter->iteratordata_.flags & UCOL_ITER_HASLEN) {
|
||||
serialize(f, codepoint, iter->iteratordata_.endp - codepoint);
|
||||
}
|
||||
else {
|
||||
serialize(f, codepoint);
|
||||
}
|
||||
if (options[5].doesOccur) {
|
||||
serialize(stdout, codepoint);
|
||||
fprintf(stdout, "\n");
|
||||
}
|
||||
|
||||
fprintf(f, "; ");
|
||||
|
||||
UErrorCode error = U_ZERO_ERROR;
|
||||
uint32_t ce = ucol_next(iter, &error);
|
||||
if (U_FAILURE(error)) {
|
||||
fprintf(f, "Error retrieving collation elements\n");
|
||||
return;
|
||||
}
|
||||
fprintf(f, "[");
|
||||
while (TRUE) {
|
||||
fprintf(f, "%08x", ce);
|
||||
ce = ucol_next(iter, &error);
|
||||
if (ce == UCOL_NULLORDER) {
|
||||
break;
|
||||
}
|
||||
fprintf(f, " ");
|
||||
if (U_FAILURE(error)) {
|
||||
fprintf(stdout, "Error retrieving collation elements");
|
||||
return;
|
||||
}
|
||||
}
|
||||
fprintf(f, "]\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* Serializes the contraction within the given argument rule
|
||||
* @param f file output stream
|
||||
* @param r rule
|
||||
* @param rlen rule length
|
||||
* @param iter iterator to iterate over collation elements
|
||||
*/
|
||||
void serialize(FILE *f, UChar *rule, int rlen, UCollationElements *iter) {
|
||||
const UChar *current = NULL;
|
||||
uint32_t strength = 0;
|
||||
uint32_t chOffset = 0;
|
||||
uint32_t chLen = 0;
|
||||
uint32_t exOffset = 0;
|
||||
uint32_t exLen = 0;
|
||||
uint8_t specs = 0;
|
||||
UBool rstart = TRUE;
|
||||
UColTokenParser src;
|
||||
UColOptionSet opts;
|
||||
UErrorCode error = U_ZERO_ERROR;
|
||||
|
||||
src.opts = &opts;
|
||||
|
||||
src.source = src.current = rule;
|
||||
src.end = rule + rlen;
|
||||
src.extraCurrent = src.end;
|
||||
src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
|
||||
|
||||
while ((current = ucol_tok_parseNextToken(&src, &strength, &chOffset,
|
||||
&chLen, &exOffset, &exLen,
|
||||
&specs, rstart, &error))
|
||||
!= NULL) {
|
||||
// contractions handled here
|
||||
if (chLen > 1) {
|
||||
ucol_setText(iter, rule + chOffset, chLen, &error);
|
||||
if (U_FAILURE(error)) {
|
||||
fprintf(stdout, "Error setting text in iterator\n");
|
||||
return;
|
||||
}
|
||||
serialize(f, iter);
|
||||
}
|
||||
rstart = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Output the collation element belonging to the locale into a file
|
||||
*/
|
||||
void serialize() {
|
||||
UErrorCode error = U_ZERO_ERROR;
|
||||
UChar32 codepoint = 0;
|
||||
UChar str[128];
|
||||
int strlen = 0;
|
||||
|
||||
// FileStream *f;
|
||||
FILE *f;
|
||||
if (options[4].doesOccur) {
|
||||
f = fopen(options[4].value, "w");
|
||||
if (f == NULL) {
|
||||
fprintf(stdout, "Cannot open file:%s\n",
|
||||
(char *)options[4].value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else {
|
||||
f = stdout;
|
||||
}
|
||||
|
||||
UVersionInfo version;
|
||||
ucol_getVersion(collator, version);
|
||||
fprintf(f, "# This file contains the serialized collation elements\n");
|
||||
fprintf(f, "# as of the collation version indicated below.\n");
|
||||
fprintf(f, "# Data format: xxxx xxxx..; [yyyyyyyy yyyyyy..]\n");
|
||||
fprintf(f, "# where xxxx are codepoints in hexadecimals\n");
|
||||
fprintf(f, "# and yyyyyyyy are the corresponding\n");
|
||||
fprintf(f, "# collation elements in hexadecimals\n");
|
||||
fprintf(f, "# Collation version number: %d.%d.%d.%d\n", version[0],
|
||||
version[1], version[2], version[3]);
|
||||
|
||||
UCollationElements *iter = ucol_openElements(collator, str, strlen,
|
||||
&error);
|
||||
if (U_FAILURE(error)) {
|
||||
fprintf(stdout, "Error creating iterator\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(f, "\n# Range of unicode characters\n\n");
|
||||
|
||||
while (codepoint <= UCHAR_MAX_VALUE) {
|
||||
if (u_isdefined(codepoint)) {
|
||||
strlen = 0;
|
||||
UTF16_APPEND_CHAR_UNSAFE(str, strlen, codepoint);
|
||||
str[strlen] = 0;
|
||||
ucol_setText(iter, str, strlen, &error);
|
||||
if (U_FAILURE(error)) {
|
||||
fprintf(stdout, "Error setting text in iterator\n");
|
||||
return;
|
||||
}
|
||||
serialize(f, iter);
|
||||
}
|
||||
codepoint ++;
|
||||
}
|
||||
|
||||
fprintf(f, "\n# Contractions\n\n");
|
||||
|
||||
UChar ucarules[0x10000];
|
||||
UChar *rules = ucarules;
|
||||
int32_t rulelength = ucol_getRulesEx(collator, UCOL_FULL_RULES,
|
||||
ucarules, 0x10000);
|
||||
if (rulelength > 0x10000) {
|
||||
rules = (UChar *)uprv_malloc(sizeof(UChar) * rulelength);
|
||||
ucol_getRulesEx(collator, UCOL_FULL_RULES, rules, rulelength);
|
||||
}
|
||||
serialize(f, rules, rulelength, iter);
|
||||
if (rules != ucarules) {
|
||||
uprv_free(rules);
|
||||
}
|
||||
|
||||
ucol_closeElements(iter);
|
||||
if (options[4].doesOccur) {
|
||||
fclose(f);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Main -- process command line, read in and pre-process the test file,
|
||||
* call other functions to do the actual tests.
|
||||
*/
|
||||
int main(int argc, char *argv[]) {
|
||||
|
||||
argc = u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]),
|
||||
options);
|
||||
|
||||
// error handling, printing usage message
|
||||
if (argc < 0) {
|
||||
fprintf(stdout, "error in command line argument:");
|
||||
fprintf(stdout, argv[-argc]);
|
||||
}
|
||||
if (argc < 0 || options[0].doesOccur || options[1].doesOccur) {
|
||||
fprintf(stdout, "Usage: strperf options...\n"
|
||||
"-help Display this message.\n"
|
||||
"-locale name ICU locale to use. Default is en_US\n"
|
||||
"-serialize Serializes the collation elements in -locale and outputs them into -outputfile\n"
|
||||
"-outputfile file_name Path for outputing the serialized collation elements. Defaults to stdout if no defined\n");
|
||||
return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
||||
}
|
||||
|
||||
// Set up an ICU collator
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
collator = ucol_open((char *)options[2].value, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
fprintf(stdout, "Collator creation failed:");
|
||||
fprintf(stdout, u_errorName(status));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (options[3].doesOccur) {
|
||||
serialize();
|
||||
}
|
||||
|
||||
ucol_close(collator);
|
||||
|
||||
return 0;
|
||||
}
|
103
icu4c/source/tools/dumpce/dumpce.dsp
Normal file
103
icu4c/source/tools/dumpce/dumpce.dsp
Normal file
|
@ -0,0 +1,103 @@
|
|||
# Microsoft Developer Studio Project File - Name="dumpce" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
||||
# ** DO NOT EDIT **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Console Application" 0x0103
|
||||
|
||||
CFG=dumpce - Win32 Debug
|
||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
||||
!MESSAGE use the Export Makefile command and run
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "dumpce.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE You can specify a configuration when running NMAKE
|
||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "dumpce.mak" CFG="dumpce - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE Possible choices for configuration are:
|
||||
!MESSAGE
|
||||
!MESSAGE "dumpce - Win32 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "dumpce - Win32 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP AllowPerConfigDependencies 0
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "dumpce - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD CPP /nologo /G6 /MD /W3 /GX /O2 /Ob2 /I "..\..\..\include" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
|
||||
# SUBTRACT CPP /YX
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
# ADD LINK32 icuuc.lib icuin.lib icutu.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib\\"
|
||||
|
||||
!ELSEIF "$(CFG)" == "dumpce - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
|
||||
# ADD CPP /nologo /G6 /MDd /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FR /YX /FD /GZ /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
# ADD LINK32 icuucd.lib icuind.lib icutud.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib\\"
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "dumpce - Win32 Release"
|
||||
# Name "dumpce - Win32 Debug"
|
||||
# Begin Group "Source Files"
|
||||
|
||||
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\dumpce.cpp
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "Header Files"
|
||||
|
||||
# PROP Default_Filter "h;hpp;hxx;hm;inl"
|
||||
# End Group
|
||||
# Begin Group "Resource Files"
|
||||
|
||||
# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
|
||||
# End Group
|
||||
# End Target
|
||||
# End Project
|
Loading…
Add table
Reference in a new issue