ICU-983 New project for the collation elements serializer

X-SVN-Rev: 5007
This commit is contained in:
Syn Wee Quek 2001-06-15 17:37:26 +00:00
parent f622af73e1
commit 44364df079
4 changed files with 501 additions and 0 deletions

View file

@ -174,6 +174,18 @@ Package=<4>
###############################################################################
Project: "dumpce"=..\tools\dumpce\dumpce.dsp - Package Owner=<4>
Package=<5>
{{{
}}}
Package=<4>
{{{
}}}
###############################################################################
Project: "genccode"=..\tools\genccode\genccode.dsp - Package Owner=<4>
Package=<5>

View file

@ -0,0 +1,87 @@
## Makefile.in for ICU - tools/dumpce
## Copyright (c) 2001, International Business Machines Corporation and
## others. All Rights Reserved.
## Source directory information
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = ../..
include $(top_builddir)/icudefs.mk
## Platform-specific setup
include @platform_make_fragment@
## Build directory information
subdir = tools/dumpce
## Extra files to remove for 'make clean'
## (DEPS is only assigned further down; that works because '=' defers
## expansion until CLEANFILES is actually used)
CLEANFILES = *~ $(DEPS)
## Target information
TARGET = dumpce
DEFS = @DEFS@
## Headers are searched for in the common, i18n and tools/toolutil source
## directories
CPPFLAGS = @CPPFLAGS@ -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/toolutil
CFLAGS = @CFLAGS@
CXXFLAGS = @CXXFLAGS@
## RPATHLDFLAGS is only set, and therefore only added to LDFLAGS, when
## ENABLE_RPATH is YES
ENABLE_RPATH = @ENABLE_RPATH@
ifeq ($(ENABLE_RPATH),YES)
RPATHLDFLAGS = $(LD_RPATH)$(LD_RPATH_PRE)$(libdir)
endif
LDFLAGS = @LDFLAGS@ $(RPATHLDFLAGS)
## Environment assignment used by the 'invoke' target: puts the common, i18n
## and tools/toolutil build directories in front of the existing value of the
## variable named by LDLIBRARYPATH_ENVVAR
INVOKE = $(LDLIBRARYPATH_ENVVAR)=$(top_builddir)/common:$(top_builddir)/i18n:$(top_builddir)/tools/toolutil:$$$(LDLIBRARYPATH_ENVVAR)
LIBS = $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) @LIBS@ @LIB_M@
OBJECTS = dumpce.o
## One dependency file (.d) per object file
DEPS = $(OBJECTS:.o=.d)
## List of phony targets
.PHONY : all all-local install install-local clean clean-local \
distclean distclean-local dist dist-local check check-local
## Clear suffix list
.SUFFIXES :
## List of standard targets
## Each standard target forwards to its -local counterpart defined below
all: all-local
install: install-local
clean: clean-local
distclean : distclean-local
dist: dist-local
check: all check-local
all-local: $(TARGET)
## install-local and dist-local have no recipe: nothing is done for them here
install-local:
dist-local:
## Remove CLEANFILES (if the list is not empty), the objects and the tool
clean-local:
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
$(RMV) $(OBJECTS) $(TARGET)
## distclean additionally removes the generated Makefile
distclean-local: clean-local
$(RMV) Makefile
## check-local has no recipe of its own; it only requires the tool to be built
check-local: all-local
## Regenerate this Makefile by running config.status from the top build
## directory whenever Makefile.in or config.status is newer
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
## Link the tool from its object files and the ICU libraries in LIBS
$(TARGET) : $(OBJECTS)
$(LINK.cc) -o $@ $^ $(LIBS)
## Run the command given in INVOCATION with ICU_DATA defaulting to the build
## tree's data directory, TZ fixed to PST8PDT and the INVOKE library path
invoke:
ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)
## Include the dependency files when no goal was given, or when at least one
## requested goal does not end in 'clean'; a missing file is not an error
ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif

View file

@ -0,0 +1,299 @@
/********************************************************************
* COPYRIGHT:
* Copyright (C) 2001 IBM, Inc. All Rights Reserved.
*
********************************************************************/
/********************************************************************************
*
* File dumpce.cpp
*
* Modification History:
* Name Date Description
* synwee May 31 2001 Creation
*
*********************************************************************************
*/
/**
* This program outputs the collation elements used for a requested tailoring.
*
* Usage:
* dumpce options...
* -locale name ICU locale to use. Default is en_US
* -serialize Serializes the collation elements of the locale. Nothing is
* output unless this option is given.
* -outputfile file_name Path for outputting the serialized collation
* elements. Default standard output.
*/
#include <unicode/utypes.h>
#include <unicode/ucol.h>
#include <unicode/uloc.h>
#include <unicode/ucoleitr.h>
#include <unicode/uchar.h>
#include <unicode/utf16.h>
#include <stdio.h>
#include "cmemory.h"
#include "ucol_tok.h"
#include "cstring.h"
#include "uoptions.h"
#include "ucol_imp.h"
/**
* Command line option variables.
* These global variables are set according to the options specified on the
* command line by the user.
*
* The rest of this file addresses the entries by position, so the order of
* the initializers must not change:
*   options[0]  UOPTION_HELP_H
*   options[1]  UOPTION_HELP_QUESTION_MARK
*   options[2]  "locale" / 'l', requires an argument
*   options[3]  "serialize" / 's', takes no argument
*   options[4]  "outputfile" / 'o', argument is optional
*   options[5]  UOPTION_VERBOSE
*/
static UOption options[]={
UOPTION_HELP_H,
UOPTION_HELP_QUESTION_MARK,
{"locale", NULL, NULL, NULL, 'l', UOPT_REQUIRES_ARG, 0},
{"serialize", NULL, NULL, NULL, 's', UOPT_NO_ARG, 0},
{"outputfile", NULL, NULL, NULL, 'o', UOPT_OPTIONAL_ARG, 0},
UOPTION_VERBOSE
};
/**
* Collator whose elements are dumped. It is opened and closed in main() and
* read by the argument-less serialize().
*/
static UCollator *collator = 0;
/**
* Writes the hexadecimal of a null-terminated array of codepoints into a
* file
* @param f UFILE instance to store
* @param c codepoints array
*/
void serialize(FILE *f, const UChar *c)
{
UChar cp = *(c ++);
fprintf(f, " %04x", cp);
while (*c != 0) {
cp = *(c ++);
fprintf(f, " %04x", cp);
}
}
/**
 * Writes the code units of a non-null-terminated array to a file, each one
 * as a space followed by its value in lowercase hexadecimal, zero-padded to
 * at least four digits.
 * Nothing is read from the array or written when the length is zero or
 * negative.
 * @param f output stream to write to
 * @param c code unit array
 * @param l number of code units in the array
 */
void serialize(FILE *f, const UChar *c, int l)
{
    // Bound every read by l, including the first one: an empty array has no
    // element 0 to print.
    for (int index = 0; index < l; index ++) {
        fprintf(f, " %04x", c[index]);
    }
}
/**
* Sets the iterator to the argument string and outputs the collation elements.
* @param f file output stream
* @param iter collation element iterator
*/
void serialize(FILE *f, UCollationElements *iter) {
UChar *codepoint = iter->iteratordata_.string;
if (iter->iteratordata_.flags & UCOL_ITER_HASLEN) {
serialize(f, codepoint, iter->iteratordata_.endp - codepoint);
}
else {
serialize(f, codepoint);
}
if (options[5].doesOccur) {
serialize(stdout, codepoint);
fprintf(stdout, "\n");
}
fprintf(f, "; ");
UErrorCode error = U_ZERO_ERROR;
uint32_t ce = ucol_next(iter, &error);
if (U_FAILURE(error)) {
fprintf(f, "Error retrieving collation elements\n");
return;
}
fprintf(f, "[");
while (TRUE) {
fprintf(f, "%08x", ce);
ce = ucol_next(iter, &error);
if (ce == UCOL_NULLORDER) {
break;
}
fprintf(f, " ");
if (U_FAILURE(error)) {
fprintf(stdout, "Error retrieving collation elements");
return;
}
}
fprintf(f, "]\n");
}
/**
* Serializes the contraction within the given argument rule
* @param f file output stream
* @param r rule
* @param rlen rule length
* @param iter iterator to iterate over collation elements
*/
void serialize(FILE *f, UChar *rule, int rlen, UCollationElements *iter) {
const UChar *current = NULL;
uint32_t strength = 0;
uint32_t chOffset = 0;
uint32_t chLen = 0;
uint32_t exOffset = 0;
uint32_t exLen = 0;
uint8_t specs = 0;
UBool rstart = TRUE;
UColTokenParser src;
UColOptionSet opts;
UErrorCode error = U_ZERO_ERROR;
src.opts = &opts;
src.source = src.current = rule;
src.end = rule + rlen;
src.extraCurrent = src.end;
src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
while ((current = ucol_tok_parseNextToken(&src, &strength, &chOffset,
&chLen, &exOffset, &exLen,
&specs, rstart, &error))
!= NULL) {
// contractions handled here
if (chLen > 1) {
ucol_setText(iter, rule + chOffset, chLen, &error);
if (U_FAILURE(error)) {
fprintf(stdout, "Error setting text in iterator\n");
return;
}
serialize(f, iter);
}
rstart = FALSE;
}
}
/**
* Output the collation element belonging to the locale into a file
*/
void serialize() {
UErrorCode error = U_ZERO_ERROR;
UChar32 codepoint = 0;
UChar str[128];
int strlen = 0;
// FileStream *f;
FILE *f;
if (options[4].doesOccur) {
f = fopen(options[4].value, "w");
if (f == NULL) {
fprintf(stdout, "Cannot open file:%s\n",
(char *)options[4].value);
return;
}
}
else {
f = stdout;
}
UVersionInfo version;
ucol_getVersion(collator, version);
fprintf(f, "# This file contains the serialized collation elements\n");
fprintf(f, "# as of the collation version indicated below.\n");
fprintf(f, "# Data format: xxxx xxxx..; [yyyyyyyy yyyyyy..]\n");
fprintf(f, "# where xxxx are codepoints in hexadecimals\n");
fprintf(f, "# and yyyyyyyy are the corresponding\n");
fprintf(f, "# collation elements in hexadecimals\n");
fprintf(f, "# Collation version number: %d.%d.%d.%d\n", version[0],
version[1], version[2], version[3]);
UCollationElements *iter = ucol_openElements(collator, str, strlen,
&error);
if (U_FAILURE(error)) {
fprintf(stdout, "Error creating iterator\n");
return;
}
fprintf(f, "\n# Range of unicode characters\n\n");
while (codepoint <= UCHAR_MAX_VALUE) {
if (u_isdefined(codepoint)) {
strlen = 0;
UTF16_APPEND_CHAR_UNSAFE(str, strlen, codepoint);
str[strlen] = 0;
ucol_setText(iter, str, strlen, &error);
if (U_FAILURE(error)) {
fprintf(stdout, "Error setting text in iterator\n");
return;
}
serialize(f, iter);
}
codepoint ++;
}
fprintf(f, "\n# Contractions\n\n");
UChar ucarules[0x10000];
UChar *rules = ucarules;
int32_t rulelength = ucol_getRulesEx(collator, UCOL_FULL_RULES,
ucarules, 0x10000);
if (rulelength > 0x10000) {
rules = (UChar *)uprv_malloc(sizeof(UChar) * rulelength);
ucol_getRulesEx(collator, UCOL_FULL_RULES, rules, rulelength);
}
serialize(f, rules, rulelength, iter);
if (rules != ucarules) {
uprv_free(rules);
}
ucol_closeElements(iter);
if (options[4].doesOccur) {
fclose(f);
}
}
/**
 * Main -- processes the command line, opens a collator for the requested
 * locale and, when the serialize option (options[3]) was given, outputs its
 * collation elements.
 * @return U_ILLEGAL_ARGUMENT_ERROR for a bad command line, U_ZERO_ERROR
 *         after printing the usage on request, -1 if the collator cannot be
 *         opened, 0 otherwise
 */
int main(int argc, char *argv[]) {
    argc = u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]),
                       options);
    // error handling, printing usage message
    if (argc < 0) {
        // The offending argument is user input: print it as data, never as
        // a format string.
        fprintf(stdout, "error in command line argument:%s\n", argv[-argc]);
    }
    if (argc < 0 || options[0].doesOccur || options[1].doesOccur) {
        fprintf(stdout, "Usage: dumpce options...\n"
                "-help Display this message.\n"
                "-locale name ICU locale to use. Default is en_US\n"
                "-serialize Serializes the collation elements in -locale and outputs them into -outputfile\n"
                "-outputfile file_name Path for outputing the serialized collation elements. Defaults to stdout if no defined\n");
        return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }
    // Set up an ICU collator
    UErrorCode status = U_ZERO_ERROR;
    collator = ucol_open((char *)options[2].value, &status);
    if (U_FAILURE(status)) {
        fprintf(stdout, "Collator creation failed:%s\n", u_errorName(status));
        return -1;
    }
    // Without the serialize option the collator is only opened and closed.
    if (options[3].doesOccur) {
        serialize();
    }
    ucol_close(collator);
    return 0;
}

View file

@ -0,0 +1,103 @@
# Microsoft Developer Studio Project File - Name="dumpce" - Package Owner=<4>
# Microsoft Developer Studio Generated Build File, Format Version 6.00
# ** DO NOT EDIT **
# TARGTYPE "Win32 (x86) Console Application" 0x0103
CFG=dumpce - Win32 Debug
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
!MESSAGE use the Export Makefile command and run
!MESSAGE
!MESSAGE NMAKE /f "dumpce.mak".
!MESSAGE
!MESSAGE You can specify a configuration when running NMAKE
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
!MESSAGE NMAKE /f "dumpce.mak" CFG="dumpce - Win32 Debug"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "dumpce - Win32 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "dumpce - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE
# Begin Project
# PROP AllowPerConfigDependencies 0
# PROP Scc_ProjName ""
# PROP Scc_LocalPath ""
CPP=cl.exe
RSC=rc.exe
!IF "$(CFG)" == "dumpce - Win32 Release"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 0
# PROP BASE Output_Dir "Release"
# PROP BASE Intermediate_Dir "Release"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 0
# PROP Output_Dir "Release"
# PROP Intermediate_Dir "Release"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
# ADD CPP /nologo /G6 /MD /W3 /GX /O2 /Ob2 /I "..\..\..\include" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
# SUBTRACT CPP /YX
# ADD BASE RSC /l 0x409 /d "NDEBUG"
# ADD RSC /l 0x409 /d "NDEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 icuuc.lib icuin.lib icutu.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib\\"
!ELSEIF "$(CFG)" == "dumpce - Win32 Debug"
# PROP BASE Use_MFC 0
# PROP BASE Use_Debug_Libraries 1
# PROP BASE Output_Dir "Debug"
# PROP BASE Intermediate_Dir "Debug"
# PROP BASE Target_Dir ""
# PROP Use_MFC 0
# PROP Use_Debug_Libraries 1
# PROP Output_Dir "Debug"
# PROP Intermediate_Dir "Debug"
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
# ADD CPP /nologo /G6 /MDd /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FR /YX /FD /GZ /c
# ADD BASE RSC /l 0x409 /d "_DEBUG"
# ADD RSC /l 0x409 /d "_DEBUG"
BSC32=bscmake.exe
# ADD BASE BSC32 /nologo
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
# ADD LINK32 icuucd.lib icuind.lib icutud.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib\\"
!ENDIF
# Begin Target
# Name "dumpce - Win32 Release"
# Name "dumpce - Win32 Debug"
# Begin Group "Source Files"
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
# Begin Source File
SOURCE=.\dumpce.cpp
# End Source File
# End Group
# Begin Group "Header Files"
# PROP Default_Filter "h;hpp;hxx;hm;inl"
# End Group
# Begin Group "Resource Files"
# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
# End Group
# End Target
# End Project