ICU-8972 preparseucd.py writes pnames_data.h with formatting nearly identical to genpname/preparse.pl; delete obsolete genpname/*

X-SVN-Rev: 31160
This commit is contained in:
Markus Scherer 2011-12-20 07:43:38 +00:00
parent 6bb13512c1
commit cc68c8c6e2
10 changed files with 172 additions and 2187 deletions

13
.gitignore vendored
View file

@ -1024,19 +1024,6 @@ tools/unicode/c/gennorm/gennorm.vcproj.*.*.user
tools/unicode/c/gennorm/release
tools/unicode/c/gennorm/x64
tools/unicode/c/gennorm/x86
tools/unicode/c/genpname/*.d
tools/unicode/c/genpname/*.o
tools/unicode/c/genpname/*.pdb
tools/unicode/c/genpname/*.plg
tools/unicode/c/genpname/Debug
tools/unicode/c/genpname/Makefile
tools/unicode/c/genpname/Release
tools/unicode/c/genpname/debug
tools/unicode/c/genpname/genpname
tools/unicode/c/genpname/genpname.vcproj.*.*.user
tools/unicode/c/genpname/release
tools/unicode/c/genpname/x64
tools/unicode/c/genpname/x86
tools/unicode/c/genprops/*.d
tools/unicode/c/genprops/*.ncb
tools/unicode/c/genprops/*.o

View file

@ -1,9 +0,0 @@
# Copyright (C) 2010, International Business Machines
# Corporation and others. All Rights Reserved.
#
# created on: 2010jun03
# created by: Markus W. Scherer
# edited on: 2010jul20
# edited by: Stuart G. Gill
# Build the genpname executable from its single source file, genpname.cpp.
add_executable(genpname genpname.cpp)
# Link genpname against the icuuc and icutu library targets.
target_link_libraries(genpname icuuc icutu)

View file

@ -1,97 +0,0 @@
## Makefile.in for ICU - tools/genpname
## Copyright (c) 1999-2005, International Business Machines Corporation and
## others. All Rights Reserved.
## Steven R. Loomis

## Source directory information
## (the @...@ tokens are placeholders filled in when config.status
## turns this Makefile.in into the Makefile; see the "Makefile" rule below)
srcdir = @srcdir@
top_srcdir = @top_srcdir@

# NOTE(review): top_builddir and subdir below assume this directory is
# <top_builddir>/tools/genpname -- confirm against the actual tree layout.
top_builddir = ../..

include $(top_builddir)/icudefs.mk

## Build directory information
subdir = tools/genpname

TARGET_STUB_NAME = genpname

## Man page: $(TARGET_STUB_NAME).$(SECTION), generated by the pattern rule below
SECTION = 8

MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)

## Extra files to remove for 'make clean'
## (recursive assignment: DEPS is defined further down and expanded on use)
CLEANFILES = *~ $(DEPS)

## Target information
TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)

## For out-of-source builds, also search the build tree's common directory.
ifneq ($(top_builddir),$(top_srcdir))
CPPFLAGS += -I$(top_builddir)/common
endif
CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)

OBJECTS = genpname.o

## One dependency file per object file
DEPS = $(OBJECTS:.o=.d)

## List of phony targets
.PHONY : all all-local install install-local clean clean-local \
distclean distclean-local dist dist-local check check-local install-man

## Clear suffix list
.SUFFIXES :

## List of standard targets
all: all-local
install: install-local
clean: clean-local
distclean : distclean-local
dist: dist-local
check: all check-local

all-local: $(TARGET)

## Installation is intentionally disabled: the recipes are commented out,
## so install-local and install-man only build their prerequisites.
install-local: all-local
#	$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
#	$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)

install-man: $(MAN_FILES)
#	$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
#	$(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)

dist-local:

## Skip the first removal when CLEANFILES expands to nothing.
clean-local:
	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
	$(RMV) $(TARGET) $(OBJECTS)

distclean-local: clean-local
	$(RMV) Makefile

check-local: all-local

## Regenerate this Makefile through config.status when Makefile.in
## or config.status itself changes.
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
	cd $(top_builddir) \
	&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status

## Link the tool, then run the post-build step.
$(TARGET) : $(OBJECTS)
	$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
	$(POST_BUILD_STEP)

## Generate the man page from its .in template through config.status.
%.$(SECTION): $(srcdir)/%.$(SECTION).in
	cd $(top_builddir) \
	&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status

## Include the dependency files when no goal was given, or when at least
## one requested goal does not end in "clean" (the patsubst removes every
## "*clean" word, so a non-empty result means a non-clean goal remains).
ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif

View file

@ -1,52 +0,0 @@
######################################################################
# Copyright (c) 2003-2011, International Business Machines
# Corporation and others. All Rights Reserved.
######################################################################
# Author: Alan Liu
# Created: February 20 2003
# Since: ICU 2.6
######################################################################
# This file follows the format of PropertyAliases.txt
# It contains synthetic property aliases not present
# in the UCD. Unlike PropertyAliases.txt, it should
# NOT contain a version number.
# ================================================
# ================================================
# Miscellaneous Properties
# ================================================
# ================================================
# Enumerated Properties
# ================================================
# lccc(c)=ccc(NFD(c)[0])
# tccc(c)=ccc(NFD(c)[last])
lccc; Lead_Canonical_Combining_Class
tccc; Trail_Canonical_Combining_Class
# ================================================
# Bitmask Properties
# ================================================
gcm ; General_Category_Mask
# ================================================
# Binary Properties
# ================================================
Sensitive ; Case_Sensitive
nfdinert; NFD_Inert
nfkdinert; NFKD_Inert
nfcinert; NFC_Inert
nfkcinert; NFKC_Inert
segstart; Segment_Starter
# C/POSIX character classes that do not have Unicode property [value] aliases
# see uchar.h
n/a; alnum
n/a; blank
n/a; graph
n/a; print
n/a; xdigit

View file

@ -1,85 +0,0 @@
########################################################################
# Copyright (c) 2006-2011, International Business Machines
# Corporation and others. All Rights Reserved.
########################################################################
# file name: SyntheticPropertyValueAliases.txt
# encoding: US-ASCII
# tab size: 8 (not used)
# indentation: 4
# created by: gensvpa.pl
########################################################################
# This file follows the format of PropertyValueAliases.txt
# It contains synthetic property value aliases not present
# in the UCD. Unlike PropertyValueAliases.txt, it should
# NOT contain a version number.
########################################################################
# THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW
# WHAT YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
########################################################################
# set the same names as short and long names to fit the syntax without
# inventing names that we would have to support forever
# Script (sc)
sc ; Blis ; Blis
sc ; Cirt ; Cirt
sc ; Cyrs ; Cyrs
sc ; Egyd ; Egyd
sc ; Egyh ; Egyh
sc ; Geok ; Geok
sc ; Hans ; Hans
sc ; Hant ; Hant
sc ; Hmng ; Hmng
sc ; Hung ; Hung
sc ; Inds ; Inds
sc ; Jpan ; Jpan
sc ; Latf ; Latf
sc ; Latg ; Latg
sc ; Lina ; Lina
sc ; Maya ; Maya
sc ; Moon ; Moon
sc ; Perm ; Perm
sc ; Roro ; Roro
sc ; Sara ; Sara
sc ; Sgnw ; Sgnw
sc ; Syre ; Syre
sc ; Syrj ; Syrj
sc ; Syrn ; Syrn
sc ; Teng ; Teng
sc ; Visp ; Visp
sc ; Zxxx ; Zxxx
sc ; Kore ; Kore
sc ; Mani ; Mani
sc ; Phlp ; Phlp
sc ; Phlv ; Phlv
sc ; Zmth ; Zmth
sc ; Zsym ; Zsym
sc ; Nkgb ; Nkgb
sc ; Bass ; Bass
sc ; Dupl ; Dupl
sc ; Elba ; Elba
sc ; Gran ; Gran
sc ; Kpel ; Kpel
sc ; Loma ; Loma
sc ; Mend ; Mend
sc ; Narb ; Narb
sc ; Nbat ; Nbat
sc ; Palm ; Palm
sc ; Sind ; Sind
sc ; Wara ; Wara
sc ; Afak ; Afak
sc ; Jurc ; Jurc
sc ; Mroo ; Mroo
sc ; Nshu ; Nshu
sc ; Tang ; Tang
sc ; Wole ; Wole
sc ; Khoj ; Khoj
sc ; Tirh ; Tirh

View file

@ -1,403 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="genpname"
ProjectGUID="{DBC0AF0B-B9FF-4B23-905B-4D4CDC2A91CB}"
TargetFrameworkVersion="131072"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Release|Win32"
OutputDirectory=".\x86\Release"
IntermediateDirectory=".\x86\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
Outputs="..\..\..\bin\$(TargetFileName)"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\x86\Release/genpname.tlb"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\..\common;..\toolutil"
PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
PrecompiledHeaderFile=".\x86\Release/genpname.pch"
AssemblerListingLocation=".\x86\Release/"
ObjectFile=".\x86\Release/"
ProgramDataBaseFileName=".\x86\Release/"
WarningLevel="3"
SuppressStartupBanner="true"
CompileAs="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1033"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\x86\Release/genpname.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\x86\Release/genpname.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|Win32"
OutputDirectory=".\x86\Debug"
IntermediateDirectory=".\x86\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin&#x0D;&#x0A;"
Outputs="..\..\..\bin\$(TargetFileName)"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TypeLibraryName=".\x86\Debug/genpname.tlb"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\common;..\toolutil"
PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
BufferSecurityCheck="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
PrecompiledHeaderFile=".\x86\Debug/genpname.pch"
AssemblerListingLocation=".\x86\Debug/"
ObjectFile=".\x86\Debug/"
ProgramDataBaseFileName=".\x86\Debug/"
BrowseInformation="1"
WarningLevel="3"
SuppressStartupBanner="true"
DebugInformationFormat="4"
CompileAs="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1033"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\x86\Debug/genpname.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\x86\Debug/genpname.pdb"
SubSystem="1"
RandomizedBaseAddress="1"
DataExecutionPrevention="0"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
UseFAT32Workaround="true"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory=".\x64\Release"
IntermediateDirectory=".\x64\Release"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
Outputs="..\..\..\bin64\$(TargetFileName)"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\x64\Release/genpname.tlb"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..\..\common;..\toolutil"
PreprocessorDefinitions="WIN64;WIN32;NDEBUG;_CRT_SECURE_NO_DEPRECATE"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
PrecompiledHeaderFile=".\x64\Release/genpname.pch"
AssemblerListingLocation=".\x64\Release/"
ObjectFile=".\x64\Release/"
ProgramDataBaseFileName=".\x64\Release/"
WarningLevel="3"
SuppressStartupBanner="true"
CompileAs="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="NDEBUG"
Culture="1033"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\x64\Release/genpname.exe"
LinkIncremental="1"
SuppressStartupBanner="true"
ProgramDatabaseFile=".\x64\Release/genpname.pdb"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory=".\x64\Debug"
IntermediateDirectory=".\x64\Debug"
ConfigurationType="1"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
CommandLine="copy &quot;$(TargetPath)&quot; ..\..\..\bin64&#x0D;&#x0A;"
Outputs="..\..\..\bin64\$(TargetFileName)"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
TypeLibraryName=".\x64\Debug/genpname.tlb"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\common;..\toolutil"
PreprocessorDefinitions="WIN64;WIN32;_DEBUG;_CRT_SECURE_NO_DEPRECATE"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
BufferSecurityCheck="true"
DisableLanguageExtensions="true"
TreatWChar_tAsBuiltInType="true"
PrecompiledHeaderFile=".\x64\Debug/genpname.pch"
AssemblerListingLocation=".\x64\Debug/"
ObjectFile=".\x64\Debug/"
ProgramDataBaseFileName=".\x64\Debug/"
BrowseInformation="1"
WarningLevel="3"
SuppressStartupBanner="true"
DebugInformationFormat="3"
CompileAs="0"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1033"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile=".\x64\Debug/genpname.exe"
LinkIncremental="2"
SuppressStartupBanner="true"
GenerateDebugInformation="true"
ProgramDatabaseFile=".\x64\Debug/genpname.pdb"
SubSystem="1"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
UseFAT32Workaround="true"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath=".\data.h"
>
</File>
<File
RelativePath=".\genpname.cpp"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View file

@ -1,161 +0,0 @@
#!/usr/bin/perl
#*
#*******************************************************************************
#* Copyright (C) 2006, International Business Machines
#* Corporation and others. All Rights Reserved.
#*******************************************************************************
#*
#* file name: genspva.pl
#* encoding: US-ASCII
#* tab size: 8 (not used)
#* indentation:4
#*
#* Created by: Ram Viswanadha
#*
#* This file filters iso15924-utf8-<date>.txt
#*
use File::Find;
use File::Basename;
use IO::File;
use Cwd;
use File::Copy;
use Getopt::Long;
use File::Path;
use File::Copy;
#run the program
main();
#---------------------------------------------------------------------
# The main program
sub main(){
    # Command line: --destdir, --iso15924 and --prop are required;
    # --code-start is optional and additionally enables the console output
    # of UScriptCode enum constants.
    GetOptions(
        "--destdir=s" => \$destdir,
        "--iso15924=s" => \$iso,
        "--prop=s" => \$prop,
        "--code-start=s" => \$code,
    );
    usage() unless defined $destdir;
    usage() unless defined $iso;
    usage() unless defined $prop;

    $outfile = "$destdir/SyntheticPropertyValueAliases.txt";

    $propFH = IO::File->new($prop,"r")
        or die "could not open the file $prop for reading: $! \n";
    $isoFH = IO::File->new($iso,"r")
        or die "could not open the file $iso for reading: $! \n";
    # The output file is opened in "w" mode, so the message says "writing".
    $outFH = IO::File->new($outfile,"w")
        or die "could not open the file $outfile for writing: $! \n";

    # Keep only the script ("sc ; ...") lines of the properties file;
    # they are used below to detect script codes that are already listed there.
    my @propLines;
    while (<$propFH>) {
        next if(!($_ =~/sc ; /));
        push(@propLines, $_);
    }

    printHeader($outFH);

    if(defined $code){
        print "Please add the following to UScriptCode enum in uscript.h.\n";
        print "#ifndef U_HIDE_DRAFT_API\n";
    }

    while (<$isoFH>) {
        next if($_=~/^#/);#skip if the line starts with a comment char
        # Fields are separated by ';': the first is the script code and the
        # third is used as the name; $t and $rest are not used.
        ($script, $t, $name, $rest) = split(/;/,$_,4);
        #sc ; Arab
        $outstr = "sc ; $script";
        $encoded = 0; #false
        # search the propLines to make sure that this script code is not
        # encoded in Unicode
        foreach $key (@propLines){
            if($key =~ /$outstr/){
                $encoded = 1;
            }
        }
        next if($encoded == 1);
        #ignore private use codes
        next if($script =~ /Qa[ab][a-z]/);
        #if($script eq "Qaaa"){
        #    $outstr = $outstr." ; Private_Use_Start\n";
        #}elsif($script eq "Qabx"){
        #    $outstr = $outstr." ; Private_Use_End\n";
        #}else{
        #    $outstr = $outstr." ; $script \n";
        #}
        # The script code serves as both the short and the long alias.
        $outstr = $outstr." ; $script \n";
        print $outFH $outstr;
        #print to console
        if(defined $code){
            # Names containing '(', whitespace, ',' or a byte in 0x80-0xFF
            # are replaced by the script code itself.
            if($name =~ /[(\s,\x80-\xFF]/){
                $name = $script;
            }
            $name =~s/-/_/g;
            $scriptcode = "USCRIPT_".uc($name);
            print " $scriptcode = $code, /* $script */\n";
            $code++;
        }
    }

    if(defined $code){
        print "#endif /* U_HIDE_DRAFT_API */\n";
    }

    close($isoFH);
    close($propFH);
    close($outFH);
}
#-----------------------------------------------------------------------
# Writes the copyright and "machine-generated" banner of
# SyntheticPropertyValueAliases.txt to the given file handle.
# The copyright range ends with the current year.
sub printHeader{
    my ($out) = @_;
    # localtime reports years since 1900; day and month are not needed.
    my $year = (localtime)[5] + 1900;
    #We will print our copyright here + warnings
    # The here-document body and its terminator must stay at column 0.
    print $out <<END_HEADER_COMMENT;
########################################################################
# Copyright (c) 2006-$year, International Business Machines
# Corporation and others. All Rights Reserved.
########################################################################
# file name: SyntheticPropertyValueAliases.txt
# encoding: US-ASCII
# tab size: 8 (not used)
# indentation: 4
# created by: genspva.pl
########################################################################
# This file follows the format of PropertyValueAliases.txt
# It contains synthetic property value aliases not present
# in the UCD. Unlike PropertyValueAliases.txt, it should
# NOT contain a version number.
########################################################################
# THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW
# WHAT YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
########################################################################
# set the same names as short and long names to fit the syntax without
# inventing names that we would have to support forever
# Script (sc)
END_HEADER_COMMENT
}
#-----------------------------------------------------------------------
# Prints the command-line help and terminates the script.
# It is only called when a required option is missing, so it exits with a
# non-zero status to let calling scripts and makefiles notice the failure.
sub usage {
    # The here-document body and its terminator must stay at column 0.
    print <<"END";
Usage:
genspva.pl
Options:
--destdir=<directory>
--iso15924=<file name>
--prop=<PropertyValueAliases.txt>
--code-start=<number>
e.g.: genspva.pl --destdir=<icu>/source/tools/genpname --iso15924=iso15924-utf8-20041025.txt --prop=<icu>/source/data/unidata/PropertyValueAliases.txt --code-start=60
END
    exit(1);
}

File diff suppressed because it is too large Load diff

View file

@ -2,18 +2,7 @@
* Copyright (C) 2002-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*
* MACHINE GENERATED FILE. !!! Do not edit manually !!!
*
* Generated from
* uchar.h
* uscript.h
* Blocks.txt
* PropertyAliases.txt
* PropertyValueAliases.txt
*
* Date: Fri Dec 2 13:00:14 2011
* Unicode version: 6.1.0
* Script: preparse.pl
* machine-generated by: icu/tools/unicode/py/preparseucd.py
*/
/* Unicode version 6.1.0 */
@ -24,7 +13,6 @@ const uint8_t VERSION_3 = 0;
const int32_t STRING_COUNT = 1096;
/* to be sorted */
const AliasName STRING_TABLE[] = {
AliasName("", 0),
AliasName("A", 1),
@ -1124,9 +1112,6 @@ const AliasName STRING_TABLE[] = {
AliasName("xdigit", 1095),
};
/* to be filled in */
int32_t REMAP[1096];
const int32_t NAME_GROUP_COUNT = 1547;
int32_t NAME_GROUP[] = {
@ -2887,6 +2872,7 @@ const Property PROPERTY[] = {
Property((int32_t) UCHAR_WHITE_SPACE, 176, VALUES_binprop_COUNT, VALUES_binprop),
Property((int32_t) UCHAR_XID_CONTINUE, 179, VALUES_binprop_COUNT, VALUES_binprop),
Property((int32_t) UCHAR_XID_START, 181, VALUES_binprop_COUNT, VALUES_binprop),
Property((int32_t) UCHAR_NUMERIC_VALUE, 183, 0, NULL),
Property((int32_t) UCHAR_BIDI_CLASS, 185, VALUES_bc_COUNT, VALUES_bc),
Property((int32_t) UCHAR_BLOCK, 187, VALUES_blk_COUNT, VALUES_blk),
Property((int32_t) UCHAR_CANONICAL_COMBINING_CLASS, 189, VALUES_ccc_COUNT, VALUES_ccc),
@ -2908,6 +2894,8 @@ const Property PROPERTY[] = {
Property((int32_t) UCHAR_SENTENCE_BREAK, 221, VALUES_SB_COUNT, VALUES_SB),
Property((int32_t) UCHAR_TRAIL_CANONICAL_COMBINING_CLASS, 223, VALUES_tccc_COUNT, VALUES_tccc),
Property((int32_t) UCHAR_WORD_BREAK, 225, VALUES_WB_COUNT, VALUES_WB),
Property((int32_t) UCHAR_GENERAL_CATEGORY_MASK, 227, VALUES_gcm_COUNT, VALUES_gcm),
Property((int32_t) UCHAR_SCRIPT_EXTENSIONS, 229, 0, NULL),
Property((int32_t) UCHAR_AGE, 231, 0, NULL),
Property((int32_t) UCHAR_BIDI_MIRRORING_GLYPH, 233, 0, NULL),
Property((int32_t) UCHAR_CASE_FOLDING, 235, 0, NULL),
@ -2921,9 +2909,4 @@ const Property PROPERTY[] = {
Property((int32_t) UCHAR_TITLECASE_MAPPING, 252, 0, NULL),
Property((int32_t) UCHAR_UNICODE_1_NAME, 254, 0, NULL),
Property((int32_t) UCHAR_UPPERCASE_MAPPING, 256, 0, NULL),
Property((int32_t) UCHAR_NUMERIC_VALUE, 183, 0, NULL),
Property((int32_t) UCHAR_GENERAL_CATEGORY_MASK, 227, VALUES_gcm_COUNT, VALUES_gcm),
Property((int32_t) UCHAR_SCRIPT_EXTENSIONS, 229, 0, NULL),
};
/*eof*/

View file

@ -28,6 +28,7 @@
import array
import bisect
import codecs
import datetime
import os
import os.path
import re
@ -177,7 +178,9 @@ def GetProperty(pname):
def GetShortPropertyName(pname):
if pname in _null_values: return pname # pname is already the short name.
prop = GetProperty(pname)
return prop[1][0] if prop else "" # "" for ignored properties.
if not prop: return "" # For ignored properties.
name = prop[1][0]
return name if name else prop[1][1] # Long name if no short name.
def GetShortPropertyValueName(prop, vname):
@ -396,10 +399,15 @@ def AddBinaryProperty(short_name, long_name):
_properties[NormPropName(long_name)] = prop
def AddPOSIXBinaryProperty(short_name, long_name):
AddBinaryProperty(short_name, long_name)
def AddPOSIXBinaryProperty(name):
# We only define a long name for ICU-specific (non-UCD) POSIX properties.
_null_values[name] = False
bin_prop = _properties["Math"]
prop = ("Binary", ["", name], bin_prop[2], bin_prop[3])
_properties[name] = prop
_properties[NormPropName(name)] = prop
# This is to match UProperty UCHAR_POSIX_ALNUM etc.
_properties["posix" + NormPropName(short_name)] = _properties[short_name]
_properties["posix" + NormPropName(name)] = prop
# Match a comment line like
@ -509,11 +517,11 @@ def ParsePropertyAliases(in_file):
AddBinaryProperty("segstart", "Segment_Starter")
# C/POSIX character classes that do not have Unicode property [value] aliases.
# See uchar.h.
AddPOSIXBinaryProperty("alnum", "alnum")
AddPOSIXBinaryProperty("blank", "blank")
AddPOSIXBinaryProperty("graph", "graph")
AddPOSIXBinaryProperty("print", "print")
AddPOSIXBinaryProperty("xdigit", "xdigit")
AddPOSIXBinaryProperty("alnum")
AddPOSIXBinaryProperty("blank")
AddPOSIXBinaryProperty("graph")
AddPOSIXBinaryProperty("print")
AddPOSIXBinaryProperty("xdigit")
def ParsePropertyValueAliases(in_file):
@ -1624,12 +1632,14 @@ def ParseUCharHeader(icu_src_root):
def WritePNamesDataHeader(out_path):
# Build a sorted list of (key0, enum) tuples
# Build a sorted list of (key0, enum, aliases) tuples
# to emulate the output order of the old genpname/preparse.pl.
# key0 is either a preparse.pl property type string (for property names)
# or a Unicode short property name (for property value names).
# enum is the ICU4C enum constant name.
# TODO: rename prop to not collide with usual properties[x]
# aliases is the tuple of the property's or value's names and aliases.
# (We use a tuple, not the original list,
# so that we can use it as a dict key.)
# TODO: once we are sure this works, simplify the order;
# for example, change all "_bp" etc. to just ""
# (outputs property names first in enum order),
@ -1646,9 +1656,12 @@ def WritePNamesDataHeader(out_path):
"Numeric": "_dp",
"String": "_sp"
}
pnames_data = [("binprop", "0"), ("binprop", "1")]
# Only properties that have ICU API.
pnames_data = [
("binprop", "0", tuple(_binary_values["N"])),
("binprop", "1", tuple(_binary_values["Y"]))
]
missing_enums = []
# Only properties that have ICU API.
for (pname, prop_enum) in _property_name_to_enum.iteritems():
prop = _properties[pname]
# Sometimes the uchar.h UProperty type differs
@ -1659,7 +1672,7 @@ def WritePNamesDataHeader(out_path):
type = "_op"
else:
type = prop_type_to_old_type[prop[0]]
pnames_data.append((type, prop_enum))
pnames_data.append((type, prop_enum, tuple(prop[1])))
if type != "_bp" and pname != "age":
short_name_to_enum = prop[2]
if pname.endswith("ccc"):
@ -1667,15 +1680,16 @@ def WritePNamesDataHeader(out_path):
# as "enum" values.
# In the UCD data, these numeric strings are the first value names,
# followed by the short & long value names.
# Omit the numeric strings from the aliases as well.
for name in short_name_to_enum:
pnames_data.append((pname, name))
pnames_data.append((pname, name, tuple(prop[3][name][1:])))
else:
if pname == "gc":
# See comment about _gc_vname_to_enum in ParseUCharHeader().
short_name_to_enum = _gc_vname_to_enum
for (name, enum) in short_name_to_enum.iteritems():
if enum:
pnames_data.append((pname, enum))
pnames_data.append((pname, enum, tuple(prop[3][name])))
else:
missing_enums.append((pname, name))
if missing_enums:
@ -1683,13 +1697,144 @@ def WritePNamesDataHeader(out_path):
"missing uchar.h enum constants for some property values: %s" %
missing_enums)
pnames_data.sort()
for item in pnames_data:
print item
short_script_name_to_enum = _properties["sc"][2]
# print short_script_name_to_enum
# print _property_name_to_enum
# print _properties["ea"][2]
# print _properties["gcm"][2]
# Write pnames_data.h.
year = datetime.date.today().strftime("%Y")
with open(out_path, "w") as out_file:
out_file.write("""/**
* Copyright (C) 2002-""" + year +
""", International Business Machines Corporation and
* others. All Rights Reserved.
*
* machine-generated by: icu/tools/unicode/py/preparseucd.py
*/
""")
out_file.write("/* Unicode version %s */\n" % _ucd_version)
v = _ucd_version.split(".")
while len(v) < 4: v.append("0")
for i in xrange(4):
out_file.write("const uint8_t VERSION_%d = %s;\n" % (i, v[i]))
out_file.write("\n")
# Write the string table with all of the names and aliases
# of all of the properties and their values.
# Unique strings in ASCII order.
# The first string must be the empty string.
strings_set = set([""])
for (key0, enum, aliases) in pnames_data:
for s in aliases: strings_set.add(s)
strings = sorted(strings_set)
out_file.write("const int32_t STRING_COUNT = %d;\n\n" % len(strings))
# While printing, create a mapping from string table entry to index.
string_to_id = {}
out_file.write("const AliasName STRING_TABLE[] = {\n")
for s in strings:
i = len(string_to_id)
out_file.write(' AliasName("%s", %d),\n' % (s, i))
string_to_id[s] = i
out_file.write("};\n\n")
# Emit the name group table.
# A table of name groups. A name group is the list of names and aliases
# for a property or property value.
# The name group table looks like this:
#
# 114, -115, 116, -117, 0, -118, 65, -64, ...
# [0]        [2]        [4]      [6]
#
# The entry at [0] consists of 2 strings, 114 and 115.
# The entry at [2] consists of 116 and 117, etc.
# The last entry of each group is negated to mark the end of the group.
# Build the name group list with nameGroup indices.
name_groups = []
# Count the total number of values, not just the groups.
name_groups_total_length = 0
# Check for duplicate name groups, and reuse the first of a kind.
group_to_int = {}
for (key0, enum, aliases) in pnames_data:
index = group_to_int.get(aliases)
if index == None:
index = name_groups_total_length
group_to_int[aliases] = index
name_groups.append([string_to_id[s] for s in aliases])
name_groups_total_length += len(aliases)
out_file.write("const int32_t NAME_GROUP_COUNT = %d;\n\n" %
name_groups_total_length)
out_file.write("int32_t NAME_GROUP[] = {\n")
# Emit one group per line, with annotations.
max_names = 0 # Maximum number of names & aliases per item.
start = 0
for group in name_groups:
line = " "
aliases = [] # For comments.
for i in group[:-1]:
line += "%d, " % i
aliases.append('"%s"' % strings[i])
# Negative entry terminates the group.
i = group[-1]
line += "%d, " % -i
aliases.append('"%s"' % strings[i])
out_file.write(
line + " " * (24 - len(line)) +
"/* %3d: " % start + ", ".join(aliases) + " */\n")
length = len(group)
if length > max_names: max_names = length
start += length
out_file.write("};\n\n")
out_file.write("#define MAX_NAMES_PER_GROUP %d\n\n" % max_names)
# Emit the enumerated property values.
i = 0
while i < len(pnames_data):
pname = pnames_data[i][0]
if pname.startswith("_"):
i += 1
continue
if pname == "binprop":
count = 2
elif pname == "gc":
count = len(_gc_vname_to_enum)
else:
count = len(_properties[pname][2])
out_file.write("const int32_t VALUES_%s_COUNT = %d;\n\n" %
(pname, count))
out_file.write("const Alias VALUES_%s[] = {\n" % pname)
limit = i + count
while i < limit:
(pname, enum, aliases) = pnames_data[i]
out_file.write(" Alias((int32_t) %s, %d),\n" %
(enum, group_to_int[aliases]))
i += 1
out_file.write("};\n\n")
# Emit the top-level properties (binary, enumerated, etc.).
out_file.write("const int32_t PROPERTY_COUNT = %d;\n\n" %
len(_property_name_to_enum))
out_file.write("const Property PROPERTY[] = {\n")
for (pname, enum, aliases) in pnames_data:
if not pname.startswith("_"): continue
pname = aliases[0]
if not pname: pname = aliases[1] # Long name if no short name.
prop = _properties[pname]
group_index = group_to_int[aliases]
if prop[2] and pname != "age": # Property with named values.
if prop[0] == "Binary": pname = "binprop"
out_file.write(
" Property((int32_t) %s, %d, VALUES_%s_COUNT, VALUES_%s),\n" %
(enum, group_index, pname, pname))
else:
out_file.write(" Property((int32_t) %s, %d, 0, NULL),\n" %
(enum, group_index))
out_file.write("};\n")
# main() ------------------------------------------------------------------- ***