mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
ICU-4301 commit the collation probe tools
X-SVN-Rev: 20601
This commit is contained in:
parent
615bf48660
commit
8fbddcf5c7
27 changed files with 7832 additions and 0 deletions
81
tools/colprobe/Makefile.in
Executable file
81
tools/colprobe/Makefile.in
Executable file
|
@ -0,0 +1,81 @@
|
|||
## Makefile.in for ICU - extra/colprobe
|
||||
## Copyright (c) 2001, International Business Machines Corporation and
|
||||
## others. All Rights Reserved.
|
||||
|
||||
## Source directory information
|
||||
srcdir = @srcdir@
|
||||
top_srcdir = @top_srcdir@
|
||||
|
||||
top_builddir = ../..
|
||||
|
||||
include $(top_builddir)/icudefs.mk
|
||||
|
||||
## Build directory information
|
||||
subdir = extra/colprobe
|
||||
|
||||
## Extra files to remove for 'make clean'
|
||||
CLEANFILES = *~ $(DEPS)
|
||||
|
||||
## Target information
|
||||
TARGET = colprobe
|
||||
LONGNAME = longname
|
||||
|
||||
CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/io
|
||||
LIBS = $(LIBICUI18N) $(LIBICUUC) $(LIBUSTDIO) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M)
|
||||
|
||||
OBJECTS = colprobeNew.o line.o sortedlines.o strengthprobe.o uprinter.o
|
||||
LONGNAME_OBJ = longname.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
LONGNAME_DEPS = $(LONGNAME_OBJ:.o=.d)
|
||||
|
||||
## List of phony targets
|
||||
.PHONY : all all-local install install-local clean clean-local \
|
||||
distclean distclean-local dist dist-local check check-local
|
||||
|
||||
## Clear suffix list
|
||||
.SUFFIXES :
|
||||
|
||||
## List of standard targets
|
||||
all: all-local
|
||||
install: install-local
|
||||
clean: clean-local
|
||||
distclean : distclean-local
|
||||
dist: dist-local
|
||||
check: all check-local
|
||||
|
||||
all-local: $(TARGET)
|
||||
|
||||
install-local:
|
||||
|
||||
dist-local:
|
||||
|
||||
## Remove everything this Makefile can build: the extra clean files, the
## colprobe objects and binary, and the separately built longname helper
## (its object, binary and dependency file).
clean-local:
	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
	$(RMV) $(OBJECTS) $(TARGET)
	$(RMV) $(LONGNAME_OBJ) $(LONGNAME) $(LONGNAME_DEPS)
|
||||
|
||||
distclean-local: clean-local
|
||||
$(RMV) Makefile
|
||||
|
||||
check-local: all-local
|
||||
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
|
||||
|
||||
$(TARGET) : $(OBJECTS)
|
||||
$(LINK.cc) -o $@ $^ $(LIBS)
|
||||
|
||||
$(LONGNAME) : $(LONGNAME_OBJ)
|
||||
$(LINK.cc) -o $@ $^ $(LIBS)
|
||||
|
||||
invoke:
|
||||
ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)
|
||||
|
||||
ifeq (,$(MAKECMDGOALS))
|
||||
-include $(DEPS)
|
||||
else
|
||||
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
|
||||
-include $(DEPS)
|
||||
endif
|
||||
endif
|
1730
tools/colprobe/colprobe.cpp
Executable file
1730
tools/colprobe/colprobe.cpp
Executable file
File diff suppressed because it is too large
Load diff
148
tools/colprobe/colprobe.dsp
Executable file
148
tools/colprobe/colprobe.dsp
Executable file
|
@ -0,0 +1,148 @@
|
|||
# Microsoft Developer Studio Project File - Name="colprobe" - Package Owner=<4>
|
||||
# Microsoft Developer Studio Generated Build File, Format Version 6.00
|
||||
# ** DO NOT EDIT **
|
||||
|
||||
# TARGTYPE "Win32 (x86) Console Application" 0x0103
|
||||
|
||||
CFG=colprobe - Win32 Debug
|
||||
!MESSAGE This is not a valid makefile. To build this project using NMAKE,
|
||||
!MESSAGE use the Export Makefile command and run
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "colprobe.mak".
|
||||
!MESSAGE
|
||||
!MESSAGE You can specify a configuration when running NMAKE
|
||||
!MESSAGE by defining the macro CFG on the command line. For example:
|
||||
!MESSAGE
|
||||
!MESSAGE NMAKE /f "colprobe.mak" CFG="colprobe - Win32 Debug"
|
||||
!MESSAGE
|
||||
!MESSAGE Possible choices for configuration are:
|
||||
!MESSAGE
|
||||
!MESSAGE "colprobe - Win32 Release" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE "colprobe - Win32 Debug" (based on "Win32 (x86) Console Application")
|
||||
!MESSAGE
|
||||
|
||||
# Begin Project
|
||||
# PROP AllowPerConfigDependencies 0
|
||||
# PROP Scc_ProjName ""
|
||||
# PROP Scc_LocalPath ""
|
||||
CPP=cl.exe
|
||||
RSC=rc.exe
|
||||
|
||||
!IF "$(CFG)" == "colprobe - Win32 Release"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 0
|
||||
# PROP BASE Output_Dir "Release"
|
||||
# PROP BASE Intermediate_Dir "Release"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 0
|
||||
# PROP Output_Dir "Release"
|
||||
# PROP Intermediate_Dir "Release"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD CPP /nologo /W3 /GX /O2 /I "../../../include" /I "../../tools/toolutil" /I "../../common" /I "../../i18n" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
|
||||
# ADD BASE RSC /l 0x409 /d "NDEBUG"
|
||||
# ADD RSC /l 0x409 /d "NDEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
|
||||
# ADD LINK32 icuio.lib icuuc.lib icuin.lib icutu.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /libpath:"../../../lib"
|
||||
|
||||
!ELSEIF "$(CFG)" == "colprobe - Win32 Debug"
|
||||
|
||||
# PROP BASE Use_MFC 0
|
||||
# PROP BASE Use_Debug_Libraries 1
|
||||
# PROP BASE Output_Dir "Debug"
|
||||
# PROP BASE Intermediate_Dir "Debug"
|
||||
# PROP BASE Target_Dir ""
|
||||
# PROP Use_MFC 0
|
||||
# PROP Use_Debug_Libraries 1
|
||||
# PROP Output_Dir "Debug"
|
||||
# PROP Intermediate_Dir "Debug"
|
||||
# PROP Ignore_Export_Lib 0
|
||||
# PROP Target_Dir ""
|
||||
MTL=midl.exe
|
||||
# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
|
||||
# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "../../../include" /I "../../tools/toolutil" /I "../../common" /I "../../i18n" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
|
||||
# ADD BASE RSC /l 0x409 /d "_DEBUG"
|
||||
# ADD RSC /l 0x409 /d "_DEBUG"
|
||||
BSC32=bscmake.exe
|
||||
# ADD BASE BSC32 /nologo
|
||||
# ADD BSC32 /nologo
|
||||
LINK32=link.exe
|
||||
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
|
||||
# ADD LINK32 icuiod.lib icuucd.lib icuind.lib icutud.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"../../../lib"
|
||||
|
||||
!ENDIF
|
||||
|
||||
# Begin Target
|
||||
|
||||
# Name "colprobe - Win32 Release"
|
||||
# Name "colprobe - Win32 Debug"
|
||||
# Begin Group "Source Files"
|
||||
|
||||
# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\colprobeNew.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\line.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\sortedlines.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\strengthprobe.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\targetsetgenerator.cpp
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\uprinter.cpp
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "Header Files"
|
||||
|
||||
# PROP Default_Filter "h;hpp;hxx;hm;inl"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\colprobe.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\line.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\sortedlines.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\strengthprobe.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\targetsetgenerator.h
|
||||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\uprinter.h
|
||||
# End Source File
|
||||
# End Group
|
||||
# Begin Group "Resource Files"
|
||||
|
||||
# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
|
||||
# End Group
|
||||
# End Target
|
||||
# End Project
|
15
tools/colprobe/colprobe.h
Executable file
15
tools/colprobe/colprobe.h
Executable file
|
@ -0,0 +1,15 @@
|
|||
#ifndef COLPROBE_H
|
||||
#define COLPROBE_H
|
||||
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/normlzr.h"
|
||||
|
||||
typedef int (*CompareFn) (const void *elem1, const void *elem2);
|
||||
typedef int (*GetSortKeyFn) (const UChar *string, int32_t len, uint8_t *buffer, int32_t buffCapacity);
|
||||
//typedef int (__cdecl *CompareFn)(const void *elem1, const void *elem2);
|
||||
void generateRepertoire(const char *locale, UnicodeSet &rep, UBool &hanAppears, UErrorCode &status);
|
||||
UnicodeSet flatten(const UnicodeSet &source, UErrorCode &status);
|
||||
|
||||
//UnicodeSet generateRepertoire(const char *locale);
|
||||
|
||||
#endif
|
1078
tools/colprobe/colprobeNew.cpp
Executable file
1078
tools/colprobe/colprobeNew.cpp
Executable file
File diff suppressed because it is too large
Load diff
164
tools/colprobe/createComparisonTables.pl
Executable file
164
tools/colprobe/createComparisonTables.pl
Executable file
|
@ -0,0 +1,164 @@
|
|||
#! /usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
|
||||
|
||||
# Builds "<locale>_collation.html": a three-column table comparing the
# collation rules found for the locale under common/, linux/ and winxp/.
# The locale is the first command-line argument.
my $locale = $ARGV[0];
die "usage: $0 <locale>\n" unless defined $locale && length $locale;

# Display name of the locale, as printed by the external longname helper.
my $long_name = `/home/weiv/src/icu/source/extra/colprobe/longname $locale`;
$long_name = "" unless defined $long_name;
chomp $long_name;
my $pageTitle = $locale."_collation";
my $filename = $pageTitle.".html";

# TABLE stays a bareword handle because readRules() prints to it by name.
open(TABLE, '>', $filename) or die "cannot open $filename for writing: $!\n";

print TABLE <<"EndOfTemplate";
<html>

<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>$pageTitle</title>
<style>
<!--
table { border-spacing: 0; border-collapse: collapse; width: 100%;
border: 1px solid black }
td, th { width: 10%; border-spacing: 0; border-collapse: collapse; color: black;
vertical-align: top; border: 1px solid black }
-->
</style>
</head>

<body bgcolor="#FFFFFF">

<p><b><font color="#FF0000">Collation:</font> $locale ($long_name) <a href="http://oss.software.ibm.com/cgi-bin/icu/lx/en/?_=$locale">Demo</a>,

<a href="../all_diff_xml/comparison_charts.html">Cover
Page</a>, <a href="../all_diff_xml/index.html">Index</a></b></p>
<table>
<tr>
EndOfTemplate

# --- COMMON column header ---
my $dirCommon = "common";
my $refCommon = $dirCommon."/UCARules.txt";
my $nameCommon = $dirCommon."/".$locale."_collation.html";
my $colorCommon = "#AD989D";

if (!(-e $nameCommon)) {
    # Fall back to the part of the locale before the first underscore.
    # A locale without an underscore falls back to itself.
    my ($loc) = $locale =~ /^([^_]*)_/;
    $loc = $locale unless defined $loc;
    $nameCommon = "$dirCommon/$loc"."_collation.html";
}
print TABLE " <th bgcolor=\"$colorCommon\">COMMON (<a href=\"$refCommon\">UCA</a> <a href=\"../$dirCommon/xml/$locale.xml\">xml</a>)</th>\n";

# --- LINUX column header ---
my $dirLinux = "linux";
my $refLinux = $dirLinux."/".$locale.".utf8_default_raw.html";
my $rawLinux = $dirLinux."/".$locale.".utf8_raw.html";
my $defLinux = $dirLinux."/".$locale;
my $nameLinux = "$dirLinux/$locale".".utf8_collation.html";
my $colorLinux = "#1191F1";

print TABLE " <th bgcolor=\"$colorLinux\">LINUX (";
if (!(-e $nameLinux)) {
    #try the variant that has @euro stuck in
    $nameLinux = "$dirLinux/$locale".'.utf8@euro_collation.html';
    if (-e $nameLinux) {
        $refLinux = $dirLinux."/".$locale.'.utf8@euro_default_raw.html';
        $rawLinux = $dirLinux."/".$locale.'.utf8@euro_raw.html';
    }
}
if (-e $nameLinux) {
    print TABLE "<a href=\"$rawLinux\">Ordering</a> <a href=\"$defLinux\">Definition</a> <a href=\"$refLinux\">base</a>";
}

print TABLE " <a href=\"../$dirLinux/xml/$locale.xml\">xml</a>)</th>\n";

# --- WINDOWS column header ---
my $dirWin = "winxp";
my $refWin = $dirWin."/".$locale."_default_raw.html";
my $rawWin = $dirWin."/".$locale."_raw.html";
my $nameWin = "$dirWin/$locale"."_collation.html";
my $colorWin = "#98FB98";

print TABLE " <th bgcolor=\"$colorWin\">WINDOWS (";
if (-e $nameWin) {
    print TABLE "<a href=\"$rawWin\">Ordering</a> <a href=\"$refWin\">base</a> ";
}
print TABLE "<a href=\"../windows/xml/$locale.xml\">xml</a>)</th>\n";

print TABLE " </tr>\n <tr>";

# One cell of rules per platform, in the same order as the headers.
readRules($nameCommon, $colorCommon, "Same as the UCA.");
readRules($nameLinux, $colorLinux, "No data available.");
readRules($nameWin, $colorWin, "No data available.");

print TABLE <<"EndOfFooter";
</tr>
</table>

</body>
</html>
EndOfFooter

# Buffered write errors only surface when the handle is closed.
close(TABLE) or die "cannot close $filename: $!\n";
|
||||
|
||||
|
||||
# readRules($file, $color, $comment)
#
# Writes one <td> cell with background $color to the TABLE filehandle.
#
# If $file exists, the cell holds the lines of $file that follow a line
# containing "Sequence", stopping at the next line that ends in "}<br>".
# Copying restarts (and the line count is reset) at every later "Sequence"
# line.  When the last such block is empty the cell says
# "Same ordering as base".  If $file does not exist, the cell holds $comment.
#
# Dies if $file exists but cannot be opened.
sub readRules {
    my ($filename, $color, $comment) = @_;

    if (!(-e $filename)) {
        print TABLE "<td bgcolor=\"$color\">\n$comment</td>\n";
        return;
    }

    # Three-argument open: the file name is never parsed for a mode.
    open(my $file, '<', $filename)
        or die "something very strange happened: cannot open $filename: $!\n";
    print TABLE "<td bgcolor=\"$color\">\n";

    my $noLines = 0;    # lines copied since the last "Sequence" line
    my $printOut = 0;   # true while between "Sequence" and the closing "}<br>"
    while (my $line = <$file>) {
        # Checked first so that the closing line itself is not copied.
        $printOut = 0 if $line =~ /\}\<br\>$/;
        if ($printOut) {
            print TABLE $line;
            $noLines++;
        }
        # Checked last so that the "Sequence" line itself is not copied.
        if ($line =~ /Sequence/) {
            $printOut = 1;
            print "found sequence\n";
            $noLines = 0;
        }
    }
    close($file);

    print TABLE "Same ordering as base\n" if !$noLines;
    print TABLE "</td>\n";
    return;
}
|
||||
|
||||
|
||||
# Tasting of food product
|
||||
# 650-574-4551 $50 1 hour
|
||||
|
||||
|
||||
# <td bgcolor="#AD989D">1.0-alpha</td>
|
||||
# <td bgcolor="#FF6633">1.0</td>
|
||||
# <td bgcolor="#FF6633">=</td>
|
||||
# <td bgcolor="#FF6633"><span title="006E {LATIN SMALL LETTER N}">&n</span><br>
|
||||
# <span title="006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> < ny</span><br>
|
||||
|
||||
# <span title="006E 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y} / 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> = nny / ny</span><br>
|
||||
# <span title="006E 0059 {LATIN SMALL LETTER N} {LATIN CAPITAL LETTER Y}"> <<< nY</span><br>
|
||||
# </td>
|
||||
# <td bgcolor="#FF6633">=</td>
|
||||
# <td bgcolor="#FFFF33">1.2</td>
|
||||
|
||||
# <td bgcolor="#98FB98">Windows XP</td>
|
||||
# <td bgcolor="#FF6633">=</td>
|
||||
# <td bgcolor="#FF6633">=</td>
|
209
tools/colprobe/doComparisonTable.pl
Executable file
209
tools/colprobe/doComparisonTable.pl
Executable file
|
@ -0,0 +1,209 @@
|
|||
#! /usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
use IO::File;
|
||||
|
||||
|
||||
# Builds "<locale>.html": a three-column table comparing the collation rules
# found for the locale under icucollations/, linuxcollations/ and
# w2kcollations/.  The locale is the first command-line argument.
my $locale = $ARGV[0];
die "usage: $0 <locale>\n" unless defined $locale && length $locale;

# Display name of the locale, as printed by the external longname helper.
my $long_name = `/home/weiv/src/icu/source/extra/colprobe/longname $locale`;
$long_name = "" unless defined $long_name;
chomp $long_name;
print "Long name is $long_name\n";
my $pageTitle = $locale." collation";
my $filename = $locale.".html";

# TABLE stays a bareword handle because readRules() prints to it by name.
open(TABLE, '>', $filename) or die "cannot open $filename for writing: $!\n";

print TABLE <<"EndOfTemplate";
<html>

<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>$pageTitle</title>
<style>
<!--
table { border-spacing: 0; border-collapse: collapse; width: 100%;
border: 1px solid black }
td, th { width: 10%; border-spacing: 0; border-collapse: collapse; color: black;
vertical-align: top; border: 1px solid black }
-->
</style>
</head>

<body bgcolor="#FFFFFF">

<p><b><font color="#FF0000">Collation:</font> $locale ($long_name) <a href="http://oss.software.ibm.com/cgi-bin/icu/lx/en/?_=$locale">Demo</a>,

<a href="../../comparison_charts.html">Cover
Page</a>, <a href="../main/index.html">Locale Diffs Index</a>, <a href="index.html">Collation Diffs Index</a></b></p>
<table>
<tr>
EndOfTemplate

# Part of the locale before the first underscore, used as the fallback when
# no file exists for the full locale.  A locale without an underscore falls
# back to itself.
my ($language) = $locale =~ /^([^_]*)_/;
$language = $locale unless defined $language;

# --- COMMON column header ---
my $dirCommon = "icucollations";
my $nameCommon = $dirCommon."/".$locale."_collation.html";
my $colorCommon = "#AD989D";

my $loc = $locale;

if (!(-e $nameCommon)) {
    $loc = $language;
    $nameCommon = "$dirCommon/$loc"."_collation.html";
}

print "Common is $nameCommon\n";

print TABLE " <th bgcolor=\"$colorCommon\">COMMON (";
if (-e $nameCommon) {
    print TABLE "<a href=\"../../common/collation/$loc.xml\">xml</a> ";
}
print TABLE "<a href=\"../../common/collation/root.xml\">UCA</a>)</th>\n";

# --- LINUX column header ---
my $dirLinux = "linuxcollations";
my $nameLinux = "$dirLinux/$locale"."_collation.html";
my $colorLinux = "#1191F1";

print TABLE " <th bgcolor=\"$colorLinux\">LINUX";
if (!(-e $nameLinux)) {
    #try the variant that has @euro stuck in
    $nameLinux = "$dirLinux/$locale".'.utf8@euro_collation.html';
}
if (-e $nameLinux) {
    print TABLE " (<a href=\"../../linux/collation/$locale.xml\">xml</a>";
    # Called with & so that any prototype on getBaseLocale is bypassed.
    my $linuxBase = &getBaseLocale("$dirLinux/base", $locale);
    if (defined $linuxBase && $linuxBase ne "") {
        print TABLE " <a href=\"../../linux/collation/$linuxBase.xml\">Base ($linuxBase)</a>";
    }
    print TABLE ")";
}
print TABLE "</th>\n";

# --- WINDOWS column header ---
my $dirWin = "w2kcollations";
my $nameWin = "$dirWin/$locale"."_collation.html";
my $colorWin = "#98FB98";
$loc = $locale;
#try fallback for windows
print TABLE " <th bgcolor=\"$colorWin\">WINDOWS";
if (!(-e $nameWin)) {
    $loc = $language;
    $nameWin = "$dirWin/$loc"."_collation.html";
}

print "Windows loc is $loc\n";

if (-e $nameWin) {
    print TABLE " (<a href=\"../../windows/collation/$loc.xml\">xml</a>";
    # Called with & so that any prototype on getBaseLocale is bypassed.
    my $winBase = &getBaseLocale("$dirWin/base", $locale);
    if (defined $winBase && $winBase ne "") {
        # Leading space keeps the two links apart, as in the LINUX header.
        print TABLE " <a href=\"../../windows/collation/$winBase.xml\">base ($winBase)</a>";
    }
    print TABLE ")";
}
print TABLE "</th>\n";
print TABLE " </tr>\n <tr>";

# One cell of rules per platform, in the same order as the headers.
readRules($nameCommon, $colorCommon, "Same as the UCA.");
readRules($nameLinux, $colorLinux, "No data available.");
readRules($nameWin, $colorWin, "No data available.");

print TABLE <<"EndOfFooter";
</tr>
</table>

</body>
</html>
EndOfFooter

# Buffered write errors only surface when the handle is closed.
close(TABLE) or die "cannot close $filename: $!\n";
|
||||
|
||||
|
||||
# readRules($file, $color, $comment)
#
# Writes one <td> cell with background $color to the TABLE filehandle.
#
# If $file exists, the cell holds the lines of $file that follow a line
# containing "Sequence", stopping at the next line that ends in "}<br>".
# Empty lines, and lines that end in a space (plain or "&nbsp;") directly
# before "<br>", are skipped and not counted.  Copying restarts (and the
# line count is reset) at every later "Sequence" line.  When the last such
# block contributed no lines the cell says "Same ordering as base".
# If $file does not exist, the cell holds $comment.
#
# Dies if $file exists but cannot be opened.
sub readRules {
    my ($filename, $color, $comment) = @_;

    if (!(-e $filename)) {
        print TABLE "<td bgcolor=\"$color\">\n$comment</td>\n";
        return;
    }

    # Three-argument open: the file name is never parsed for a mode.
    open(my $file, '<', $filename)
        or die "something very strange happened: cannot open $filename: $!\n";
    print TABLE "<td bgcolor=\"$color\">\n";

    my $noLines = 0;    # lines copied since the last "Sequence" line
    my $printOut = 0;   # true while between "Sequence" and the closing "}<br>"
    while (my $line = <$file>) {
        # Checked first so that the closing line itself is not copied.
        $printOut = 0 if $line =~ /\}\<br\>$/;
        if ($printOut && $line !~ /^$/ && $line !~ /(?: |&nbsp;)<br>$/) {
            print TABLE $line;
            $noLines++;
        }
        # Checked last so that the "Sequence" line itself is not copied.
        if ($line =~ /Sequence/) {
            $printOut = 1;
            print "found sequence\n";
            $noLines = 0;
        }
    }
    close($file);

    print TABLE "Same ordering as base\n" if !$noLines;
    print TABLE "</td>\n";
    return;
}
|
||||
|
||||
# getBaseLocale($basefile, $locale)
#
# Looks up $locale in $basefile and returns the base locale recorded for it.
#
# The first line containing the literal text "<$locale>" is split on ">";
# the second field, with its leading whitespace and "<" removed, is the
# result.  Returns the empty string when no line mentions the locale, so
# callers can compare the result with "" safely.
#
# Dies if $basefile cannot be opened.
sub getBaseLocale {
    my ($basefile, $locale) = @_;

    my $baseFH = IO::File->new($basefile, "r")
        or die "could not open the file $basefile for reading: $! \n";

    my $found = "";
    while (defined(my $line = <$baseFH>)) {
        # \Q...\E: the locale is matched literally, not as a pattern.
        next unless $line =~ /\<\Q$locale\E\>/;
        my (undef, $bse) = split(/\>/, $line);
        $bse = "" unless defined $bse;
        $bse =~ s/^\s+\<//;
        $found = $bse;
        last;
    }
    $baseFH->close;
    return $found;
}
|
||||
|
||||
|
||||
# Tasting of food product
|
||||
# 650-574-4551 $50 1 hour
|
||||
|
||||
|
||||
# <td bgcolor="#AD989D">1.0-alpha</td>
|
||||
# <td bgcolor="#FF6633">1.0</td>
|
||||
# <td bgcolor="#FF6633">=</td>
|
||||
# <td bgcolor="#FF6633"><span title="006E {LATIN SMALL LETTER N}">&n</span><br>
|
||||
# <span title="006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> < ny</span><br>
|
||||
|
||||
# <span title="006E 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y} / 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> = nny / ny</span><br>
|
||||
# <span title="006E 0059 {LATIN SMALL LETTER N} {LATIN CAPITAL LETTER Y}"> <<< nY</span><br>
|
||||
# </td>
|
||||
# <td bgcolor="#FF6633">=</td>
|
||||
# <td bgcolor="#FFFF33">1.2</td>
|
||||
|
||||
# <td bgcolor="#98FB98">Windows XP</td>
|
||||
# <td bgcolor="#FF6633">=</td>
|
||||
# <td bgcolor="#FF6633">=</td>
|
246
tools/colprobe/extractCollationData.pl
Executable file
246
tools/colprobe/extractCollationData.pl
Executable file
|
@ -0,0 +1,246 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
use Unicode::UCD 'charinfo';
|
||||
use Unicode::Normalize;
|
||||
use utf8;
|
||||
use open ':utf8';
|
||||
|
||||
# Parser state.  None of it is reset between input files.
my $printout = 0;   # true while a Sequence block is being written to OUT
my $braces = 0;     # brace depth inside the block being read
my $colls = 0;      # set once a line mentioning "collations" has been seen
my $aliased = 0;    # true while an alias declaration is being read
my $newName = "";   # alias target; non-empty only while re-reading after an alias
my $filename;       # name of the HTML file currently being written
my $suffix;         # "_TYPE" for non-standard collation types, "" for standard
my $locale;         # locale name derived from the input file name

# For every resource bundle named on the command line: run it through uconv,
# find the collation "Sequence" blocks and write each one, converted by
# processLine(), to "<locale>[_<suffix>]_collation.html".  When the
# collations are aliased, the bundle of the alias target is read instead,
# while the output keeps the original locale's name.
NEW_FILE:
foreach my $arg (@ARGV) {
    if ($newName =~ /^$/) {
        $locale = $arg;
        $locale =~ s#^.*/##g;
        $locale =~ s/\.txt//;
    } else {
        # Re-reading after an alias: keep $locale, clear the pending alias.
        $newName = "";
    }
    my $command = "/home/weiv/build/current/bin/uconv -x hex-any/Java -f utf8 -t utf8 $arg";
    print $command."\n";
    my @bundle = `$command`;
    warn "command failed (status $?): $command\n" if $? != 0;

    foreach my $raw (@bundle) {
        # skip lines that start with a // comment
        next if $raw =~ /^\/\//;

        if ($raw =~ /collations/) {
            print "found Collations\n";
            $colls = 1;
            if ($raw =~ /alias/) {
                print "collations are aliased\n";
                $aliased = 1;
            }
        }
        if ($aliased) {
            print "processing aliased data: $raw\n";
            if ($raw =~ /\{/) {
                print "Braces opened\n";
                $braces = 1;
            }
            if ($braces && $raw =~ /\"(.*)\"/) {
                $newName = $1;
                print "Aliasing to $newName\n";
            }
            if ($braces && $raw =~ /\}/) {
                $braces = 0;
                print "Braces closed\n";
                $aliased = 0;
                print "Switching from $filename to $newName\n";
                # Point $arg at the alias target's bundle and process it in
                # place of the current one.  The locale is matched literally.
                $arg =~ s/\Q$locale\E\.txt$/${newName}.txt/;
                print "$arg\n";
                redo NEW_FILE;
            }
        }
        if ($colls && $raw =~ /(standard|phonebook|traditional|pinyin|stroke|direct)/) {
            my $type = $1;
            print "found $type collation\n";
            $suffix = "_".uc($type);
            if ($raw =~ /standard/) {
                $suffix = "";
            }
        }
        if ($colls && $raw =~ /Sequence/) {
            $printout = 1;
            $filename = $locale;
            if ($suffix) {
                # $suffix already starts with "_", so the name gets a double
                # underscore here, e.g. "de__PHONEBOOK".
                $filename .= "_".$suffix;
            }
            $filename .= "_collation.html";
            print "filename is $filename\n";
            open(OUT, '>', $filename)
                or die "cannot open $filename for writing: $!\n";
            printHeading($arg);
        }

        next unless $printout;

        # Track brace depth so that the end of the Sequence block is found.
        $braces++ if $raw =~ /\{/;
        print OUT processLine($raw);
        print OUT "\n";
        if ($raw =~ /\}/) {
            $braces--;
            if ($braces == 0) {
                $printout = 0;
                printFooting();
                close(OUT) or die "cannot close $filename: $!\n";
            }
        }
    }
}
|
||||
|
||||
# processLine($line)
#
# Converts one line of a collation rule string into HTML and returns it.
#
# The line is stripped of // comments, the "Sequence" keyword, a leading
# "{", a trailing "}" and surrounding double quotes.  It is then broken up so
# that every option ("[...]"), reset ("&"), strength ("<" to "<<<<") and
# equality ("=") starts a piece of its own.  Each non-empty piece becomes one
# output line ending in "<br>\n".  Resets, strengths and equalities are
# wrapped in a <span> whose title lists the code points and character names
# of the text that follows the operator.  "&", "<", ">" and spaces in the
# piece are replaced by HTML entities.
sub processLine {
    my $text = shift;

    # remove comments
    $text =~ s#//.*$##g;
    # remove "Sequence" if present
    $text =~ s/Sequence\s*//;
    # remove leading brace if present
    $text =~ s/^\s*\{//;
    # remove trailing brace if present
    $text =~ s/\}\s*$//;
    # remove trailing quote
    $text =~ s/"\s*$//;
    # remove lead quote
    $text =~ s/^\s*"//;
    # separate options
    $text =~ s/(\[.*\])/\n$1/g;
    # separate resets
    $text =~ s/\s*\&\s*/\n& /g;
    # separate strengths and insert spaces
    $text =~ s/\s*(<{1,4})\s*/\n$1 /g;
    # separate equals and insert spaces
    $text =~ s/\s*=\s*/\n= /g;

    my $result = "";

    # break into individual reset/strength/setting lines
    foreach my $line (split(/\n/, $text)) {
        # skip empty lines
        next if $line =~ /^$/;

        my $name = "";      # opening <span title="..."> tag, if any
        my $spanEnd = "";   # matching </span>, if any
        my $lrm = "";       # left-to-right mark, set for right-to-left text
        $line = NFC($line);

        # for resets and strengths we will get name for elements
        if ($line =~ /<{1,4} |= |& \[.*\]|& /) {
            my $rest = substr($line, $+[0]);   # text after the operator
            my $names = "";
            my $codes = "";
            $name = "<span title=\"";
            foreach my $char (split(//, $rest)) {
                my $charVal = ord($char);
                if ($charVal == 0x002F || $charVal == 0x007C) {
                    # "/" and "|" are part of the syntax, so they are
                    # entered without translation to the name
                    $name .= $codes.$names." $char ";
                    $codes = "";
                    $names = "";
                } elsif ($charVal == 0x0027) {
                    # quote requires more processing; it is left out of
                    # the title for now
                } else {
                    # charinfo() returns undef for code points it does not
                    # know; fall back to the bare hex code and an empty name.
                    my $info = charinfo($charVal) || {};
                    my $code = defined $info->{'code'} ? $info->{'code'}
                                                       : sprintf("%04X", $charVal);
                    my $uname = defined $info->{'name'} ? $info->{'name'} : "";
                    my $bidi = defined $info->{'bidi'} ? $info->{'bidi'} : "";
                    $codes .= $code." ";
                    $names .= "{".$uname."} ";
                    if ($bidi eq "R" || $bidi eq "AL") {
                        $lrm = "\x{200E}";   # LEFT-TO-RIGHT MARK
                    }
                }
            }
            $name .= $codes.$names."\" >";
            $spanEnd = "</span>";
        }

        # strength and equality lines are indented by one space; reset and
        # option lines are not
        $line = " $line" if $line =~ /^[<=]/;

        # insert spaces around slashes (fix expansions)
        $line =~ s#/# / #g;
        # replace & first, so that the entities added below are not re-escaped
        $line =~ s/\&/&amp;/g;
        # replace spaces
        $line =~ s/ /&nbsp;/g;
        # replace <
        $line =~ s/</&lt;/g;
        # replace >
        $line =~ s/>/&gt;/g;

        $result .= $name.$lrm.$line.$spanEnd."<br>\n";
    }

    return $result;
}
|
||||
|
||||
# printHeading($name)
#
# Writes the opening part of a collation page to the OUT filehandle: the
# HTML head, two "#" description lines and the three opening lines of the
# resource bundle.  The first ".txt" found in $name is removed before the
# name is printed.
sub printHeading {
    my ($bundle) = @_;
    $bundle =~ s/\.txt//;

    my @heading = (
        '<html>',
        '<head>',
        '<meta http-equiv="content-type" content="text/html; charset=utf-8">',
        '</head>',
        '# Collation data resource bundle generated for locale: ' . $bundle . '<br>',
        '# For platform icu reference platform UCA<br><br>',
        '',
        '',
        $bundle . ' {<br>',
        'CollationElements {<br>',
        'Sequence {<br>',
    );

    # every entry is one output line
    print OUT map { $_ . "\n" } @heading;
}
|
||||
|
||||
# printFooting()
#
# Writes the closing part of a collation page to the OUT filehandle: the
# three closing braces of the resource bundle and the closing </html> tag.
# No </pre> is written, because the page never opens a <pre> element.
sub printFooting {
    print OUT <<"EndOfFooting";
}<br>
}<br>
}<br>

</html>
EndOfFooting
}
|
24
tools/colprobe/gcd2.pl
Executable file
24
tools/colprobe/gcd2.pl
Executable file
|
@ -0,0 +1,24 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
|
||||
# Runs colprobe in --diff mode for every locale listed in locale.txt.
# Logs go to "<platform>logs2/" and a copy of the matching locale definition
# from /usr/share/i18n/locales goes to "<platform>/".  The platform name is
# the first command-line argument.

#my $localeMinusA = `locale -a`;
my $localeMinusA = `cat ~/src/icu/source/extra/colprobe/locale.txt`;
$localeMinusA = "" unless defined $localeMinusA;
# one locale per line; blank lines are ignored
my @locales = grep { length } split(/\n/, $localeMinusA);

my $platform = $ARGV[0];
die "usage: $0 <platform>\n" unless defined $platform && length $platform;

# Existing directories are reused; anything else that stops mkdir is fatal.
foreach my $dir ($platform."logs2", $platform) {
    -d $dir or mkdir $dir or die "cannot create directory $dir: $!\n";
}

foreach my $locale (@locales) {
    my $command = "~/src/icu/source/extra/colprobe/colprobe --platform $platform --ref $platform --diff $locale >$platform"."logs2/$locale"."Log.txt 2>&1";
    # The definition file is named after the part before the first ".".
    my ($definition) = split(/\./, $locale);
    $command .= "; cp /usr/share/i18n/locales/$definition $platform/";
    print "$command\n";
    # The command line needs the shell (redirection, "~", ";").
    system($command);
}
|
23
tools/colprobe/genCollData.pl
Executable file
23
tools/colprobe/genCollData.pl
Executable file
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
|
||||
# Runs colprobe with resource-bundle output for every locale reported by
# "locale -a".  Logs go to "<platform>logs/" and a copy of the matching
# locale definition from /usr/share/i18n/locales goes to "<platform>/".
# The platform name (first command-line argument) only names these
# directories; colprobe itself is always run with "--platform linux".
my $localeMinusA = `locale -a`;
$localeMinusA = "" unless defined $localeMinusA;
# one locale per line; blank lines are ignored
my @locales = grep { length } split(/\n/, $localeMinusA);

my $platform = $ARGV[0];
die "usage: $0 <platform>\n" unless defined $platform && length $platform;

# Existing directories are reused; anything else that stops mkdir is fatal.
foreach my $dir ($platform."logs", $platform) {
    -d $dir or mkdir $dir or die "cannot create directory $dir: $!\n";
}

foreach my $locale (@locales) {
    my $command = "~/src/icu/source/extra/colprobe/colprobe --output resb --platform linux --ref linux $locale >$platform"."logs/$locale"."Log.txt 2>&1";
    # The definition file is named after the part before the first ".".
    my ($definition) = split(/\./, $locale);
    $command .= "; cp /usr/share/i18n/locales/$definition $platform/";
    print "$command\n";
    # The command line needs the shell (redirection, "~", ";").
    system($command);
}
|
701
tools/colprobe/line.cpp
Executable file
701
tools/colprobe/line.cpp
Executable file
|
@ -0,0 +1,701 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File line.cpp
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 03/18/2003 weiv Creation.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "line.h"
|
||||
#include <stdio.h>
|
||||
|
||||
UnicodeSet * Line::needsQuoting = NULL;
|
||||
|
||||
void
|
||||
Line::init()
|
||||
{
|
||||
len = 0;
|
||||
expLen = 0;
|
||||
strength = UCOL_OFF;
|
||||
strengthFromEmpty = UCOL_OFF;
|
||||
cumulativeStrength = UCOL_OFF;
|
||||
expStrength = UCOL_OFF;
|
||||
previous = NULL;
|
||||
next = NULL;
|
||||
left = NULL;
|
||||
right = NULL;
|
||||
isContraction = FALSE;
|
||||
isExpansion = FALSE;
|
||||
isRemoved = FALSE;
|
||||
isReset = FALSE;
|
||||
expIndex = 0;
|
||||
firstCC = 0;
|
||||
lastCC = 0;
|
||||
sortKey = NULL;
|
||||
}
|
||||
|
||||
Line::Line()
{
    // Default construction: empty line with both text buffers zero-filled.
    init();
    memset(name, 0, sizeof(name));
    memset(expansionString, 0, sizeof(expansionString));
}
|
||||
|
||||
// Builds a line from the first len UTF-16 code units of name.
//
// The text is truncated to the capacity of the internal buffer and is always
// NUL-terminated, because other members (operator=, operator==, toString)
// treat this->name as a NUL-terminated string. A NULL pointer or a
// non-positive length produces an empty line with both combining classes 0.
Line::Line(const UChar* name, int32_t len)
{
    init();
    memset(this->name, 0, sizeof(this->name));
    memset(expansionString, 0, sizeof(expansionString));

    // one unit is reserved for the terminating NUL
    const int32_t capacity = (int32_t)(sizeof(this->name)/sizeof(this->name[0])) - 1;
    if(name == NULL || len < 0) {
        len = 0;
    } else if(len > capacity) {
        len = capacity;
    }
    this->len = len;

    if(len > 0) {
        u_memcpy(this->name, name, len);
        UChar32 c;
        // combining class of the first code point ...
        U16_GET(this->name, 0, 0, len, c);
        firstCC = u_getCombiningClass(c);
        // ... and of the last one
        U16_GET(this->name, 0, len-1, len, c);
        lastCC = u_getCombiningClass(c);
    }
}
|
||||
|
||||
// Builds a one-code-unit line holding just `name`.
Line::Line(const UChar name)
{
    init();
    memset(expansionString, 0, sizeof(expansionString));

    this->name[0] = name;
    this->name[1] = 0;
    len = 1;

    // a single unit is both the first and the last character
    firstCC = u_getCombiningClass(name);
    lastCC = firstCC;
}
|
||||
|
||||
// Builds a line whose name is a copy of `string`.
//
// Both text buffers are zero-filled first, as in the other constructors:
// init() does not touch them, and operator= later reads expansionString as a
// NUL-terminated string.
Line::Line(const UnicodeString &string)
{
    init();
    memset(name, 0, sizeof(name));
    memset(expansionString, 0, sizeof(expansionString));
    setTo(string);
}
|
||||
|
||||
// Builds a line from the textual form produced by write().
//
// init() runs first so that members initFromString() never assigns (sortKey,
// left/right, and isExpansion when an expansion is present) have defined
// values. The text buffers are zero-filled for the same reason.
Line::Line(const char *buff, int32_t buffLen, UErrorCode &status)
{
    init();
    memset(name, 0, sizeof(name));
    memset(expansionString, 0, sizeof(expansionString));
    initFromString(buff, buffLen, status);
}
|
||||
|
||||
// Copy constructor: starts with cleared links and then reuses the assignment
// operator to copy the contents of `other`.
Line::Line(const Line &other) :
previous(NULL), next(NULL), left(NULL), right(NULL)
{
    operator=(other);
}
|
||||
|
||||
// Copies the text, strengths, flags, sort key pointer and left/right
// component pointers of `other`. The previous/next links of this object are
// left as they are.
//
// The text buffers are copied as whole fixed-size arrays instead of with
// u_strcpy, so a source without a NUL terminator cannot overrun the
// destination. Self-assignment is a no-op.
Line &
Line::operator=(const Line &other) {
    if(this == &other) {
        return *this;
    }
    len = other.len;
    expLen = other.expLen;
    strength = other.strength;
    strengthFromEmpty = other.strengthFromEmpty;
    cumulativeStrength = other.cumulativeStrength;
    expStrength = other.expStrength;
    isContraction = other.isContraction;
    isExpansion = other.isExpansion;
    isRemoved = other.isRemoved;
    isReset = other.isReset;
    expIndex = other.expIndex;
    firstCC = other.firstCC;
    lastCC = other.lastCC;

    memcpy(name, other.name, sizeof(name));
    memcpy(expansionString, other.expansionString, sizeof(expansionString));
    // guarantee termination even if the source buffers were not terminated
    name[sizeof(name)/sizeof(name[0]) - 1] = 0;
    expansionString[sizeof(expansionString)/sizeof(expansionString[0]) - 1] = 0;

    // shallow copy: the sort key bytes are shared, not duplicated
    sortKey = other.sortKey;
    left = other.left;
    right = other.right;
    return *this;
}
|
||||
|
||||
// Two lines compare equal when they are the same object, or when their name
// lengths and name texts match. Strength and expansion are not considered.
UBool
Line::operator==(const Line &other) const {
    if(this == &other) {
        return TRUE;
    }
    const UBool sameName = (UBool)(len == other.len && u_strcmp(name, other.name) == 0);
    return sameName ? TRUE : FALSE;
}
|
||||
|
||||
// Stricter comparison than operator==: besides the name, the strength and
// the expansion (length and text) have to match too.
UBool
Line::equals(const Line &other) const {
    if(this == &other) {
        return TRUE;
    }
    const UBool allMatch = (UBool)(len == other.len
        && u_strcmp(name, other.name) == 0
        && strength == other.strength
        && expLen == other.expLen
        && u_strcmp(expansionString, other.expansionString) == 0);
    return allMatch ? TRUE : FALSE;
}
|
||||
|
||||
// Exact negation of operator==.
UBool
Line::operator!=(const Line &other) const {
    return operator==(other) ? FALSE : TRUE;
}
|
||||
|
||||
|
||||
// The destructor body is empty: sortKey and the previous/next/left/right
// pointers are not released here.
Line::~Line() {
|
||||
}
|
||||
|
||||
void
|
||||
Line::copyArray(Line *dest, const Line *src, int32_t size) {
|
||||
int32_t i = 0;
|
||||
for(i = 0; i < size; i++) {
|
||||
dest[i] = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Line::setName(const UChar* name, int32_t len) {
|
||||
this->len = len;
|
||||
u_memcpy(this->name, name, len);
|
||||
UChar32 c;
|
||||
U16_GET(name, 0, 0, len, c);
|
||||
firstCC = u_getCombiningClass(c);
|
||||
U16_GET(name, 0, len-1, len, c);
|
||||
lastCC = u_getCombiningClass(c);
|
||||
}
|
||||
|
||||
void
|
||||
Line::setToConcat(const Line *first, const Line *second) {
|
||||
u_strcpy(name, first->name);
|
||||
u_strcat(name, second->name);
|
||||
len = first->len + second->len;
|
||||
firstCC = first->firstCC;
|
||||
lastCC = second->lastCC;
|
||||
}
|
||||
|
||||
// Produces a human-readable description of a UTF-16 string: first the code
// points in hexadecimal (4 digits for the BMP, 6 above it, each followed by
// a space), then the extended character name of each code point in braces.
//
// Each u_charName() call gets its own error code. With a single shared code,
// one failure would turn every later call into a no-op and the stale buffer
// contents would be appended as if they were names.
UnicodeString
Line::stringToName(UChar *string, int32_t len) {
    UnicodeString result;
    char buffer[256];
    int32_t i = 0;
    UChar32 c;

    // pass 1: hexadecimal code points
    while(i < len) {
        U16_NEXT(string, i, len, c);
        if(c < 0x10000) {
            sprintf(buffer, "%04X ", c);
        } else {
            sprintf(buffer, "%06X ", c);
        }
        result.append(buffer);
    }

    // pass 2: character names
    i = 0;
    while(i < len) {
        U16_NEXT(string, i, len, c);
        UErrorCode status = U_ZERO_ERROR;
        buffer[0] = 0;
        u_charName(c, U_EXTENDED_CHAR_NAME, buffer, (int32_t)sizeof(buffer), &status);
        if(U_FAILURE(status)) {
            // no usable name: emit empty braces rather than stale text
            buffer[0] = 0;
        }
        buffer[sizeof(buffer) - 1] = 0;
        result.append("{");
        result.append(buffer);
        result.append("} ");
    }
    return result;
}
|
||||
|
||||
// Formats this line as one quoted rule fragment followed by a "//" comment
// that names the characters (see stringToName). The shared needsQuoting set
// is created on first use.
UnicodeString
Line::toBundleString()
{
    UErrorCode errorCode = U_ZERO_ERROR;
    if(needsQuoting == NULL) {
        needsQuoting = new UnicodeSet("[[:whitespace:][:c:][:z:][[:ascii:]-[a-zA-Z0-9]]]", errorCode);
    }

    UChar normalized[50];
    int32_t normalizedLen = unorm_normalize(name, len, UNORM_NFC, 0, normalized, 50, &errorCode);
    const UBool showExpansion = (UBool)(expLen && !isReset);

    UnicodeString rule;
    rule.append("\"");

    // relation: reset marker or strength operator
    if(isReset) {
        rule.append("&");
    } else {
        rule.append(strengthToString(strength, FALSE, FALSE));
    }

    // the name, quoted when either its raw or its NFC form needs it
    UBool quoted = needsQuoting->containsSome(name) || needsQuoting->containsSome(normalized);
    if(quoted) {
        rule.append("'");
    }
    if(normalized[0] == 0x22) {
        rule.append("\\u0022");
    } else {
        rule.append(normalized, normalizedLen);
    }
    if(quoted && normalized[0] != 0x0027) {
        rule.append("'");
    }

    // optional expansion part
    if(showExpansion) {
        quoted = needsQuoting->containsSome(expansionString);
        rule.append(" / ");
        if(quoted) {
            rule.append("'");
        }
        rule.append(expansionString);
        if(quoted) {
            rule.append("'");
        }
    }

    // trailing comment with code points and character names
    rule.append("\" //");
    rule.append(stringToName(normalized, normalizedLen));
    if(showExpansion) {
        rule.append(" / ");
        rule.append(stringToName(expansionString, expLen));
    }
    rule.append("\n");
    return rule;
}
|
||||
|
||||
// Formats this line as one <span> element followed by <br>. The title
// attribute carries the character names; the element text carries the
// relation, the NFC form of the name and, if present, the expansion.
UnicodeString
Line::toHTMLString()
{
    UErrorCode errorCode = U_ZERO_ERROR;
    UChar normalized[50];
    int32_t normalizedLen = unorm_normalize(name, len, UNORM_NFC, 0, normalized, 50, &errorCode);
    const UBool showExpansion = (UBool)(expLen && !isReset);

    UnicodeString html;

    // opening tag with the descriptive title
    html.append("<span title=\"");
    html.append(stringToName(normalized, normalizedLen));
    if(showExpansion) {
        html.append(" / ");
        html.append(stringToName(expansionString, expLen));
    }
    html.append("\">");

    // element text
    if(isReset) {
        html.append("&");
    } else {
        html.append(strengthToString(strength, FALSE, TRUE));
    }
    html.append(normalized, normalizedLen);
    if(showExpansion) {
        html.append(" / ");
        html.append(expansionString);
    }

    html.append("</span><br>\n");
    return html;
}
|
||||
|
||||
// Returns the line as text. The plain form is "name" or "name/expansion".
// The pretty form uses the NFC form of the name and appends " # " plus the
// code points and character names (see stringToName).
UnicodeString
Line::toString(UBool pretty) {
    UnicodeString text;

    if(!pretty) {
        text.setTo(name);
        if(expLen) {
            text.append("/");
            text.append(expansionString);
        }
        return text;
    }

    UErrorCode errorCode = U_ZERO_ERROR;
    UChar normalized[50];
    int32_t normalizedLen = unorm_normalize(name, len, UNORM_NFC, 0, normalized, 50, &errorCode);

    text.setTo(normalized, normalizedLen);
    if(expLen) {
        text.append("/");
        text.append(expansionString);
    }

    // descriptive trailer
    text.append(" # ");
    text.append(stringToName(normalized, normalizedLen));
    if(expLen) {
        text.append("/ ");
        text.append(stringToName(expansionString, expLen));
    }
    return text;
}
|
||||
|
||||
|
||||
void
|
||||
Line::setTo(const UnicodeString &string) {
|
||||
int32_t len = string.length();
|
||||
u_strncpy(name, string.getBuffer(), len);
|
||||
name[len] = 0;
|
||||
this->len = len;
|
||||
UChar32 c;
|
||||
U16_GET(name, 0, 0, len, c);
|
||||
firstCC = u_getCombiningClass(c);
|
||||
U16_GET(name, 0, len-1, len, c);
|
||||
lastCC = u_getCombiningClass(c);
|
||||
}
|
||||
|
||||
void
|
||||
Line::setTo(const UChar32 n) {
|
||||
UBool isError = FALSE;
|
||||
len = 0; // we are setting the line to char, not appending
|
||||
U16_APPEND(name, len, 25, n, isError);
|
||||
name[len] = 0;
|
||||
firstCC = u_getCombiningClass(n);
|
||||
lastCC = firstCC;
|
||||
}
|
||||
|
||||
|
||||
// Appends indentation for the given strength to `result` and returns a copy
// of it. The number of indent levels is strength+1, or 5 for UCOL_IDENTICAL.
// Each level is indentSize single spaces. Strengths above UCOL_IDENTICAL add
// nothing.
UnicodeString
Line::strengthIndent(UColAttributeValue strength, int indentSize, UnicodeString &result)
{
    if(strength > UCOL_IDENTICAL) {
        return result;
    }
    const int levels = (strength == UCOL_IDENTICAL) ? 5 : strength + 1;
    int remaining = levels * indentSize;
    while(remaining-- > 0) {
        result.append(" ");
    }
    return result;
}
|
||||
|
||||
// Returns the textual relation for a strength.
//
// The table has one row per known strength: the text used for HTML output,
// the text used for plain output, and the padding that pretty plain output
// puts in front of it (NULL when there is none). Any other strength value
// produces " ?! ".
UnicodeString
Line::strengthToString(UColAttributeValue strength, UBool pretty, UBool html) {
    static const struct {
        UColAttributeValue level;
        const char *htmlText;
        const char *plainText;
        const char *prettyPad;
    } relations[] = {
        { UCOL_IDENTICAL,  " = ",    " = ",    " "  },
        { UCOL_QUATERNARY, " <<<< ", " <<<< ", " "  },
        { UCOL_TERTIARY,   " <<< ",  " <<< ",  " "  },
        { UCOL_SECONDARY,  " << ",   " << ",   " "  },
        { UCOL_PRIMARY,    " < ",    " < ",    " "  },
        { UCOL_OFF,        " >? ",   " >? ",   NULL }
    };

    // defaults for a strength that is not in the table
    const char *htmlText = " ?! ";
    const char *plainText = " ?! ";
    const char *prettyPad = NULL;

    const int rowCount = (int)(sizeof(relations)/sizeof(relations[0]));
    for(int row = 0; row < rowCount; row++) {
        if(relations[row].level == strength) {
            htmlText = relations[row].htmlText;
            plainText = relations[row].plainText;
            prettyPad = relations[row].prettyPad;
            break;
        }
    }

    UnicodeString result;
    if(html) {
        result.append(htmlText);
    } else {
        if(pretty && prettyPad != NULL) {
            result.append(prettyPad);
        }
        result.append(plainText);
    }
    return result;
}
|
||||
|
||||
// Walks the `next` chain and returns the first following line whose strength
// is UCOL_IDENTICAL, or NULL when the chain ends first.
Line *
Line::nextInteresting() {
    Line *candidate;
    for(candidate = this->next; candidate != NULL; candidate = candidate->next) {
        if(candidate->strength == UCOL_IDENTICAL) {
            break;
        }
    }
    return candidate;
}
|
||||
|
||||
void
|
||||
Line::append(const UChar* n, int32_t length)
|
||||
{
|
||||
u_strncat(name, n, length);
|
||||
name[len+length] = 0;
|
||||
len += length;
|
||||
UChar32 end;
|
||||
U16_GET(n, 0, length-1, length, end);
|
||||
lastCC = u_getCombiningClass(end);
|
||||
}
|
||||
|
||||
void
|
||||
Line::append(const UChar n)
|
||||
{
|
||||
name[len] = n;
|
||||
name[len+1] = 0;
|
||||
len++;
|
||||
lastCC = u_getCombiningClass(n);
|
||||
}
|
||||
|
||||
void
|
||||
Line::append(const Line &l)
|
||||
{
|
||||
append(l.name, l.len);
|
||||
lastCC = l.lastCC;
|
||||
}
|
||||
|
||||
void
|
||||
Line::clear()
|
||||
{
|
||||
name[0] = 0;
|
||||
len = 0;
|
||||
}
|
||||
|
||||
// Serializes the line into buff as text and returns the number of characters
// written (excluding the terminating NUL). The buffer length and error code
// parameters are not used.
//
// Layout: name units as 4-digit hex separated by spaces, "/", the expansion
// units in the same form (nothing when expLen is 0), "; ", then strength,
// strengthFromEmpty, cumulativeStrength and expStrength as 2-digit numbers,
// the isRemoved and isReset flags, and expIndex as 8-digit hex.
// Removed lines are expected not to be written; the combining classes are
// recomputed when the line is read back.
int32_t
Line::write(char *buff, int32_t, UErrorCode &)
{
    // Every field has a fixed printed width, so the cursor is advanced by
    // that width rather than by the value sprintf returns.
    char *out = buff;
    int32_t unit;

    sprintf(out, "%04X", name[0]);
    out += 4;
    for(unit = 1; unit < len; unit++) {
        sprintf(out, " %04X", name[unit]);
        out += 5;
    }
    sprintf(out, "/");
    out += 1;

    if(expLen) {
        sprintf(out, "%04X", expansionString[0]);
        out += 4;
        for(unit = 1; unit < expLen; unit++) {
            sprintf(out, " %04X", expansionString[unit]);
            out += 5;
        }
    }
    sprintf(out, "; ");
    out += 2;

    // the four strengths
    sprintf(out, "%02i ", strength);
    out += 3;
    sprintf(out, "%02i", strengthFromEmpty);
    out += 2;
    sprintf(out, "%02i", cumulativeStrength);
    out += 2;
    sprintf(out, "%02i", expStrength);
    out += 2;

    // only the isRemoved and isReset flags are stored
    sprintf(out, "%1i%1i ", isRemoved, isReset);
    out += 3;

    sprintf(out, "%08X", expIndex);
    out += 8;

    *out = 0;
    return (int32_t)(out - buff);
}
|
||||
|
||||
void
|
||||
Line::initFromString(const char *buff, int32_t, UErrorCode &)
|
||||
{
|
||||
int32_t bufIndex = 0;
|
||||
int32_t i = 0;
|
||||
|
||||
sscanf(buff+bufIndex, "%04X", &name[i]);
|
||||
i++;
|
||||
bufIndex += 4;
|
||||
while(buff[bufIndex] != '/') {
|
||||
sscanf(buff+bufIndex, " %04X", &name[i]);
|
||||
i++;
|
||||
bufIndex += 5;
|
||||
}
|
||||
len = i;
|
||||
name[len] = 0;
|
||||
bufIndex++;
|
||||
|
||||
if(i > 1) {
|
||||
isContraction = TRUE;
|
||||
} else {
|
||||
isContraction = FALSE;
|
||||
}
|
||||
|
||||
if(buff[bufIndex] == ';') {
|
||||
isExpansion = FALSE;
|
||||
bufIndex += 2;
|
||||
expansionString[0] = 0;
|
||||
expLen = 0;
|
||||
} else {
|
||||
i = 0;
|
||||
sscanf(buff+bufIndex, "%04X", &expansionString[i]);
|
||||
i++;
|
||||
bufIndex += 4;
|
||||
while(buff[bufIndex] != ';') {
|
||||
sscanf(buff+bufIndex, " %04X", &expansionString[i]);
|
||||
i++;
|
||||
bufIndex += 5;
|
||||
}
|
||||
expLen = i;
|
||||
expansionString[expLen] = 0;
|
||||
bufIndex += 2;
|
||||
}
|
||||
sscanf(buff+bufIndex, "%02i ", &strength);
|
||||
bufIndex += 3;
|
||||
sscanf(buff+bufIndex, "%02i", &strengthFromEmpty);
|
||||
bufIndex += 2;
|
||||
sscanf(buff+bufIndex, "%02i", &cumulativeStrength);
|
||||
bufIndex += 2;
|
||||
sscanf(buff+bufIndex, "%02i", &expStrength);
|
||||
bufIndex += 2;
|
||||
|
||||
sscanf(buff+bufIndex, "%1i%1i ", &isRemoved, &isReset);
|
||||
bufIndex += 3;
|
||||
|
||||
sscanf(buff+bufIndex, "%08X", &expIndex);
|
||||
bufIndex += 8;
|
||||
|
||||
// calculate first and last CC
|
||||
UChar32 c;
|
||||
U16_GET(name, 0, 0, len, c);
|
||||
firstCC = u_getCombiningClass(c);
|
||||
U16_GET(name, 0, len-1, len, c);
|
||||
lastCC = u_getCombiningClass(c);
|
||||
}
|
||||
|
||||
void
|
||||
Line::swapCase(UChar *string, int32_t &sLen)
|
||||
{
|
||||
UChar32 c = 0;
|
||||
int32_t i = 0, j = 0;
|
||||
UChar buff[256];
|
||||
UBool isError = FALSE;
|
||||
while(i < sLen) {
|
||||
U16_NEXT(string, i, sLen, c);
|
||||
if(u_isUUppercase(c)) {
|
||||
c = u_tolower(c);
|
||||
} else if(u_isULowercase(c)) {
|
||||
c = u_toupper(c);
|
||||
}
|
||||
U16_APPEND(buff, j, 256, c, isError);
|
||||
}
|
||||
buff[j] = 0;
|
||||
u_strcpy(string, buff);
|
||||
sLen = j;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Line::swapCase()
|
||||
{
|
||||
swapCase(name, len);
|
||||
swapCase(expansionString, expLen);
|
||||
}
|
||||
|
||||
// Returns the sort key as two-digit hex bytes, each followed by a space.
// The key is read up to its first zero byte. Output stops once the 256-byte
// scratch buffer is full. A NULL sort key gives an empty string.
UnicodeString
Line::dumpSortkey()
{
    char hex[256];
    hex[0] = 0;

    if(sortKey != NULL) {
        char *out = hex;
        for(const uint8_t *byte = sortKey; *byte != 0; byte++) {
            sprintf(out, "%02X ", *byte);
            out += 3;
            if(out - hex > 252) {
                // no room for another "XX " plus terminator
                break;
            }
        }
    }
    return UnicodeString(hex);
}
|
||||
|
113
tools/colprobe/line.h
Executable file
113
tools/colprobe/line.h
Executable file
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File line.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 03/18/2003 weiv Creation.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
//
|
||||
// class Line
|
||||
//
|
||||
// Each line from the source file (containing a name, presumably) gets
|
||||
// one of these structs.
|
||||
//
|
||||
|
||||
#ifndef COLPROBE_LINE_H
|
||||
#define COLPROBE_LINE_H
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucol.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "colprobe.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static const int MAX_EXPANSION_PREFIXES = 10;
|
||||
|
||||
// One collation element: its text, an optional expansion, its strengths
// and its links to neighbouring lines. All members are public.
class Line {
|
||||
public:
|
||||
// Assigns the first `size` elements of src to dest with operator=.
static void copyArray(Line *dest, const Line *src, int32_t size);
|
||||
Line();
|
||||
Line(const Line &other);
|
||||
Line(const UChar* name, int32_t len);
|
||||
Line(const UnicodeString &string);
|
||||
Line(const UChar name);
|
||||
// Builds a line from the textual form produced by write().
Line(const char *buff, int32_t buffLen, UErrorCode &status);
|
||||
~Line();
|
||||
Line & operator=(const Line &other);
|
||||
// operator== / operator!= compare only the name; equals() also compares
// the strength and the expansion.
UBool operator==(const Line &other) const;
|
||||
UBool operator!=(const Line &other) const;
|
||||
void setToConcat(const Line *first, const Line *second);
|
||||
void setName(const UChar* name, int32_t len);
|
||||
UnicodeString toString(UBool pretty = FALSE);
|
||||
UnicodeString toBundleString();
|
||||
UnicodeString toHTMLString();
|
||||
// write() and initFromString() convert to and from a one-line text form.
int32_t write(char *buff, int32_t buffLen, UErrorCode &status);
|
||||
void initFromString(const char *buff, int32_t buffLen, UErrorCode &status);
|
||||
|
|
||||
|
|
||||
UnicodeString strengthIndent(UColAttributeValue strength, int indentSize, UnicodeString &result);
|
||||
UnicodeString strengthToString(UColAttributeValue strength, UBool pretty, UBool html = FALSE);
|
||||
UnicodeString stringToName(UChar *string, int32_t len);
|
||||
void setTo(const UnicodeString &string);
|
||||
void setTo(const UChar32 n);
|
||||
UBool equals(const Line &other) const;
|
||||
Line *nextInteresting();
|
||||
void append(const UChar n);
|
||||
void append(const UChar* n, int32_t length);
|
||||
void append(const Line &l);
|
||||
void clear();
|
||||
void swapCase();
|
||||
void swapCase(UChar *string, int32_t &sLen);
|
||||
UnicodeString dumpSortkey();
|
||||
// Resets the scalar members; does not touch name or expansionString.
void init();
|
||||
|
|
||||
|
|
||||
public:
|
||||
// Text of the element, handled as a NUL-terminated UTF-16 string.
UChar name[25];
|
||||
// Number of UTF-16 code units in name.
int32_t len;
|
||||
// Expansion text (NUL-terminated) and its length in code units.
UChar expansionString[25];
|
||||
int32_t expLen;
|
||||
|
|
||||
UColAttributeValue strength;
|
||||
UColAttributeValue strengthFromEmpty;
|
||||
UColAttributeValue cumulativeStrength;
|
||||
UColAttributeValue expStrength;
|
||||
|
|
||||
// Neighbours in the linked list of lines.
Line *previous;
|
||||
Line *next;
|
||||
|
|
||||
// In case this element is a contraction
|
||||
// we keep a pointer at which lines were components
|
||||
Line *left;
|
||||
Line *right;
|
||||
|
|
||||
UBool isContraction;
|
||||
UBool isExpansion;
|
||||
UBool isRemoved;
|
||||
UBool isReset;
|
||||
|
|
||||
int32_t expIndex;
|
||||
// Combining classes of the first and last code point of name.
uint8_t firstCC;
|
||||
uint8_t lastCC;
|
||||
|
|
||||
// Sort key bytes; copied by pointer in operator= and not freed by ~Line().
uint8_t *sortKey;
|
||||
public:
|
||||
// Shared set of characters that need quoting; created on first use by
// toBundleString().
static UnicodeSet *needsQuoting;
|
||||
};
|
||||
|
||||
|
||||
#endif //COLPROBE_LINE_H
|
241
tools/colprobe/locale.txt
Executable file
241
tools/colprobe/locale.txt
Executable file
|
@ -0,0 +1,241 @@
|
|||
af
|
||||
af_ZA
|
||||
am
|
||||
am_ET
|
||||
ar
|
||||
ar_AE
|
||||
ar_BH
|
||||
ar_DZ
|
||||
ar_EG
|
||||
ar_IN
|
||||
ar_IQ
|
||||
ar_JO
|
||||
ar_KW
|
||||
ar_LB
|
||||
ar_LY
|
||||
ar_MA
|
||||
ar_OM
|
||||
ar_QA
|
||||
ar_SA
|
||||
ar_SD
|
||||
ar_SY
|
||||
ar_TN
|
||||
ar_YE
|
||||
be
|
||||
be_BY
|
||||
bg
|
||||
bg_BG
|
||||
bn
|
||||
bn_IN
|
||||
ca
|
||||
ca_ES
|
||||
#ca_ES_PREEURO
|
||||
cs
|
||||
cs_CZ
|
||||
da
|
||||
da_DK
|
||||
de
|
||||
de_AT
|
||||
#de_AT_PREEURO
|
||||
de_BE
|
||||
de_CH
|
||||
de_DE
|
||||
#de_DE_PREEURO
|
||||
de_LU
|
||||
#de_LU_PREEURO
|
||||
de__PHONEBOOK
|
||||
el
|
||||
el_GR
|
||||
#el_GR_PREEURO
|
||||
en
|
||||
en_AU
|
||||
en_BE
|
||||
#en_BE_PREEURO
|
||||
en_BW
|
||||
en_CA
|
||||
en_GB
|
||||
#en_GB_EURO
|
||||
en_HK
|
||||
en_IE
|
||||
#en_IE_PREEURO
|
||||
en_IN
|
||||
en_MT
|
||||
en_NZ
|
||||
en_PH
|
||||
en_SG
|
||||
en_US
|
||||
en_US_POSIX
|
||||
en_VI
|
||||
en_ZA
|
||||
en_ZW
|
||||
eo
|
||||
es
|
||||
es_AR
|
||||
es_BO
|
||||
es_CL
|
||||
es_CO
|
||||
es_CR
|
||||
es_DO
|
||||
es_EC
|
||||
es_ES
|
||||
#es_ES_PREEURO
|
||||
es_GT
|
||||
es_HN
|
||||
es_MX
|
||||
es_NI
|
||||
es_PA
|
||||
es_PE
|
||||
es_PR
|
||||
es_PY
|
||||
es_SV
|
||||
es_US
|
||||
es_UY
|
||||
es_VE
|
||||
es__TRADITIONAL
|
||||
et
|
||||
et_EE
|
||||
eu
|
||||
eu_ES
|
||||
#eu_ES_PREEURO
|
||||
fa
|
||||
fa_AF
|
||||
fa_IR
|
||||
fi
|
||||
fi_FI
|
||||
#fi_FI_PREEURO
|
||||
fo
|
||||
fo_FO
|
||||
fr
|
||||
fr_BE
|
||||
#fr_BE_PREEURO
|
||||
fr_CA
|
||||
fr_CH
|
||||
fr_FR
|
||||
#fr_FR_PREEURO
|
||||
fr_LU
|
||||
#fr_LU_PREEURO
|
||||
ga
|
||||
ga_IE
|
||||
#ga_IE_PREEURO
|
||||
gl
|
||||
gl_ES
|
||||
#gl_ES_PREEURO
|
||||
gu
|
||||
gu_IN
|
||||
gv
|
||||
gv_GB
|
||||
he
|
||||
he_IL
|
||||
hi
|
||||
hi_IN
|
||||
hi__DIRECT
|
||||
hr
|
||||
hr_HR
|
||||
hu
|
||||
hu_HU
|
||||
hy
|
||||
hy_AM
|
||||
hy_AM_REVISED
|
||||
id
|
||||
id_ID
|
||||
is
|
||||
is_IS
|
||||
it
|
||||
it_CH
|
||||
it_IT
|
||||
#it_IT_PREEURO
|
||||
ja
|
||||
ja_JP
|
||||
#ja_JP_TRADITIONAL
|
||||
kk_KZ
|
||||
kl
|
||||
kl_GL
|
||||
kn
|
||||
kn_IN
|
||||
ko
|
||||
ko_KR
|
||||
kok
|
||||
kok_IN
|
||||
kw
|
||||
kw_GB
|
||||
lt
|
||||
lt_LT
|
||||
lv
|
||||
lv_LV
|
||||
mk
|
||||
mk_MK
|
||||
mr
|
||||
mr_IN
|
||||
ms_MY
|
||||
mt
|
||||
mt_MT
|
||||
nb
|
||||
nb_NO
|
||||
nl
|
||||
nl_BE
|
||||
#nl_BE_PREEURO
|
||||
nl_NL
|
||||
#nl_NL_PREEURO
|
||||
nn
|
||||
nn_NO
|
||||
om
|
||||
om_ET
|
||||
om_KE
|
||||
pl
|
||||
pl_PL
|
||||
ps
|
||||
ps_AF
|
||||
pt
|
||||
pt_BR
|
||||
pt_PT
|
||||
#pt_PT_PREEURO
|
||||
ro
|
||||
ro_RO
|
||||
ru
|
||||
ru_RU
|
||||
ru_UA
|
||||
sh
|
||||
sh_YU
|
||||
sk
|
||||
sk_SK
|
||||
sl
|
||||
sl_SI
|
||||
so
|
||||
so_DJ
|
||||
so_ET
|
||||
so_KE
|
||||
so_SO
|
||||
sq
|
||||
sq_AL
|
||||
sr
|
||||
sr_YU
|
||||
sv
|
||||
sv_FI
|
||||
sv_SE
|
||||
sw
|
||||
sw_KE
|
||||
sw_TZ
|
||||
ta
|
||||
ta_IN
|
||||
te
|
||||
te_IN
|
||||
th
|
||||
th_TH
|
||||
#th_TH_TRADITIONAL
|
||||
ti
|
||||
ti_ER
|
||||
ti_ET
|
||||
tr
|
||||
tr_TR
|
||||
uk
|
||||
uk_UA
|
||||
vi
|
||||
vi_VN
|
||||
zh
|
||||
zh_CN
|
||||
zh_HK
|
||||
zh_MO
|
||||
zh_SG
|
||||
zh_TW
|
||||
zh_TW_STROKE
|
||||
zh__PINYIN
|
48
tools/colprobe/longname.cpp
Executable file
48
tools/colprobe/longname.cpp
Executable file
|
@ -0,0 +1,48 @@
|
|||
#include "unicode/unistr.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main(int argc,
|
||||
char* argv[])
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
const char *loc = argv[1];
|
||||
int32_t hasCountry;
|
||||
UConverter *conv = ucnv_open("utf8", &status);
|
||||
|
||||
|
||||
UChar UBuffer[256];
|
||||
int32_t uBufLen = 0;
|
||||
char buffer[256];
|
||||
int32_t bufLen = 0;
|
||||
|
||||
uBufLen = uloc_getDisplayLanguage(loc, "en", UBuffer, 256, &status);
|
||||
bufLen = ucnv_fromUChars(conv, buffer, 256, UBuffer, uBufLen, &status);
|
||||
//u_UCharsToChars(UBuffer, buffer, uBufLen);
|
||||
buffer[bufLen] = 0;
|
||||
printf("%s", buffer);
|
||||
|
||||
if(hasCountry = uloc_getCountry(loc, buffer, 256, &status)) {
|
||||
uBufLen = uloc_getDisplayCountry(loc, "en", UBuffer, 256, &status);
|
||||
bufLen = ucnv_fromUChars(conv, buffer, 256, UBuffer, uBufLen, &status);
|
||||
//u_UCharsToChars(UBuffer, buffer, uBufLen);
|
||||
buffer[bufLen] = 0;
|
||||
printf("_%s", buffer);
|
||||
}
|
||||
|
||||
if(uloc_getVariant(loc, buffer, 256, &status)) {
|
||||
uBufLen = uloc_getDisplayVariant(loc, "en", UBuffer, 256, &status);
|
||||
bufLen = ucnv_fromUChars(conv, buffer, 256, UBuffer, uBufLen, &status);
|
||||
//u_UCharsToChars(UBuffer, buffer, uBufLen);
|
||||
buffer[bufLen] = 0;
|
||||
if(!hasCountry) {
|
||||
printf("_");
|
||||
}
|
||||
printf("_%s", buffer);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
4
tools/colprobe/readme.txt
Executable file
4
tools/colprobe/readme.txt
Executable file
|
@ -0,0 +1,4 @@
|
|||
There are several tools in this directory that should make it easier to generate collation data:
|
||||
extractCollationData.pl - Perl script that reads ICU resource bundle source files and, for each locale that contains collation elements, writes a locale_collation.html file. Its arguments are the locale source files (*.txt) to process.
|
||||
createComparisonTables.pl - takes a locale name and looks in the directories that should contain the HTML data produced by colprobe or extractCollationData.pl.
|
||||
tableStarter.pl - invokes createComparisonTables.pl with a list of locales.
|
2067
tools/colprobe/sortedlines.cpp
Executable file
2067
tools/colprobe/sortedlines.cpp
Executable file
File diff suppressed because it is too large
Load diff
120
tools/colprobe/sortedlines.h
Executable file
120
tools/colprobe/sortedlines.h
Executable file
|
@ -0,0 +1,120 @@
|
|||
#ifndef COLPROBE_SORTEDLINES_H
|
||||
#define COLPROBE_SORTEDLINES_H
|
||||
|
||||
// colprobe includes
|
||||
#include "colprobe.h"
|
||||
#include "line.h"
|
||||
#include "uprinter.h"
|
||||
#include "strengthprobe.h"
|
||||
|
||||
|
||||
// ICU includes
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/usetiter.h"
|
||||
#include "unicode/uscript.h"
|
||||
#include "hash.h"
|
||||
|
||||
// Container of Line objects. The public interface sorts and analyses them
// and renders them as text, bundle, HTML or XML output (see the to*
// methods). Everything above the first `public:` label is private.
class SortedLines {
|
||||
Line empty;
|
||||
Line *UB[UCOL_OFF];
|
||||
UnicodeSet ignorables[UCOL_OFF];
|
||||
|
|
||||
Line **toSort;
|
||||
int32_t toSortCapacity;
|
||||
Line *lines;
|
||||
int32_t size;
|
||||
int32_t capacity;
|
||||
|
|
||||
UnicodeSet repertoire;
|
||||
UnicodeSet excludeBounds;
|
||||
|
|
||||
StrengthProbe probe;
|
||||
|
|
||||
Line *first;
|
||||
Line *last;
|
||||
Line *current;
|
||||
// Private default constructor with an empty body: it initializes no members.
SortedLines() {};
|
||||
|
|
||||
UPrinter *logger;
|
||||
UPrinter *debug;
|
||||
|
|
||||
Hashtable *contractionsTable;
|
||||
Hashtable *duplicators; // elements that duplicate preceding characters
|
||||
int32_t maxExpansionPrefixSize;
|
||||
|
|
||||
// Properties of the sort
|
||||
UBool wordSort;
|
||||
UBool frenchSecondary;
|
||||
UBool upperFirst;
|
||||
|
|
||||
uint8_t *sortkeys;
|
||||
int32_t sortkeyOffset;
|
||||
public:
|
||||
SortedLines(const UnicodeSet &set, const UnicodeSet &excludeBounds, const StrengthProbe &probe, UPrinter *logger, UPrinter *debug);
|
||||
SortedLines(FILE *file, UPrinter *logger, UPrinter *debug, UErrorCode &status);
|
||||
~SortedLines();
|
||||
void analyse(UErrorCode &status);
|
||||
|
|
||||
void sort(UBool setStrengths = TRUE, UBool link = FALSE);
|
||||
void sort(Line **sortingArray, int32_t sizeToSort, UBool setStrengths = TRUE, UBool link = FALSE);
|
||||
|
|
||||
Line *getFirst();
|
||||
Line *getLast();
|
||||
void add(Line *line, UBool linkIn = FALSE);
|
||||
void insert(Line *line, int32_t index);
|
||||
Line *getNext();
|
||||
Line *getPrevious();
|
||||
Line *operator[](int32_t index);
|
||||
int32_t addContractionsToRepertoire(UErrorCode &status);
|
||||
|
|
||||
int32_t getSize() const;
|
||||
|
|
||||
int32_t detectExpansions();
|
||||
|
|
||||
UnicodeString toString(UBool useLinks = FALSE);
|
||||
UnicodeString toStringFromEmpty();
|
||||
UnicodeString toPrettyString(UBool useLinks, UBool printSortKeys = FALSE);
|
||||
// toOutput takes the same arguments as toBundle/toHTML/toXML plus a format
// name. NOTE(review): it presumably dispatches to one of them - confirm
// in sortedlines.cpp.
UnicodeString toOutput(const char *format,
|
||||
const char *locale, const char *platform, const char *reference,
|
||||
UBool useLinks, UBool initialize, UBool moreToCome);
|
||||
UnicodeString toBundle(const char *locale, const char *platform, const char *reference,
|
||||
UBool useLinks, UBool initialize, UBool moreToCome);
|
||||
UnicodeString toHTML(const char *locale, const char *platform, const char *reference,
|
||||
UBool useLinks, UBool initialize, UBool moreToCome);
|
||||
UnicodeString toXML(const char *locale, const char *platform, const char *reference,
|
||||
UBool useLinks, UBool initialize, UBool moreToCome);
|
||||
UnicodeString arrayToString(Line** sortedLines, int32_t linesSize, UBool pretty, UBool useLinks, UBool printSortKeys);
|
||||
void setSortingArray(Line **sortingArray, Line *elements, int32_t sizeToSort);
|
||||
int32_t setSortingArray(Line **sortingArray, Hashtable *table);
|
||||
|
|
||||
void reduceDifference(SortedLines& reference);
|
||||
void getRepertoire(UnicodeSet &fillIn);
|
||||
void removeDecompositionsFromRepertoire();
|
||||
void getBounds(UErrorCode &status);
|
||||
void classifyRepertoire();
|
||||
void toFile(FILE *file, UBool useLinks, UErrorCode &status);
|
||||
void swapCase();
|
||||
void calculateSortKeys();
|
||||
void calculateSortKey(Line &line);
|
||||
private:
|
||||
void init();
|
||||
void init(UnicodeSet &rep, Line *lin);
|
||||
int32_t detectContractions(Line **firstRep, int32_t firstSize,
|
||||
Line **secondRep, int32_t secondSize,
|
||||
Line *toAddTo, int32_t &toAddToSize,
|
||||
Line *lesserToAddTo, int32_t &lesserToAddToSize,
|
||||
int32_t capacity, UErrorCode &status);
|
||||
|
|
||||
void calculateCumulativeStrengths(Line *start, Line *end);
|
||||
void transferCumulativeStrength(Line *previous, Line *that);
|
||||
void updateBounds(UnicodeSet &set);
|
||||
void addAll(Line* toAdd, int32_t toAddSize);
|
||||
void setDistancesFromEmpty(Line* array, int32_t arraySize);
|
||||
void noteContraction(const char* msg, Line *toAddTo, int32_t &toAddToSize, Line *left, Line *right, int32_t &noConts, UErrorCode &status);
|
||||
int32_t gooseUp(int32_t resetIndex, int32_t expansionIndex, Line &expLine, int32_t *expIndexes, int32_t &expIndexSize, UColAttributeValue strength);
|
||||
UBool getExpansionLine(const Line &expansion, const Line &previous, const Line &exp, Line &expansionLine);
|
||||
|
|
||||
|
|
||||
};
|
||||
|
||||
#endif // #ifndef COLPROBE_SORTEDLINES_H
|
402
tools/colprobe/strengthprobe.cpp
Executable file
402
tools/colprobe/strengthprobe.cpp
Executable file
|
@ -0,0 +1,402 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File strengthprobe.cpp
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 07/07/2003 weiv Creation.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
//
|
||||
// class StrengthProbe
//
// Determines the collation strength at which two strings differ by
// comparing them with probe characters attached as prefixes or suffixes.
|
||||
//
|
||||
|
||||
#include "strengthprobe.h"
|
||||
|
||||
/**
 * Creates a probe that uses the supplied comparison and sort-key callbacks.
 *
 * @param comparer  callback used for every comparison the probe performs
 * @param getter    callback used by getSortKey()
 * @param SE        separator character inserted by probePrefix()/probeSuffix()
 *                  between the probe character and the probed string
 * @param B0        probe character; checkSanity() requires B0 < B3 < B2 < B1
 * @param B1        probe character (see B0)
 * @param B2        probe character (see B0)
 * @param B3        probe character (see B0)
 */
StrengthProbe::StrengthProbe(CompareFn comparer, GetSortKeyFn getter, UChar SE,
                             UChar B0, UChar B1, UChar B2, UChar B3)
    : SE(SE),
      B0(B0),
      B1(B1),
      B2(B2),
      B3(B3),
      // The comparer is handed pointers to these pointers, so they must
      // refer to this object's own scratch lines.
      utilFirstP(&utilFirst),
      utilSecondP(&utilSecond),
      frenchSecondary(FALSE),
      comparer(comparer),
      skgetter(getter)
{
    // All state is set up by the initializer list.
}
|
||||
|
||||
int
|
||||
StrengthProbe::setProbeChars(UChar B0, UChar B1, UChar B2, UChar B3)
|
||||
{
|
||||
this->B0 = B0;
|
||||
this->B1 = B1;
|
||||
this->B2 = B2;
|
||||
this->
|
||||
B3 = B3;
|
||||
return checkSanity();
|
||||
}
|
||||
|
||||
int
|
||||
StrengthProbe::checkSanity()
|
||||
{
|
||||
int sanityRes;
|
||||
utilFirst.setTo(B0);
|
||||
utilSecond.setTo(B3);
|
||||
if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
|
||||
return sanityRes*10 + 3;
|
||||
}
|
||||
utilSecond.setTo(B2);
|
||||
if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
|
||||
return sanityRes*10 + 2;
|
||||
}
|
||||
utilSecond.setTo(B1);
|
||||
if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
|
||||
return sanityRes*10 + 1;
|
||||
}
|
||||
utilFirst.setTo(B3);
|
||||
utilSecond.setTo(B2);
|
||||
if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
|
||||
return sanityRes*10 + 5;
|
||||
}
|
||||
utilSecond.setTo(B1);
|
||||
if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
|
||||
return sanityRes*10 + 4;
|
||||
}
|
||||
utilFirst.setTo(B2);
|
||||
if((sanityRes = comparer(&utilFirstP, &utilSecondP)) >= 0) {
|
||||
return sanityRes*10 + 6;
|
||||
}
|
||||
utilFirst.setTo(B0);
|
||||
if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
|
||||
return 1000;
|
||||
}
|
||||
utilFirst.setTo(B1);
|
||||
if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
|
||||
return 1001;
|
||||
}
|
||||
utilFirst.setTo(B2);
|
||||
if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
|
||||
return 1002;
|
||||
}
|
||||
utilFirst.setTo(B3);
|
||||
if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
|
||||
return 1003;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Compares (first SE x) against (second SE y) using the scratch lines.
 *
 * @return TRUE when the prefixed x compares strictly less than the
 *         prefixed y, FALSE otherwise
 */
UBool
StrengthProbe::probePrefix(const Line &x, const Line &y, UChar first, UChar second) {
    // Left-hand side: probe character, separator, then x.
    utilFirst.name[0] = first;
    utilFirst.name[1] = SE;
    u_strcpy(&utilFirst.name[2], x.name);
    utilFirst.name[x.len+2] = 0;
    utilFirst.len = x.len+2;

    // Right-hand side: probe character, separator, then y.
    utilSecond.name[0] = second;
    utilSecond.name[1] = SE;
    u_strcpy(&utilSecond.name[2], y.name);
    utilSecond.name[y.len+2] = 0;
    utilSecond.len = y.len+2;

    return (UBool)(comparer(&utilFirstP, &utilSecondP) < 0);
}
|
||||
|
||||
/**
 * Compares (x SE first) against (y SE second) using the scratch lines.
 *
 * @return TRUE when the suffixed x compares strictly less than the
 *         suffixed y, FALSE otherwise
 */
UBool
StrengthProbe::probeSuffix(const Line &x, const Line &y, UChar first, UChar second) {
    // Left-hand side: x, separator, then probe character.
    u_strcpy(utilFirst.name, x.name);
    utilFirst.name[x.len] = SE;
    utilFirst.name[x.len+1] = first;
    utilFirst.name[x.len+2] = 0;
    utilFirst.len = x.len + 2;

    // Right-hand side: y, separator, then probe character.
    u_strcpy(utilSecond.name, y.name);
    utilSecond.name[y.len] = SE;
    utilSecond.name[y.len+1] = second;
    utilSecond.name[y.len+2] = 0;
    utilSecond.len = y.len + 2;

    return (UBool)(comparer(&utilFirstP, &utilSecondP) < 0);
}
|
||||
|
||||
/**
 * Compares (first x) against (second y), with no separator character
 * between the probe character and the probed string.
 *
 * @return TRUE when the prefixed x compares strictly less than the
 *         prefixed y, FALSE otherwise
 */
UBool
StrengthProbe::probePrefixNoSep(const Line &x, const Line &y, UChar first, UChar second) {
    // Left-hand side: probe character directly followed by x.
    utilFirst.name[0] = first;
    u_strcpy(&utilFirst.name[1], x.name);
    utilFirst.name[x.len+1] = 0;
    utilFirst.len = x.len + 1;

    // Right-hand side: probe character directly followed by y.
    utilSecond.name[0] = second;
    u_strcpy(&utilSecond.name[1], y.name);
    utilSecond.name[y.len+1] = 0;
    utilSecond.len = y.len + 1;

    return (UBool)(comparer(&utilFirstP, &utilSecondP) < 0);
}
|
||||
|
||||
/**
 * Compares (x first) against (y second), with no separator character
 * between the probed string and the probe character.
 *
 * @return TRUE when the suffixed x compares strictly less than the
 *         suffixed y, FALSE otherwise
 */
UBool
StrengthProbe::probeSuffixNoSep(const Line &x, const Line &y, UChar first, UChar second) {
    // Left-hand side: x directly followed by the probe character.
    u_strcpy(utilFirst.name, x.name);
    utilFirst.name[x.len] = first;
    utilFirst.name[x.len+1] = 0;
    utilFirst.len = x.len + 1;

    // Right-hand side: y directly followed by the probe character.
    u_strcpy(utilSecond.name, y.name);
    utilSecond.name[y.len] = second;
    utilSecond.name[y.len+1] = 0;
    utilSecond.len = y.len + 1;

    return (UBool)(comparer(&utilFirstP, &utilSecondP) < 0);
}
|
||||
|
||||
/**
 * Determines the strength of the difference between x and y, where x is
 * expected to compare less than or equal to y.
 *
 * @return distanceFromEmptyString(y) when x compares equal to the empty
 *         string; UCOL_IDENTICAL when x and y compare equal; UCOL_OFF when
 *         x compares greater than y or no probe matches; otherwise
 *         UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY or UCOL_QUATERNARY
 *         according to the first probe that succeeds.
 */
UColAttributeValue
StrengthProbe::getStrength(const Line &x, const Line &y) {
    const Line *xp = &x;
    const Line *yp = &y;

    // The comparer takes pointers to Line pointers, so the empty line
    // needs a named pointer of its own.
    Line empty;
    Line *emptyP = &empty;
    if(comparer(&emptyP, &xp) == 0) {
        return distanceFromEmptyString(y);
    }

    int32_t result = comparer(&xp, &yp);

    if(result == 0) {
        return UCOL_IDENTICAL;
    } else if(result > 0) {
        return UCOL_OFF; // bad situation
    } else { // we need to probe strength
        // An earlier variant used probePrefix(x, y, B2, B0) ("swamps
        // secondary difference") as the first test instead of the suffix
        // probe below; the remaining tests were the same.
        if(probeSuffix(x, y, B1, B0)) {
            return UCOL_PRIMARY;
        } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference
            return UCOL_SECONDARY;
        } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference
            return UCOL_TERTIARY;
        } else if(!probePrefix(x, y, B3, B0)) {
            // NOTE(review): this repeats the probe that already failed two
            // branches above, so with a deterministic comparer this branch
            // is always taken once reached.
            return UCOL_QUATERNARY;
        }
    }
    return UCOL_OFF; // bad
}
|
||||
|
||||
/**
 * Convenience overload: wraps both strings in Line objects and forwards to
 * getStrength(const Line &, const Line &).
 */
UColAttributeValue
StrengthProbe::getStrength(const UnicodeString &sx, const UnicodeString &sy) {
    Line lineX(sx);
    Line lineY(sy);
    const UColAttributeValue strength = getStrength(lineX, lineY);
    return strength;
}
|
||||
|
||||
/**
 * Compares two strings with the configured comparer.
 *
 * @return the comparer's result for (sx, sy)
 */
int32_t
StrengthProbe::compare(const UnicodeString &sx, const UnicodeString &sy) {
    Line lineX(sx);
    Line lineY(sy);

    // The comparer expects the address of a pointer to each line.
    const Line *lhs = &lineX;
    const Line *rhs = &lineY;
    return comparer(&lhs, &rhs);
}
|
||||
|
||||
/**
 * Compares two lines with the configured comparer.
 *
 * @return the comparer's result for (x, y)
 */
int32_t
StrengthProbe::compare(const Line &x, const Line &y) {
    // The comparer expects the address of a pointer to each line.
    const Line *lhs = &x;
    const Line *rhs = &y;
    return comparer(&lhs, &rhs);
}
|
||||
|
||||
/**
 * Determines the strength at which x differs from the empty string.
 *
 * @return UCOL_IDENTICAL when x compares equal to the empty string;
 *         UCOL_OFF when x compares less than the empty string;
 *         UCOL_PRIMARY when B0 compares less than or equal to x, or when
 *         (SE B0) compares greater than (SE x B0); otherwise
 *         UCOL_SECONDARY, UCOL_TERTIARY or UCOL_QUATERNARY as decided by
 *         the B2 and B3 probes below.
 */
UColAttributeValue
StrengthProbe::distanceFromEmptyString(const Line &x) {
    Line empty;
    Line *emptyP = &empty;
    // The sort keys fetched in this function are never consulted by the
    // logic; presumably they exist for inspection in a debugger. The calls
    // are kept so the sort-key callback is invoked exactly as before.
    uint8_t buff[256];
    getSortKey(empty.name, empty.len, buff, 256);
    Line B0Line(B0);
    Line *B0LineP = &B0Line;
    const Line *xp = &x;

    int32_t result = comparer(&emptyP, &xp);
    if(result == 0) {
        return UCOL_IDENTICAL;
    } else if(result > 0) {
        return UCOL_OFF;
    }

    result = comparer(&B0LineP, &xp);
    if(result <= 0) {
        return UCOL_PRIMARY;
    }

    // sexb0 = SE x B0
    Line sexb0(SE);
    sexb0.append(x.name, x.len);
    sexb0.append(B0);

    // seb0 = SE B0
    Line seb0(SE);
    seb0.append(B0);

    uint8_t seb0K[256];
    uint8_t sexb0K[256];
    uint8_t seb2K[256];
    uint8_t seb3K[256];
    memset(seb0K, 0, 256);
    memset(sexb0K, 0, 256);
    memset(seb2K, 0, 256);
    memset(seb3K, 0, 256);

    getSortKey(seb0, seb0K, 256);
    getSortKey(sexb0, sexb0K, 256);

    if(compare(seb0, sexb0) <= 0) {
        Line seb2(SE);
        seb2.append(B2);
        getSortKey(seb2, seb2K, 256);
        result = compare(seb2, sexb0);
        if((result <= 0 && !frenchSecondary) || (result >= 0 && frenchSecondary)) { // swamps tertiary difference
            return UCOL_SECONDARY;
        }
        Line seb3(SE);
        seb3.append(B3);
        getSortKey(seb3, seb3K, 256);
        if(compare(seb3, sexb0) < 0) {
            return UCOL_TERTIARY;
        }
        return UCOL_QUATERNARY;
    } else {
        // if this was UCA, we would have a primary difference.
        // however, this might not be so, since not everybody
        // makes well formed CEs.
        // in cs_CZ on linux, space is tertiary ignorable, but
        // its quaternary level strength is lower than quad
        // strengths for non-ignorables. oh well, more testing
        // required
        // I think that we can only have quaternary difference
        // here (in addition to primary difference).
        //if(!probePrefix(x, empty, B3, B0)) {
        //return UCOL_QUATERNARY;
        //} else {
        return UCOL_PRIMARY;
        //}
    }
}
|
||||
|
||||
/**
 * Convenience overload: wraps the string in a Line and forwards to
 * distanceFromEmptyString(const Line &).
 */
UColAttributeValue
StrengthProbe::distanceFromEmptyString(const UnicodeString &x) {
    const Line asLine(x);
    const UColAttributeValue distance = distanceFromEmptyString(asLine);
    return distance;
}
|
||||
|
||||
|
||||
/**
 * Returns the strength of the difference between (prefix + x) and
 * (prefix + y). The concatenations are built in the contraction scratch
 * lines, which are overwritten on every call.
 */
UColAttributeValue
StrengthProbe::getPrefixedStrength(const Line &prefix, const Line &x, const Line &y) {
    contractionUtilFirst.setToConcat(&prefix, &x);
    contractionUtilSecond.setToConcat(&prefix, &y);

    const UColAttributeValue strength =
        getStrength(contractionUtilFirst, contractionUtilSecond);
    return strength;
}
|
||||
|
||||
|
||||
/**
 * Copy constructor. Delegates to the assignment operator, which copies the
 * configuration and points the scratch pointers at this object's own lines.
 */
StrengthProbe::StrengthProbe(const StrengthProbe &that) {
    operator=(that);
}
|
||||
|
||||
/**
 * Copies the probe characters, separator, French-secondary flag and both
 * callbacks from that. The scratch lines are not copied; the scratch
 * pointers are reset to this object's own scratch lines.
 */
StrengthProbe &
StrengthProbe::operator=(const StrengthProbe &that) {
    if(this == &that) {
        // Self-assignment: nothing to copy.
        return *this;
    }

    B0 = that.B0;
    B1 = that.B1;
    B2 = that.B2;
    B3 = that.B3;
    SE = that.SE;
    frenchSecondary = that.frenchSecondary;
    comparer = that.comparer;
    skgetter = that.skgetter;

    // Never alias the other object's scratch storage.
    utilFirstP = &utilFirst;
    utilSecondP = &utilSecond;

    return *this;
}
|
||||
|
||||
/**
 * Compares (B0 SE B2) against (B2 SE B0) to decide whether the comparer
 * behaves like a French (backwards) secondary ordering.
 *
 * @param status set to U_INTERNAL_PROGRAM_ERROR when the two strings
 *               compare equal; otherwise left untouched
 * @return TRUE (and the frenchSecondary flag is set) when the first string
 *         compares greater than the second; FALSE otherwise
 */
UBool
StrengthProbe::isFrenchSecondary(UErrorCode &status) {
    utilFirst.setTo(B0);
    utilFirst.append(SE);
    utilFirst.append(B2);

    utilSecond.setTo(B2);
    utilSecond.append(SE);
    utilSecond.append(B0);

    const int32_t order = compare(utilFirst, utilSecond);

    if(order == 0) {
        // The two probe strings must differ; equality means the probe
        // characters cannot answer the question.
        status = U_INTERNAL_PROGRAM_ERROR;
        return FALSE;
    }
    if(order > 0) {
        frenchSecondary = TRUE;
        return TRUE;
    }
    return FALSE;
}
|
||||
|
||||
/**
 * Compares each of U+0041..U+005A with the character 0x20 above it
 * (A..Z against a..z) to find out which case sorts first.
 *
 * @param status set to U_INTERNAL_PROGRAM_ERROR when the 26 comparisons do
 *               not all agree; otherwise left untouched
 * @return TRUE when every upper case letter compares less than its lower
 *         case counterpart; FALSE when every one compares greater, or when
 *         the results are mixed (status is set in the latter case)
 */
UBool
StrengthProbe::isUpperFirst(UErrorCode &status) {
    int32_t upper = 0;
    int32_t lower = 0;
    int32_t equal = 0;

    for(UChar ch = 0x41; ch < 0x5B; ch++) {
        utilFirst.setTo(ch);
        utilSecond.setTo(ch+0x20);

        const int32_t order = compare(utilFirst, utilSecond);
        if(order < 0) {
            upper++;
        } else if(order > 0) {
            lower++;
        } else {
            equal++;
        }
    }

    if(equal == 0) {
        if(lower == 0) {
            return TRUE;
        }
        if(upper == 0) {
            return FALSE;
        }
    }

    // Mixed or tied results: the comparer gives no consistent answer.
    status = U_INTERNAL_PROGRAM_ERROR;
    return FALSE;
}
|
||||
|
85
tools/colprobe/strengthprobe.h
Executable file
85
tools/colprobe/strengthprobe.h
Executable file
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File strengthprobe.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 07/07/2003 weiv Creation.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
//
|
||||
// class StrengthProbe
//
// Determines the collation strength at which two strings differ by
// comparing them with probe characters attached as prefixes or suffixes.
|
||||
//
|
||||
|
||||
#ifndef COLPROBE_STRENGTHPROBE_H
|
||||
#define COLPROBE_STRENGTHPROBE_H
|
||||
|
||||
#include "colprobe.h"
|
||||
#include "line.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/usetiter.h"
|
||||
|
||||
class StrengthProbe {
|
||||
public:
|
||||
UChar SE;
|
||||
UChar B0;
|
||||
UChar B1;
|
||||
UChar B2;
|
||||
UChar B3;
|
||||
private:
|
||||
Line utilFirst;
|
||||
Line utilSecond;
|
||||
Line *utilFirstP;
|
||||
Line *utilSecondP;
|
||||
Line contractionUtilFirst;
|
||||
Line contractionUtilSecond;
|
||||
UBool probePrefix(const Line &x, const Line &y, UChar first, UChar second);
|
||||
UBool probeSuffix(const Line &x, const Line &y, UChar first, UChar second);
|
||||
UBool probePrefixNoSep(const Line &x, const Line &y, UChar first, UChar second);
|
||||
UBool probeSuffixNoSep(const Line &x, const Line &y, UChar first, UChar second);
|
||||
|
||||
UBool frenchSecondary;
|
||||
|
||||
public:
|
||||
CompareFn comparer;
|
||||
GetSortKeyFn skgetter;
|
||||
|
||||
StrengthProbe() {};
|
||||
StrengthProbe(CompareFn comparer, GetSortKeyFn getter, UChar SE = 0x0030, UChar B0 = 0x0061, UChar B1 = 0x0062, UChar B2 = 0x00E1, UChar B3 = 0x0041); //, UChar LB = 0x0039, UChar UB = 0xfa29);
|
||||
int setProbeChars(UChar B0, UChar B1, UChar B2, UChar B3);
|
||||
int checkSanity();
|
||||
StrengthProbe(const StrengthProbe &that);
|
||||
StrengthProbe &operator=(const StrengthProbe &that);
|
||||
UColAttributeValue getStrength(const Line &x, const Line &y);
|
||||
UColAttributeValue getStrength(const UnicodeString &x, const UnicodeString &y);
|
||||
UColAttributeValue getPrefixedStrength(const Line &prefix, const Line &x, const Line &y);
|
||||
int32_t compare(const UnicodeString &x, const UnicodeString &y);
|
||||
int32_t compare(const Line &x, const Line &y);
|
||||
UColAttributeValue distanceFromEmptyString(const Line &x);
|
||||
UColAttributeValue distanceFromEmptyString(const UnicodeString &x);
|
||||
UBool isFrenchSecondary(UErrorCode &status);
|
||||
UBool isUpperFirst(UErrorCode &status);
|
||||
int getSortKey(const Line &l, uint8_t *buffer, int32_t buffCap) {
|
||||
return skgetter(l.name, l.len, buffer, buffCap);
|
||||
};
|
||||
|
||||
int getSortKey(UChar *string, int32_t sLen, uint8_t *buffer, int32_t buffCap) {
|
||||
return skgetter(string, sLen, buffer, buffCap);
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
|
||||
#endif //#ifndef COLPROBE_STRENGTHPROBE_H
|
||||
|
16
tools/colprobe/tableStarter.pl
Executable file
16
tools/colprobe/tableStarter.pl
Executable file
|
@ -0,0 +1,16 @@
|
|||
#!/usr/bin/perl -w

use strict;

# Runs doComparisonTable.pl once for every locale listed in locale.txt.
# Only entries that contain an underscore and do not start with '#' are
# processed; everything else in the list is ignored.

# Both paths point into the original author's checkout.
my $localeFile = "/home/weiv/src/icu/source/extra/colprobe/locale.txt";
my $script     = "/home/weiv/src/icu/source/extra/colprobe/doComparisonTable.pl";

# Read the list directly instead of shelling out to cat, and fail loudly
# when it is missing rather than silently doing nothing.
open(my $in, '<', $localeFile) or die "Cannot read $localeFile: $!\n";
my @locales = <$in>;
close($in);
chomp(@locales);

foreach my $locale (@locales) {
    next unless $locale =~ /_/ && !($locale =~ /^#/);

    print "$script $locale\n";

    # List-form pipe open runs the script without a shell, so the locale
    # name is passed as a single argument and never interpreted. The
    # child's output is drained and discarded, as the backticks used to do.
    if (open(my $child, '-|', $script, $locale)) {
        1 while <$child>;
        close($child) or warn "$script $locale failed (status $?)\n";
    }
    else {
        warn "Cannot run $script: $!\n";
    }
}
|
8
tools/colprobe/targetsetgenerator.cpp
Executable file
8
tools/colprobe/targetsetgenerator.cpp
Executable file
|
@ -0,0 +1,8 @@
|
|||
#include "targetsetgenerator.h"
|
||||
|
||||
/**
 * Stores the comparison callback, keeps a copy of startingSet in the set
 * member, and adds every element of startingSet to this object itself.
 */
TargetSetGenerator::TargetSetGenerator(UnicodeSet &startingSet, CompareFn comparer)
    : comparer(comparer),
      set(startingSet)
{
    // The base-class part starts out default-constructed; populate it here.
    addAll(startingSet);
}
|
15
tools/colprobe/targetsetgenerator.h
Executable file
15
tools/colprobe/targetsetgenerator.h
Executable file
|
@ -0,0 +1,15 @@
|
|||
#ifndef TARGETSETGENERATOR_H
|
||||
#define TARGETSETGENERATOR_H
|
||||
|
||||
#include "colprobe.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
class TargetSetGenerator : public UnicodeSet {
|
||||
public:
|
||||
TargetSetGenerator(UnicodeSet &startingSet, CompareFn comparer);
|
||||
private:
|
||||
CompareFn comparer;
|
||||
UnicodeSet set;
|
||||
};
|
||||
|
||||
#endif
|
48
tools/colprobe/template
Executable file
48
tools/colprobe/template
Executable file
|
@ -0,0 +1,48 @@
|
|||
<html>
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
<title>$locale</title>
|
||||
<style>
|
||||
<!--
|
||||
table { border-spacing: 0; border-collapse: collapse; width: 100%;
|
||||
border: 1px solid black }
|
||||
td, th { width: 10%; border-spacing: 0; border-collapse: collapse; color: black;
|
||||
vertical-align: top; border: 1px solid black }
|
||||
-->
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body bgcolor="#FFFFFF">
|
||||
|
||||
<p><b><font color="#FF0000">Collation:</font> $locale <a href="http://oss.software.ibm.com/cgi-bin/icu/lx/en/?_=$locale">Demo</a>,
|
||||
|
||||
<a href="http://oss.software.ibm.com/cvs/icu/~checkout~/locale/all_diff_xml/comparison_charts.html">Cover
|
||||
Page</a>, <a href="http://oss.software.ibm.com/cvs/icu/~checkout~/locale/all_diff_xml/index.html">Index</a></b></p>
|
||||
<table>
|
||||
<tr>
|
||||
<th bgcolor="#AD989D">COMMON (<a href="http://oss.software.ibm.com/cvs/icu/~checkout~/locale/common/xml/$locale.xml">xml</a>)</th>
|
||||
<th bgcolor="#1191F1">LINUX (<a href="http://oss.software.ibm.com/cvs/icu/~checkout~/locale/linux/xml/$locale.xml">xml</a>)</th>
|
||||
<th bgcolor="#98FB98">WINDOWS (<a href="http://oss.software.ibm.com/cvs/icu/~checkout~/locale/windows/xml/$locale.xml">xml</a>)</th>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td bgcolor="#AD989D">1.0-alpha</td>
|
||||
<td bgcolor="#FF6633">1.0</td>
|
||||
<td bgcolor="#FF6633">=</td>
|
||||
<td bgcolor="#FF6633"><span title="006E {LATIN SMALL LETTER N}">&amp;n</span><br>
|
||||
<span title="006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> &lt; ny</span><br>
|
||||
|
||||
<span title="006E 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y} / 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> = nny / ny</span><br>
|
||||
<span title="006E 0059 {LATIN SMALL LETTER N} {LATIN CAPITAL LETTER Y}"> &lt;&lt;&lt; nY</span><br>
|
||||
</td>
|
||||
<td bgcolor="#FF6633">=</td>
|
||||
<td bgcolor="#FFFF33">1.2</td>
|
||||
|
||||
<td bgcolor="#98FB98">Windows XP</td>
|
||||
<td bgcolor="#FF6633">=</td>
|
||||
<td bgcolor="#FF6633">=</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
</body>
|
49
tools/colprobe/uniqueFiles.pl
Executable file
49
tools/colprobe/uniqueFiles.pl
Executable file
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/perl

use strict;
use warnings;
use File::Compare;

# Groups the files named on the command line into sets of identical files.
#
# Files are visited in sorted order. Each file that has not yet been
# assigned to a group becomes the representative of a new group, and every
# remaining file with the same contents is added to it. The result is
# printed as "<file> <group number>" lines.
#
# Files are compared with File::Compare rather than by running diff, so
# names containing spaces or shell metacharacters are safe, and a file that
# cannot be read is never mistaken for a duplicate.

# Files not yet assigned to any group.
my %secondfilelist = map { $_ => "" } @ARGV;

# Representative file => reference to the list of files identical to it.
my %list;

foreach my $file (sort keys(%secondfilelist)) {
    # Skip files that an earlier representative already claimed.
    next unless exists $secondfilelist{$file};
    delete $secondfilelist{$file};

    # compare() returns 0 only when both files could be read and are equal.
    my @same = grep { compare($file, $_) == 0 } sort(keys %secondfilelist);

    print "Adding @same to $file\n";
    $list{$file} = [@same];
    delete @secondfilelist{@same};
}

my $j = 0;
foreach my $file (sort(keys %list)) {
    print "<$file> <$j>\n";
    foreach my $samefile (@{ $list{$file} }) {
        print "<$samefile> <$j>\n ";
    }
    $j++;
}
|
||||
|
116
tools/colprobe/uprinter.cpp
Executable file
116
tools/colprobe/uprinter.cpp
Executable file
|
@ -0,0 +1,116 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File uprinter.cpp
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 03/18/2003 weiv Creation.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "uprinter.h"
|
||||
|
||||
/**
 * Creates a printer that writes to an already open stdio stream.
 *
 * @param file      stream to wrap
 * @param locale    locale passed to u_finit() and remembered for log();
 *                  a NULL locale is remembered as an empty string
 * @param encoding  encoding passed to u_finit()
 * @param transliterateNonPrintable  when TRUE, output is passed through an
 *                  Any-Hex/Java transliterator restricted to characters
 *                  outside CR, LF, TAB and U+0020..U+007F
 */
UPrinter::UPrinter(FILE *file, const char *locale, const char *encoding, UBool transliterateNonPrintable) {
    _on = TRUE;
    out = u_finit(file, locale, encoding);

    // _locale is a fixed-size array: copy with a bound and always
    // terminate, and do not dereference a NULL locale.
    if(locale != NULL) {
        strncpy(_locale, locale, sizeof(_locale) - 1);
        _locale[sizeof(_locale) - 1] = 0;
    } else {
        _locale[0] = 0;
    }

    if(transliterateNonPrintable) {
        UErrorCode status = U_ZERO_ERROR;
        UTransliterator *anyHex = utrans_open("[^\\u000d\\u000a\\u0009\\u0020-\\u007f] Any-Hex/Java", UTRANS_FORWARD, NULL, 0, NULL, &status);
        u_fsettransliterator(out, U_WRITE, anyHex, &status);
    }
}
|
||||
|
||||
/**
 * Creates a printer that writes to a newly opened file ("wb" mode) and
 * starts the file with a byte order mark.
 *
 * @param name      path of the file to open
 * @param locale    locale passed to u_fopen() and remembered for log();
 *                  a NULL locale is remembered as an empty string
 * @param encoding  encoding passed to u_fopen()
 * @param trans     transliterator to install on the output, or NULL to use
 *                  the built-in Any-Hex/Java one; only consulted when
 *                  transliterateNonPrintable is TRUE
 * @param transliterateNonPrintable  when TRUE, a transliterator is
 *                  installed on the output
 */
UPrinter::UPrinter(const char *name, const char *locale, const char *encoding, UTransliterator *trans, UBool transliterateNonPrintable) {
    _on = TRUE;
    out = u_fopen(name, "wb", locale, encoding);
    u_fputc(0xFEFF, out); // emit a BOM

    // _locale is a fixed-size array: copy with a bound and always
    // terminate, and do not dereference a NULL locale.
    if(locale != NULL) {
        strncpy(_locale, locale, sizeof(_locale) - 1);
        _locale[sizeof(_locale) - 1] = 0;
    } else {
        _locale[0] = 0;
    }

    if(transliterateNonPrintable) {
        UErrorCode status = U_ZERO_ERROR;
        if(trans == NULL) {
            UTransliterator *anyHex = utrans_open("[^\\u000d\\u000a\\u0009\\u0020-\\u007f] Any-Hex/Java", UTRANS_FORWARD, NULL, 0, NULL, &status);
            u_fsettransliterator(out, U_WRITE, anyHex, &status);
        } else {
            u_fsettransliterator(out, U_WRITE, trans, &status);
        }
    }
}
|
||||
|
||||
/** Closes the output stream opened or wrapped by the constructor. */
UPrinter::~UPrinter()
{
    u_fclose(out);
}
|
||||
|
||||
void
|
||||
UPrinter::log(const UnicodeString &string, UBool nl) {
|
||||
if(_on) {
|
||||
log(((UnicodeString)string).getTerminatedBuffer(), nl);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
UPrinter::log(const UChar *string, UBool nl) {
|
||||
if(_on) {
|
||||
u_fprintf(out, "%S", string);
|
||||
if(nl) {
|
||||
u_fprintf(out, "\n");
|
||||
}
|
||||
u_fflush(out);
|
||||
}
|
||||
}
|
||||
/*
|
||||
void
|
||||
UPrinter::log(const char *string, UBool nl) {
|
||||
if(_on) {
|
||||
u_fprintf(out, "%s", string);
|
||||
if(nl) {
|
||||
u_fprintf(out, "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
void
|
||||
UPrinter::log(const Line *line, UBool nl) {
|
||||
if(_on) {
|
||||
log(line->name);
|
||||
if(line->expLen) {
|
||||
log("/");
|
||||
log(line->expansionString);
|
||||
}
|
||||
if(nl) {
|
||||
u_fprintf(out, "\n");
|
||||
u_fflush(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void UPrinter::log(const char *fmt, ...)
|
||||
{
|
||||
UChar buffer[4000];
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
/* sprintf it just to make sure that the information is valid */
|
||||
u_vsprintf(buffer, _locale, fmt, ap);
|
||||
va_end(ap);
|
||||
if( _on ) {
|
||||
log(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
UPrinter::on(void) {
|
||||
_on = TRUE;
|
||||
}
|
||||
|
||||
void
|
||||
UPrinter::off(void) {
|
||||
_on = FALSE;
|
||||
}
|
51
tools/colprobe/uprinter.h
Executable file
51
tools/colprobe/uprinter.h
Executable file
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2003, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File uprinter.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 03/18/2003 weiv Creation.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef COLPROBE_UPRINTER_H
|
||||
#define COLPROBE_UPRINTER_H
|
||||
|
||||
#include "line.h"
|
||||
|
||||
#include "unicode/ustdio.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
|
||||
class UPrinter {
|
||||
UFILE *out;
|
||||
UChar buffer[256];
|
||||
UBool _on;
|
||||
char _locale[256];
|
||||
public:
|
||||
UPrinter(FILE *file, const char *locale, const char *encoding, UBool transliterateNonPrintable=TRUE);
|
||||
UPrinter(const char *name, const char *locale, const char *encoding, UTransliterator *trans, UBool transliterateNonPrintable);
|
||||
~UPrinter();
|
||||
void log(const UnicodeString &string, UBool nl = FALSE);
|
||||
void log(const UChar *string, UBool nl = FALSE);
|
||||
//void log(const char *string, UBool nl = FALSE);
|
||||
void log(const Line *line, UBool nl = FALSE);
|
||||
void log(const char *fmt, ...);
|
||||
void off(void);
|
||||
void on(void);
|
||||
UBool isOn(void) {
|
||||
return _on;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif // #ifndef COLPROBE_UPRINTER_H
|
30
tools/colprobe/winGenCollData.pl
Executable file
30
tools/colprobe/winGenCollData.pl
Executable file
|
@ -0,0 +1,30 @@
|
|||
#!/usr/bin/perl -w

use strict;

# Runs colprobe once for every locale listed in ./locale.txt and stores
# each run's output in <platform>logs/<locale>_log.txt.
#
# Usage: winGenCollData.pl <platform>
#
# Entries starting with '#' are treated as comments and skipped.

#my $commandPath = "~/src/icu/source/extra/colprobe/";
my $commandPath = "c:/dev/0_icu/source/extra/colprobe/release/";

# Without a platform name the directory names and the colprobe arguments
# below would be built from an undefined value.
my $platform = $ARGV[0];
die "Usage: $0 <platform>\n" unless defined $platform && length $platform;

# Read the list directly instead of shelling out to cat (an alternative
# source used previously was `locale -a`).
open(my $in, '<', 'locale.txt') or die "Cannot read locale.txt: $!\n";
my $localeMinusA = do { local $/; <$in> };
close($in);
$localeMinusA = '' unless defined $localeMinusA;

# Accept both CRLF and LF line endings.
my @locales = split(/\r?\n/, $localeMinusA);

foreach my $dir ($platform."logs", $platform) {
    next if -d $dir;
    mkdir($dir) or warn "Cannot create directory $dir: $!\n";
}

foreach my $locale (@locales) {
    # Skip blank entries and comments. (A further filter on names
    # containing '_' was considered here but is not applied.)
    next if $locale eq '' || $locale =~ /^\#/;

    my $command = "${commandPath}colprobe --platform $platform --ref $platform --output resb $locale >${platform}logs/${locale}_log.txt 2>&1";

    print "$command\n";
    # The command relies on shell redirection, so it is run through the
    # shell as a single string.
    system($command) == 0
        or warn "colprobe failed for $locale (status $?)\n";
}
|
Loading…
Add table
Reference in a new issue