mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 21:45:37 +00:00
updated for 4.0
X-SVN-Rev: 11164
This commit is contained in:
parent
76aa91b7db
commit
9c6b10e2dc
3 changed files with 359 additions and 0 deletions
|
@ -0,0 +1,113 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/GenerateStandardizedVariants.java,v $
|
||||
* $Date: 2003/02/26 00:35:09 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
public final class GenerateStandardizedVariants implements UCD_Types {
|
||||
|
||||
static public String showVarGlyphs(String code0, String code1, String shape) {
|
||||
System.out.println(code0 + ", " + code1 + ", [" + shape + "]");
|
||||
|
||||
String abbShape = "";
|
||||
if (shape.length() != 0) {
|
||||
abbShape = '-' + shape.substring(0,4);
|
||||
if (shape.endsWith("-feminine")) abbShape += "fem";
|
||||
}
|
||||
|
||||
return "<img alt='U+" + code0 + "+U+" + code1 + "/" + shape
|
||||
+ "' src='http://www.unicode.org/cgi-bin/varglyph?24-" +code0 + "-" + code1 + abbShape + "'>";
|
||||
}
|
||||
|
||||
/*
|
||||
# Field 0: the variation sequence
|
||||
# Field 1: the description of the desired appearance
|
||||
# Field 2: where the appearance is only different in in particular shaping environments
|
||||
# this field lists them. The possible values are: isolated, initial, medial, final.
|
||||
# If more than one is present, there are spaces between them.
|
||||
*/
|
||||
static public void generate() throws IOException {
|
||||
Default.setUCD();
|
||||
|
||||
// read the data and compose the table
|
||||
|
||||
String table = "<table><tr><th>Rep Glyph</th><th>Character Sequence</th><th>Context</th><th width='10%'>Alt Glyph</th><th>Description of variant appearance</th></tr>";
|
||||
|
||||
String[] splits = new String[4];
|
||||
String[] codes = new String[2];
|
||||
String[] shapes = new String[4];
|
||||
|
||||
BufferedReader in = Utility.openUnicodeFile("StandardizedVariants", Default.ucdVersion, true, Utility.LATIN1);
|
||||
while (true) {
|
||||
String line = Utility.readDataLine(in);
|
||||
if (line == null) break;
|
||||
if (line.length() == 0) continue;
|
||||
|
||||
int count = Utility.split(line, ';', splits);
|
||||
int codeCount = Utility.split(splits[0], ' ', codes);
|
||||
int code = Utility.codePointFromHex(codes[0]);
|
||||
|
||||
// <img alt="03E2" src="http://www.unicode.org/cgi-bin/refglyph?24-03E2" style="vertical-align:middle">
|
||||
|
||||
table += "<tr><td><img alt='U+" + codes[0] + "' src='http://www.unicode.org/cgi-bin/refglyph?24-" + codes[0] + "'></td>\n";
|
||||
table += "<td>" + splits[0] + "</td>\n";
|
||||
|
||||
String shape = splits[2].trim();
|
||||
if (shape.equals("all")) shape = "";
|
||||
|
||||
table += "<td>" + Utility.replace(shape, " ", "<br>") + "</td>\n";
|
||||
|
||||
// http://www.unicode.org/cgi-bin/varglyph?24-1820-180B-fina
|
||||
// http://www.unicode.org/cgi-bin/varglyph?24-222A-FE00
|
||||
|
||||
table += "<td>";
|
||||
if (shape.length() == 0) {
|
||||
table += showVarGlyphs(codes[0], codes[1], "");
|
||||
} else {
|
||||
int shapeCount = Utility.split(shape, ' ', shapes);
|
||||
for (int i = 0; i < shapeCount; ++i) {
|
||||
if (i != 0) table += " ";
|
||||
table += showVarGlyphs(codes[0], codes[1], shapes[i]);
|
||||
}
|
||||
}
|
||||
table += "</td>\n";
|
||||
|
||||
table += "<td>" + Default.ucd.getName(code) + " " + splits[1] + "</td>\n";
|
||||
table += "</tr>";
|
||||
}
|
||||
in.close();
|
||||
table += "</table>";
|
||||
|
||||
// now write out the results
|
||||
|
||||
String directory = "DerivedData/";
|
||||
String filename = directory + "StandardizedVariants.html";
|
||||
PrintWriter out = Utility.openPrintWriter(filename, Utility.LATIN1_UNIX);
|
||||
String[] batName = {""};
|
||||
String mostRecent = GenerateData.generateBat(directory, filename, GenerateData.getFileSuffix(true), batName);
|
||||
|
||||
String[] replacementList = {
|
||||
"@revision@", Default.ucd.getVersion(),
|
||||
"@date@", Default.getDate(),
|
||||
"@table@", table};
|
||||
|
||||
Utility.appendFile("StandardizedVariants-Template.html", Utility.UTF8, out, replacementList);
|
||||
|
||||
out.close();
|
||||
Utility.renameIdentical(mostRecent, Utility.getOutputName(filename), batName[0]);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,137 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
|
||||
|
||||
"http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
<meta http-equiv="Content-Language" content="en-us">
|
||||
<meta name="GENERATOR" content="Microsoft FrontPage 4.0">
|
||||
<meta name="ProgId" content="FrontPage.Editor.Document">
|
||||
<meta name="keywords" content="unicode, variant glyphs">
|
||||
<meta name="description" content="Describes and displays standardized variant glyphs">
|
||||
<title>Standardized Variants</title>
|
||||
<link rel="stylesheet" type="text/css" href="http://www.unicode.org/reports/reports.css">
|
||||
</head>
|
||||
|
||||
<body bgcolor="#ffffff">
|
||||
|
||||
<table class="header">
|
||||
<tr>
|
||||
<td class="icon"><a href="http://www.unicode.org"><img align="middle" alt="[Unicode]" border="0" src="http://www.unicode.org/webscripts/logo60s2.gif" width="34" height="33"></a> <a class="bar" href="UnicodeCharacterDatabase.html">Unicode
|
||||
Character Database</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="gray"> </td>
|
||||
</tr>
|
||||
</table>
|
||||
<blockquote>
|
||||
<h1>Standardized Variants</h1>
|
||||
<table class="wide">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td valign="top" width="144">Revision</td>
|
||||
<td valign="top">@revision@</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="144">Authors</td>
|
||||
<td valign="top">Members of the Editorial Committee</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="144">Date</td>
|
||||
<td valign="top">@date@</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="144">This Version</td>
|
||||
<td valign="top"><a href="http://www.unicode.org/Public/3.2-Update/StandardizedVariants-@revision@.html">http://www.unicode.org/Public/3.2-Update/StandardizedVariants-@revision@.html</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="144">Previous Version</td>
|
||||
<td valign="top"><a href="http://www.unicode.org/Public/3.2-Update/StandardizedVariants-3.2.0.html">http://www.unicode.org/Public/3.2-Update/StandardizedVariants-3.2.0.html</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td valign="top" width="144">Latest Version</td>
|
||||
<td valign="top"><a href="http://www.unicode.org/Public/UNIDATA/StandardizedVariants.html">http://www.unicode.org/Public/UNIDATA/StandardizedVariants.html</a></td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<h3><br>
|
||||
<i>Summary</i></h3>
|
||||
<blockquote>
|
||||
<p>This file provides a visual display of the standard variant sequences
|
||||
derived from StandardizedVariants.txt.</p>
|
||||
</blockquote>
|
||||
<h3><i>Status</i></h3>
|
||||
<blockquote>
|
||||
<p><i>The file and the files described herein are part of the <a href="http://www.unicode.org/ucd">Unicode
|
||||
Character Database</a> (UCD) and are governed by the <a href="#Terms of Use">UCD
|
||||
Terms of Use</a> stated at the end.</i></p>
|
||||
</blockquote>
|
||||
<hr width="50%">
|
||||
<h2>Introduction</h2>
|
||||
<p>The tables here <i>exhaustively</i> list the valid, registered
|
||||
combinations of base character plus variation indicator. All combinations not
|
||||
listed in StandardizedVariants.txt are unspecified and are reserved for future
|
||||
standardization; no conformant process may interpret them as standardized
|
||||
variants. Variation selectors and their use are described in The Unicode
|
||||
Standard.</p>
|
||||
<p>These mathematical variants are all produced with the addition of Variation
|
||||
Selector 1 (VS1 or U+FE00) to mathematical operator base characters. There is
|
||||
no variation according to context. The Mongolian variants use the Mongolian
|
||||
Variant Selectors, and may vary according to context. That is, if a contextual
|
||||
shape is not listed below, then the variation sequence has an unmodified
|
||||
appearance. At this time no Han variants exist.</p>
|
||||
<blockquote>
|
||||
<p><a name="fonts"><b>Note: </b></a>The glyphs used to show the variations
|
||||
are often derived from different physical fonts than the representative
|
||||
glyphs in the standard. They may therefore exhibit minor differences in
|
||||
size, proportion, or weight <i>unrelated</i> to the intentional difference
|
||||
in feature that is the defining element of the variation. Such minor
|
||||
differences should be ignored. Likewise, in some cases the existing
|
||||
representative fonts may not yet contain newly encoded characters and hence
|
||||
some representative glyphs shown in these tables may have a slightly
|
||||
different style than others.</p>
|
||||
</blockquote>
|
||||
<p>@table@</p>
|
||||
<hr width="50%">
|
||||
<h2>UCD <a name="Terms of Use">Terms of Use</a></h2>
|
||||
<h3><i>Disclaimer</i></h3>
|
||||
<blockquote>
|
||||
<p><i>The Unicode Character Database is provided as is by Unicode, Inc. No
|
||||
claims are made as to fitness for any particular purpose. No warranties of
|
||||
any kind are expressed or implied. The recipient agrees to determine
|
||||
applicability of information provided. If this file has been purchased on
|
||||
magnetic or optical media from Unicode, Inc., the sole remedy for any claim
|
||||
will be exchange of defective media within 90 days of receipt.</i></p>
|
||||
<p><i>This disclaimer is applicable for all other data files accompanying
|
||||
the Unicode Character Database, some of which have been compiled by the
|
||||
Unicode Consortium, and some of which have been supplied by other sources.</i></p>
|
||||
</blockquote>
|
||||
<h3><i>Limitations on Rights to Redistribute This Data</i></h3>
|
||||
<blockquote>
|
||||
<p><i>Recipient is granted the right to make copies in any form for internal
|
||||
distribution and to freely use the information supplied in the creation of
|
||||
products supporting the Unicode<sup>TM</sup> Standard. The files in the
|
||||
Unicode Character Database can be redistributed to third parties or other
|
||||
organizations (whether for profit or not) as long as this notice and the
|
||||
disclaimer notice are retained. Information can be extracted from these
|
||||
files and used in documentation or programs, as long as there is an
|
||||
accompanying notice indicating the source.</i></p>
|
||||
</blockquote>
|
||||
<hr width="50%">
|
||||
<div align="center">
|
||||
<center>
|
||||
<table cellspacing="0" cellpadding="0" border="0">
|
||||
<tr>
|
||||
<td><a href="http://www.unicode.org/unicode/copyright.html"><img src="http://www.unicode.org/img/hb_notice.gif" border="0" alt="Access to Copyright and terms of use" width="216" height="50"></a></td>
|
||||
</tr>
|
||||
</table>
|
||||
<script language="Javascript" type="text/javascript" src="http://www.unicode.org/webscripts/lastModified.js"></script>
|
||||
</center>
|
||||
</div>
|
||||
</blockquote>
|
||||
|
||||
</body>
|
||||
|
||||
</html>
|
109
tools/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java
Normal file
109
tools/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java
Normal file
|
@ -0,0 +1,109 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2001, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCD/TestNameUniqueness.java,v $
|
||||
* $Date: 2003/02/26 00:35:09 $
|
||||
* $Revision: 1.1 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.text.UCD;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
|
||||
import com.ibm.text.utility.*;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public class TestNameUniqueness implements UCD_Types {
|
||||
|
||||
public static void test() throws IOException {
|
||||
Default.setUCD();
|
||||
new TestNameUniqueness().checkNames();
|
||||
}
|
||||
|
||||
Map names = new HashMap();
|
||||
int[] charCount = new int[128];
|
||||
int[] samples = new int[128];
|
||||
|
||||
void checkNames() throws IOException {
|
||||
PrintWriter out = Utility.openPrintWriter("name_uniqueness.txt", Utility.LATIN1_WINDOWS);
|
||||
try {
|
||||
out.println("Collisions");
|
||||
out.println();
|
||||
for (int cp = 0; cp < 0x10FFFF; ++cp) {
|
||||
Utility.dot(cp);
|
||||
if (!Default.ucd.isAllocated(cp)) continue;
|
||||
if (Default.ucd.hasComputableName(cp)) continue;
|
||||
int cat = Default.ucd.getCategory(cp);
|
||||
if (cat == Cc) continue;
|
||||
|
||||
String name = Default.ucd.getName(cp);
|
||||
String processedName = processName(cp, name);
|
||||
Integer existing = (Integer) names.get(processedName);
|
||||
if (existing != null) {
|
||||
out.println("Collision between: "
|
||||
+ Default.ucd.getCodeAndName(existing.intValue())
|
||||
+ ", " + Default.ucd.getCodeAndName(cp));
|
||||
} else {
|
||||
names.put(processedName, new Integer(cp));
|
||||
}
|
||||
}
|
||||
out.println();
|
||||
out.println("Samples");
|
||||
out.println();
|
||||
for (int i = 0; i < charCount.length; ++i) {
|
||||
int count = charCount[i];
|
||||
if (count == 0) continue;
|
||||
String sampleName = Default.ucd.getCodeAndName(samples[i]);
|
||||
out.println(count + "\t'" + ((char)i)
|
||||
+ "'\t" + Default.ucd.getCodeAndName(samples[i])
|
||||
+ "\t=>\t" + processName(samples[i], Default.ucd.getName(samples[i])));
|
||||
}
|
||||
out.println();
|
||||
out.println("Name Samples");
|
||||
out.println();
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
int cat = Default.ucd.getCategory(i);
|
||||
if (cat == Cc) continue;
|
||||
out.println(Default.ucd.getCodeAndName(i)
|
||||
+ "\t=>\t" + processName(i, Default.ucd.getName(i)));
|
||||
}
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
static final String[][] replacements = {
|
||||
//{"SMALL LETTER", ""},
|
||||
{"LETTER", ""},
|
||||
{"CHARACTER", ""},
|
||||
{"DIGIT", ""},
|
||||
{"SIGN", ""},
|
||||
//{"WITH", ""},
|
||||
};
|
||||
|
||||
StringBuffer processNamesBuffer = new StringBuffer();
|
||||
|
||||
String processName(int codePoint, String name) {
|
||||
name = Utility.replace(name, replacements);
|
||||
processNamesBuffer.setLength(0);
|
||||
for (int i = 0; i < name.length(); ++i) {
|
||||
char c = name.charAt(i);
|
||||
++charCount[c];
|
||||
if (samples[c] == 0) samples[c] = codePoint;
|
||||
if ('A' <= c && c <= 'Z'
|
||||
|| '0' <= c && c <= '9') processNamesBuffer.append(c);
|
||||
|
||||
}
|
||||
if (processNamesBuffer.length() == name.length()) return name;
|
||||
return processNamesBuffer.toString();
|
||||
}
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue