diff --git a/icu4j/src/com/ibm/icu/dev/tool/translit/SourceSet.java b/icu4j/src/com/ibm/icu/dev/tool/translit/SourceSet.java new file mode 100644 index 00000000000..8556c98e023 --- /dev/null +++ b/icu4j/src/com/ibm/icu/dev/tool/translit/SourceSet.java @@ -0,0 +1,109 @@ +/* +********************************************************************** +* Copyright (c) 2001, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 11/29/2001 aliu Creation. +* 06/26/2002 aliu Moved to com.ibm.icu.dev.tool.translit +********************************************************************** +*/ +package com.ibm.icu.dev.tool.translit; +import java.util.*; +import com.ibm.icu.dev.tool.translit.UnicodeSetClosure; +import java.io.*; +import com.ibm.icu.text.*; + +/** + * Class that generates source set information for a transliterator. + * + * To run, use: + * + * java com.ibm.icu.dev.tool.translit.SourceSet Latin-Katakana NFD lower + * + * Output is produced in the command console, and a file with more detail is also written. + * + * To see if it works, use: + * + * java com.ibm.icu.dev.test.translit.TransliteratorTest -v -nothrow TestIncrementalProgress + * + * and + * + * java com.ibm.icu.dev.demo.translit.Demo + */ +public class SourceSet { + + public static void main(String[] args) throws IOException { + if (args.length == 0) { + // Compute and display the source sets for all system + // transliterators. + for (Enumeration e = Transliterator.getAvailableIDs(); e.hasMoreElements(); ) { + String ID = (String) e.nextElement(); + showSourceSet(ID, Normalizer.NO_OP, false); + } + } else { + // Usage: ID [NFKD | NFD] [lower] + Normalizer.Mode m = Normalizer.NO_OP; + boolean lowerFirst = false; + if (args.length >= 2) { + if (args[1].equalsIgnoreCase("NFD")) { + m = Normalizer.DECOMP; + } else if (args[1].equalsIgnoreCase("NFKD")) { + m = Normalizer.DECOMP_COMPAT; + } else { + usage(); + } + } + if (args.length >= 3) { + if (args[2].equalsIgnoreCase("lower")) { + lowerFirst = true; + } else { + usage(); + } + } + if (args.length > 3) { + usage(); + } + showSourceSet(args[0], m, lowerFirst); + } + } + + static void showSourceSet(String ID, Normalizer.Mode m, boolean lowerFirst) throws IOException { + File f = new File("UnicodeSetClosure.txt"); + String filename = f.getCanonicalFile().toString(); + out = new PrintWriter( + new OutputStreamWriter( + new FileOutputStream(filename), "UTF-8")); + out.print('\uFEFF'); // BOM + System.out.println(); + System.out.println("Writing " + filename); + Transliterator t = Transliterator.getInstance(ID); + showSourceSetAux(t, m, lowerFirst, true); + showSourceSetAux(t.getInverse(), m, lowerFirst, false); + out.close(); + } + + static PrintWriter out; + + static void showSourceSetAux(Transliterator t, Normalizer.Mode m, boolean lowerFirst, boolean forward) throws IOException { + UnicodeSet sourceSet = t.getSourceSet(); + if (m != Normalizer.NO_OP || lowerFirst) { + UnicodeSetClosure.close(sourceSet, m, lowerFirst); + } + System.out.println(t.getID() + ": " + + sourceSet.toPattern(true)); + out.println("# MINIMAL FILTER GENERATED FOR: " + t.getID() + (forward ? "" : " REVERSE")); + out.println(":: " + + (forward ? "" : "( ") + + sourceSet.toPattern(true) + + (forward ? "" : " )") + + " ;"); + out.println("# Unicode: " + sourceSet.toPattern(false)); + out.println(); + } + + static void usage() { + System.err.println("Usage: ID [ NFD|NFKD [lower] ]"); + System.exit(1); + } +}