diff --git a/.gitattributes b/.gitattributes index 7c8f1eee7f3..a0be8f64afc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -145,7 +145,6 @@ icu4j/main/shared/data/icudata.jar -text icu4j/main/shared/data/icutzdata.jar -text icu4j/main/shared/data/testdata.jar -text icu4j/main/tests/core/src/com/ibm/icu/dev/data/rbbi/english.dict -text -icu4j/main/tests/core/src/com/ibm/icu/dev/data/thai6.ucs -text icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.OlsonTimeZone.dat -text icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.impl.TimeZoneAdapter.dat -text icu4j/main/tests/core/src/com/ibm/icu/dev/test/serializable/data/ICU_3.6/com.ibm.icu.math.BigDecimal.dat -text diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/thai6.ucs b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/thai6.ucs deleted file mode 100644 index a3d02826a75..00000000000 Binary files a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/thai6.ucs and /dev/null differ diff --git a/icu4j/tools/misc/src/com/ibm/icu/dev/tool/rbbi/BuildDictionaryFile.java b/icu4j/tools/misc/src/com/ibm/icu/dev/tool/rbbi/BuildDictionaryFile.java deleted file mode 100644 index 8c722a79b7b..00000000000 --- a/icu4j/tools/misc/src/com/ibm/icu/dev/tool/rbbi/BuildDictionaryFile.java +++ /dev/null @@ -1,880 +0,0 @@ -/* - ******************************************************************************* - * Copyright (C) 1996-2009, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - */ -package com.ibm.icu.dev.tool.rbbi; - -import java.io.DataOutputStream; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; -import java.util.Vector; - -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.util.CompactByteArray; - -public class BuildDictionaryFile { - public static void main(String args[]) - throws FileNotFoundException, UnsupportedEncodingException, IOException { - String filename = args[0]; - String encoding = ""; - String outputFile = ""; - String listingFile = ""; - - if (args.length >= 2) - encoding = args[1]; - - if(args.length >= 3) - outputFile = args[2]; - - if (args.length >= 4) - listingFile = args[3]; - - BuildDictionaryFile dictionary = new BuildDictionaryFile(); - dictionary.build(filename, encoding); - - DataOutputStream out = null; - if (outputFile.length() != 0) { - out = new DataOutputStream(new FileOutputStream(outputFile)); - dictionary.writeDictionaryFile(out); - } - - PrintWriter listing = null; - if (listingFile.length() != 0) { - listing = new PrintWriter(new OutputStreamWriter(new FileOutputStream(listingFile), "UnicodeLittle")); - dictionary.printWordList("", 0, listing); - listing.close(); - } - } - - public BuildDictionaryFile() { - } - - public void build(String filename, String encoding) - throws FileNotFoundException, UnsupportedEncodingException, IOException { - FileInputStream file = new FileInputStream(filename); - InputStreamReader in; - if (encoding.length() == 0) - in = new InputStreamReader(file); - else - in = new InputStreamReader(file, encoding); - - buildColumnMap(in); - - file = new FileInputStream(filename); - if (encoding.length() == 0) - in = new InputStreamReader(file); - else - in = new InputStreamReader(file, encoding); - - buildStateTable(in); -//printTable(); - } - - public void buildColumnMap(InputStreamReader in) throws IOException { -System.out.println("Building column map..."); - UnicodeSet charsInFile = new UnicodeSet(); - int c = in.read(); -int totalChars = 0; - while (c >= 0) { -++totalChars; if (totalChars > 0 && totalChars % 5000 == 0) System.out.println("Read " + totalChars + " characters..."); - if (c > ' ') - charsInFile.add((char)c); - c = in.read(); - } -// Test.debugPrintln(charsInFile.toString()); - - StringBuffer tempReverseMap = new StringBuffer(); - tempReverseMap.append(' '); - - columnMap = new CompactByteArray(); - int n = charsInFile.getRangeCount(); - byte p = 1; - for (int i=0; i= 0) { - charsInWord++; - short column = columnMap.elementAt((char)c); - - row = (int[])(tempTable.elementAt(state)); - if (column != 0) { - if (row[column] == 0) { - row[column] = tempTable.size(); - ++row[numCols]; - state = (tempTable.size()); - tempTable.addElement(new int[numCols + 1]); - } - else - state = row[column]; - } - else if (state != 0) { - if (row[0] != -1) { - row[0] = -1; - ++row[numCols]; - uniqueWords++; - totalUniqueWordChars += charsInWord; - } - totalWords++; -if (totalWords % 5000 == 0) System.out.println("Read " + totalWords + " words, " + tempTable.size() + " rows..."); - charsInWord = 0; - state = 0; - } - c = in.read(); - } - if (state != 0) { - row = (int[])(tempTable.elementAt(state)); - if (row[0] != -1) { - row[0] = -1; - uniqueWords++; - totalUniqueWordChars += charsInWord; - } - totalWords++; - } - - compress(tempTable); - - table = new short[numCols * tempTable.size()]; - for (int i = 0; i < tempTable.size(); i++) { - row = (int[])tempTable.elementAt(i); - for (int j = 0; j < numCols; j++) - table[i * numCols + j] = (short)row[j]; - } - } - - private void compress(Vector tempTable) { -System.out.println("Before compression:"); -System.out.println(" Number of rows = " + tempTable.size()); -System.out.println(" Number of columns = " + numCols); -System.out.println(" Number of cells = " + tempTable.size() * numCols); - deleteDuplicateRows(tempTable); -System.out.println("After removing duplicate rows:"); -System.out.println(" Number of rows = " + tempTable.size()); -System.out.println(" Number of columns = " + numCols); -System.out.println(" Number of cells = " + tempTable.size() * numCols); - stackRows(tempTable); -if (tempTable.size() > 32767) throw new IllegalArgumentException("Too many rows in table!"); -System.out.println("After doubling up on rows:"); -System.out.println(" Number of rows = " + tempTable.size()); -System.out.println(" Number of columns = " + numCols); -System.out.println(" Number of cells = " + tempTable.size() * numCols); - } - -/* -experimental... - private void deleteDuplicateRows(Vector tempTable) { - int[] rowNumMap = new int[tempTable.size()]; - for (int i = 0; i < rowNumMap.length; i++) - rowNumMap[i] = i; - - int nextClass = numCols; - int currentClass; - int lastClass; - boolean split; - int[] row1, row2, tempRow; - int tempCat; - - do { -System.out.println("Making a pass (" + nextClass + " classes)..."); - currentClass = 0; - lastClass = nextClass; - while (currentClass < nextClass) { -System.out.println(" currentClass = " + currentClass); - split = false; - row1 = row2 = null; - for (int i = 0; i < tempTable.size(); i++) { - tempRow = (int[])tempTable.elementAt(i); - if (tempRow[numCols] == currentClass) { - if (row1 == null) { - row1 = (int[])tempTable.elementAt(i); - } - else { - row2 = (int[])tempTable.elementAt(i); - for (int j = 0; j < numCols; j++) { - if ((row1[j] == 0) != (row2[j] == 0) || - (row1[j] == -1) != (row2[j] == -1)) { - row2[numCols] = nextClass; - split = true; - break; - } - else if (row1[j] != 0 && row2[j] != 0 && row1[j] != -1 - && row2[j] != -1) { - tempRow = (int[])tempTable.elementAt(row1[j]); - tempCat = tempRow[numCols]; - tempRow = (int[])tempTable.elementAt(row2[j]); - if (tempCat != tempRow[numCols]) { - row2[numCols] = nextClass; - split = true; - break; - } - } - } - } - } - } - if (split) - ++nextClass; - ++currentClass; -//System.out.println(); - } - } while (lastClass != nextClass); - - int[] representatives = new int[nextClass]; - for (int i = 1; i < tempTable.size(); i++) { - tempRow = (int[])tempTable.elementAt(i); - if (representatives[tempRow[numCols]] == 0) - representatives[tempRow[numCols]] = i; - else - rowNumMap[i] = representatives[tempRow[numCols]]; - } -System.out.println("Renumbering..."); - - // renumber all remaining rows - for (int i = 0; i < rowNumMap.length; i++) - if (rowNumMap[i] != i) - tempTable.setElementAt(null, i); - int newRowNum = 0; - for (int i = 0; i < rowNumMap.length; i++) - if (tempTable.elementAt(i) != null) - rowNumMap[i] = newRowNum++; - for (int i = 0; i < rowNumMap.length; i++) - if (tempTable.elementAt(i) == null) - rowNumMap[i] = rowNumMap[rowNumMap[i]]; - - for (int i = tempTable.size() - 1; i >= 0; i--) { - tempRow = (int[])tempTable.elementAt(i); - if (tempRow == null) - tempTable.removeElementAt(i); - else { - for (int j = 0; j < numCols; j++) - if (tempRow[j] != -1) - tempRow[j] = rowNumMap[j]; - } - } -//for (int i = 1; i < rowNumMap.length; i++) rowNumMap[i] = i; int newRowNum = rowNumMap.length; - } -*/ - - private void deleteDuplicateRows(Vector tempTable) { - Vector work = (Vector)(tempTable.clone()); - boolean didDeleteRow = true; - - Vector tempMapping = new Vector(work.size()); - int[] mapping = new int[work.size()]; - for (int i = 0; i < mapping.length; i++) { - mapping[i] = i; - tempMapping.addElement(new Integer(i)); - } - boolean[] tbd = new boolean[work.size()]; - - while (didDeleteRow) { -System.out.println(" " + work.size() + " rows..."); -int deletedRows = 0; - didDeleteRow = false; - - sortTable(work, tempMapping, mapping, 1, work.size()); - for (int i = 0; i < work.size() - 1; ) { -System.out.print("Deleting, inspecting row " + i + ", deleted " + deletedRows + " rows...\r"); - int rowToDelete = ((Integer)(tempMapping.elementAt(i + 1))).intValue(); - int rowToMapTo = ((Integer)(tempMapping.elementAt(i))).intValue(); - if (compareRows((int[])work.elementAt(i), (int[])work.elementAt(i + 1), - mapping) == 0) { - tbd[rowToDelete] = true; - tempTable.setElementAt(null, rowToDelete); - while (tbd[mapping[rowToMapTo]]) - mapping[rowToMapTo] = mapping[mapping[rowToMapTo]]; - mapping[rowToDelete] = mapping[rowToMapTo]; - didDeleteRow = true; -deletedRows++; - work.removeElementAt(i + 1); - tempMapping.removeElementAt(i + 1); - } - else - i++; - } - for (int i = 0; i < mapping.length; i++) { - if (tbd[i] && tbd[mapping[i]]) - mapping[i] = mapping[mapping[i]]; - } - } - - int decrementBy = 0; - for (int i = 0; i < mapping.length; i++) { - if (tbd[i]) - decrementBy++; - else - mapping[i] -= decrementBy; - } - for (int i = 0; i < mapping.length; i++) { - if (tbd[i]) - mapping[i] = mapping[mapping[i]]; - } - for (int i = tempTable.size() - 1; i >= 0; i--) { - if (tbd[i]) - tempTable.removeElementAt(i); - else { - int[] row = (int[])tempTable.elementAt(i); - for (int j = 0; j < numCols; j++) - row[j] = (row[j] == -1) ? -1 : mapping[row[j]]; - } - } - } - - private void sortTable(Vector tbl, Vector tempMapping, int[] mapping, int start, int end) { -System.out.print("Sorting (" + start + ", " + end + ")...\r"); - if (start + 1 >= end) - return; - else if (start + 10 >= end) { - for (int i = start + 1; i < end; i++) { - int[] row = (int[])tbl.elementAt(i); - Integer tempMap = (Integer)tempMapping.elementAt(i); - int j; - for (j = i - 1; j >= start; j--) { - if (compareRows((int[])tbl.elementAt(j), row, mapping) > 0) { - tbl.setElementAt((int[])tbl.elementAt(j), j + 1); - tempMapping.setElementAt((Integer)tempMapping.elementAt(j), j + 1); - } - else { - tbl.setElementAt(row, j + 1); - tempMapping.setElementAt(tempMap, j + 1); - break; - } - } - if (j < start) { - tbl.setElementAt(row, start); - tempMapping.setElementAt(tempMap, start); - } - } - } - else { - int boundaryPos = (start + end) / 2; - int i; - boolean allTheSame = true; - int firstDifferent = 0; - - do { - int[] boundary = (int[])tbl.elementAt(boundaryPos); - i = start; - int j = end - 1; - int[] row = null; - byte compResult; - while (i < j) { - row = (int[])tbl.elementAt(i); - while (i <= j && compareRows(row, boundary, mapping) < 0) { - i++; - row = (int[])tbl.elementAt(i); - } - - row = (int[])tbl.elementAt(j); - compResult = compareRows(row, boundary, mapping); - while (i <= j && (compResult >= 0)) { - if (compResult != 0) { - allTheSame = false; - firstDifferent = j; - } - j--; - row = (int[])tbl.elementAt(j); - compResult = compareRows(row, boundary, mapping); - } - if (i <= j) { - row = (int[])tbl.elementAt(j); - tbl.setElementAt(tbl.elementAt(i), j); - tbl.setElementAt(row, i); - Object temp = tempMapping.elementAt(j); - tempMapping.setElementAt(tempMapping.elementAt(i), j); - tempMapping.setElementAt(temp, i); - } - } - if (i <= start) { - if (allTheSame) - return; - else - boundaryPos = firstDifferent; - } - } while (i <= start); - sortTable(tbl, tempMapping, mapping, start, i); - sortTable(tbl, tempMapping, mapping, i, end); - } - } - - private byte compareRows(int[] row1, int[] row2, int[] mapping) { - for (int i = 0; i < numCols; i++) { - int c1 = (row1[i] == -1) ? -1 : mapping[row1[i]]; - int c2 = (row2[i] == -1) ? -1 : mapping[row2[i]]; - if (c1 < c2) - return -1; - else if (c1 > c2) - return 1; - } - return 0; - } - - private int[] buildRowIndex(Vector tempTable) { - int[] tempRowIndex = new int[tempTable.size()]; - rowIndexFlagsIndex = new short[tempTable.size()]; - Vector tempRowIndexFlags = new Vector(); - rowIndexShifts = new byte[tempTable.size()]; - - // build the row index. Each entry in the row index starts out referring - // to the original row (so it doesn't actually do any mapping), and we set - // up the index flags to show which cells in the row are populated - for (int i = 0; i < tempTable.size(); i++) { - tempRowIndex[i] = i; - - int[] row = (int[])tempTable.elementAt(i); - if (row[numCols] == 1 && row[0] == 0) { - int j = 0; - while (row[j] == 0) - ++j; - rowIndexFlagsIndex[i] = (short)(-j); - } - else { - int[] flags = new int[numColGroups]; - int nextFlag = 1; - int colGroup = 0; - for (int j = 0; j < numCols; j++) { - if (row[j] != 0) - flags[colGroup] |= nextFlag; - nextFlag <<= 1; - if (nextFlag == 0) { - ++colGroup; - nextFlag = 1; - } - } - colGroup = 0; - int j = 0; - while (j < tempRowIndexFlags.size()) { - if (((Integer)tempRowIndexFlags.elementAt(j)).intValue() == - flags[colGroup]) { - ++colGroup; - ++j; - if (colGroup >= numColGroups) - break; - } - else if (colGroup != 0) - colGroup = 0; - else - ++j; - } - rowIndexFlagsIndex[i] = (short)(j - colGroup); - while (colGroup < numColGroups) { - tempRowIndexFlags.addElement(new Integer(flags[colGroup])); - ++colGroup; - } - } - } - rowIndexFlags = new int[tempRowIndexFlags.size()]; - for (int i = 0; i < rowIndexFlags.length; i++) - rowIndexFlags[i] = ((Integer)tempRowIndexFlags.elementAt(i)).intValue(); -System.out.println("Number of column groups = " + numColGroups); -System.out.println("Size of rowIndexFlags = " + rowIndexFlags.length); - - return tempRowIndex; - } - - private void stackRows(Vector tempTable) { -/* -System.out.print("Row:\t"); -for (int i = 0; i < numCols; i++) -System.out.print(reverseColumnMap[i] + "\t"); -System.out.println(); -for (int i = 0; i < tempTable.size(); i++) { -System.out.print(Integer.toString(i) + ":\t"); -int[] row = (int[])tempTable.elementAt(i); -for (int j = 0; j < row.length; j++) -if (row[j] != 0) System.out.print(Integer.toString(row[j]) + "\t"); -else System.out.print(".\t"); -System.out.println(); -} -*/ - - int[] tempRowIndex = buildRowIndex(tempTable); - boolean[] tbd = new boolean[tempTable.size()]; - - // now we actually go through and stack rows together - for (int i = 0; i < tempTable.size(); i++) { - if (tbd[i]) - continue; -System.out.print("Stacking, inspecting row " + i + "...\r"); -//System.out.println("Stacking, inspecting row " + i + "..."); - - int[] destRow = (int[])tempTable.elementAt(i); - - boolean[] tempFlags = new boolean[numCols]; - boolean[] filledCells = new boolean[numCols]; - for (int j = 0; j < numCols; j++) - filledCells[j] = destRow[j] != 0; - - for (int j = i + 1; destRow[numCols] < numCols && j < tempTable.size(); j++) { - if (tbd[j]) - continue; - - int[] srcRow = (int[])tempTable.elementAt(j); - if (srcRow[numCols] + destRow[numCols] > numCols) - continue; - - int maxLeftShift = -999; - int maxRightShift = 0; - for (int k = 0; k < numCols; k++) { - tempFlags[k] = srcRow[k] != 0; - if (tempFlags[k]) { - if (maxLeftShift == -999) - maxLeftShift = -k; - maxRightShift = (numCols - 1) - k; - } - } - - int shift; - for (shift = maxLeftShift; shift <= maxRightShift; shift++) { - int k; - for (k = 0; k < numCols; k++) { - if (tempFlags[k] && filledCells[k + shift]) - break; - } - if (k >= numCols) - break; - } - if (shift <= maxRightShift) { -//System.out.println("Packing row " + j + " into row " + i + " with shift = " + shift); - for (int k = 0; k < numCols; k++) { - if (tempFlags[k]) { - filledCells[k + shift] = true; - destRow[k + shift] = srcRow[k]; - ++destRow[numCols]; - } - } - tbd[j] = true; - tempRowIndex[j] = i; - rowIndexShifts[j] = (byte)shift; - } - } - } - - // finally, we squeeze out all the deleted rows - int decrementBy = 0; - for (int i = 0; i < tempRowIndex.length; i++) { - if (!tbd[i]) - tempRowIndex[i] -= decrementBy; - else - ++decrementBy; - } - rowIndex = new short[tempRowIndex.length]; - for (int i = tempRowIndex.length - 1; i >= 0; i--) { - if (tbd[i]) { - rowIndex[i] = (short)(tempRowIndex[tempRowIndex[i]]); - tempTable.removeElementAt(i); - } - else - rowIndex[i] = (short)tempRowIndex[i]; - } - } - -// private void printTable() { -// short cell; -// int populatedCells = 0; -///* -// System.out.println("Conceptual table:"); -// System.out.println(" Row: a b c d e f g h i j k l m n" -// + " o p q r s t u v w x y z ' #"); -// -// boolean[] rowPrintFlags = new boolean[rowIndex.length]; -// printConceptualTable("", 0, rowPrintFlags); -//*/ -// -// System.out.println(); -// System.out.println("Conceptual table:"); -// System.out.print(" Row:"); -// for (int i = 0; i < reverseColumnMap.length; i++) { -// System.out.print(" " + reverseColumnMap[i]); -// } -// for (int i = 0; i < rowIndex.length; i++) { -// System.out.println(); -// printNumber(i, 4); -// System.out.print(":"); -// for (int j = 0; j < numCols; j++) -// printNumber(at(i, j), 4); -// } -// System.out.println('\n'); -// -// System.out.println(); -// System.out.println("Internally stored table:"); -// System.out.print(" Row:"); -// for (int i = 0; i < reverseColumnMap.length; i++) { -// System.out.print(" " + reverseColumnMap[i]); -// } -// for (int i = 0; i < table.length; i++) { -// if (i % numCols == 0) { -// System.out.println(); -// printNumber(i / numCols, 4); -// System.out.print(":"); -// } -// cell = table[i]; -// if (cell != 0) -// populatedCells++; -// printNumber(cell, 4); -// } -// System.out.println('\n'); -// -//System.out.println("Row index:"); -//for (int i = 0; i < rowIndex.length; i++) { -// System.out.print(" " + i + " -> " + rowIndex[i]); -// if (rowIndexFlagsIndex[i] < 0) -// System.out.print(", flags = " + Integer.toBinaryString((1 << (-rowIndexFlagsIndex[i]))) + " (" + rowIndexFlagsIndex[i]); -// else -// System.out.print(", flags = " + Integer.toBinaryString(rowIndexFlags[rowIndexFlagsIndex[i]]) + " (" + rowIndexFlagsIndex[i]); -// System.out.println("), shift = " + rowIndexShifts[i]); -//} -///* -// int theoreticalMinRows = populatedCells / numCols; -// if (populatedCells % numCols != 0) -// theoreticalMinRows++; -// int oneCellRows = 0; -// for (int i = 0; i < rowIndexFlags.length; i++) { -// double temp = Math.log(rowIndexFlags[i]) / Math.log(2); -// if (temp == (int)temp) -// oneCellRows++; -// } -// -// System.out.println('\n'); -// System.out.println("Total words in input = " + totalWords); -// System.out.println("Total unique words = " + uniqueWords + ", comprising " + -// totalUniqueWordChars + " characters\n"); -// System.out.println("Number of populated cells = " + populatedCells); -// System.out.println("Total number of cells = " + (table.length)); -// System.out.println("Residency = " + ((float)populatedCells / table.length * 100) + '%'); -// System.out.println("Ratio of populated cells to unique-word characters = " + -// ((float)populatedCells / totalUniqueWordChars * 100) + '%'); -// System.out.println("Ratio of total cells to unique-word characters = " + -// ((float)table.length / totalUniqueWordChars * 100) + '%'); -// System.out.println("Number of rows = " + (table.length / numCols)); -// System.out.println("Theoretical minimum number of rows = " + theoreticalMinRows); -// System.out.println("Ratio of number of rows to theoretical minimum = " + -// ((float)(table.length / numCols) / theoreticalMinRows * 100) + '%'); -// System.out.println("Number of conceptual rows = " + rowIndex.length); -// System.out.println("Conceptual rows with only one populated cell = " + oneCellRows); -// System.out.println("Ratio of one-cell rows to total conceptual rows = " + (((float)oneCellRows) -// / rowIndex.length * 100) + '%'); -// System.out.println("Average number of populated cells in multi-cell rows = " + -// ((float)(populatedCells - oneCellRows) / (rowIndex.length - oneCellRows))); -// -// int storageUsed = table.length * 2 + rowIndex.length * 2 -// + rowIndexFlags.length * 4 + rowIndexShifts.length; -// System.out.println("Total number of bytes in table (including indexes) = " + -// storageUsed); -// System.out.println("Bytes of overhead per unique-word character = " + ((double)(storageUsed -// - (totalUniqueWordChars * 2)) / totalUniqueWordChars)); -//*/ -// } - -// private void printConceptualTable(String initialString, int state, boolean[] flags) { -// if (initialString.length() == 0) -// System.out.println("root:"); -// else -// System.out.println(initialString + ':'); -// -// if (!flags[state]) { -// flags[state] = true; -// printNumber(state, 4); -// System.out.print(":"); -// for (int i = 0; i < numCols; i++) -// printNumber(at(state, i), 4); -// System.out.println(); -// } -// -// int nextState; -// for (int i = 0; i < numCols; i++) { -// nextState = at(state, i); -// if (nextState > 0 && !flags[nextState]) { -// printNumber(nextState, 4); -// System.out.print(":"); -// for (int j = 0; j < numCols; j++) -// printNumber(at(nextState, j), 4); -// System.out.println(); -// } -// } -// for (int i = 0; i < numCols; i++) { -// nextState = at(state, i); -// if (nextState > 0 && !flags[nextState]) { -// char nextChar; -// if (nextState == 27) -// nextChar = ' '; -// else if (nextState == 26) -// nextChar = '\''; -// else -// nextChar = (char)(i + 'a'); -// flags[nextState] = true; -// printConceptualTable(initialString + nextChar, nextState, flags); -// } -// } -// } - - private void printWordList(String partialWord, int state, PrintWriter out) - throws IOException { - if (state == -1) { - System.out.println(partialWord); - if (out != null) - out.println(partialWord); - } - else { - for (int i = 0; i < numCols; i++) { - if (at(state, i) != 0) - printWordList(partialWord + reverseColumnMap[i], at(state, i), out); - } - } - } - - private void writeDictionaryFile(DataOutputStream out) throws IOException { - out.writeInt(0); // version number - - char[] columnMapIndexes = columnMap.getIndexArray(); - out.writeInt(columnMapIndexes.length); - for (int i = 0; i < columnMapIndexes.length; i++) - out.writeShort((short)columnMapIndexes[i]); - byte[] columnMapValues = columnMap.getValueArray(); - out.writeInt(columnMapValues.length); - for (int i = 0; i < columnMapValues.length; i++) - out.writeByte((byte)columnMapValues[i]); - - out.writeInt(numCols); - out.writeInt(numColGroups); - - out.writeInt(rowIndex.length); - for (int i = 0; i < rowIndex.length; i++) - out.writeShort(rowIndex[i]); - - out.writeInt(rowIndexFlagsIndex.length); - for (int i = 0; i < rowIndexFlagsIndex.length; i++) - out.writeShort(rowIndexFlagsIndex[i]); - out.writeInt(rowIndexFlags.length); - for (int i = 0; i < rowIndexFlags.length; i++) - out.writeInt(rowIndexFlags[i]); - - out.writeInt(rowIndexShifts.length); - for (int i = 0; i < rowIndexShifts.length; i++) - out.writeByte(rowIndexShifts[i]); - - out.writeInt(table.length); - for (int i = 0; i < table.length; i++) - out.writeShort(table[i]); - - out.close(); - } - -// private void printNumber(int x, int width) { -// String s = String.valueOf(x); -// if (width > s.length()) -// System.out.print(spaces.substring(0, width - s.length())); -// if (x != 0) -// System.out.print(s); -// else -// System.out.print('.'); -// } - - public final short at(int row, char ch) { - int col = columnMap.elementAt(ch); - return at(row, col); - } - - public final short at(int row, int col) { - if (cellIsPopulated(row, col)) - return internalAt(rowIndex[row], col + rowIndexShifts[row]); - else - return 0; - } - - private final boolean cellIsPopulated(int row, int col) { - if (rowIndexFlagsIndex[row] < 0) - return col == -rowIndexFlagsIndex[row]; - else { - int flags = rowIndexFlags[rowIndexFlagsIndex[row] + (col >> 5)]; - return (flags & (1 << (col & 0x1f))) != 0; - } - } - - private final short internalAt(int row, int col) { - return table[row * numCols + col]; - } - - private CompactByteArray columnMap = null; - private char[] reverseColumnMap = null; - private int numCols; - private int numColGroups; - private short[] table = null; - private short[] rowIndex = null; - private int[] rowIndexFlags = null; - private short[] rowIndexFlagsIndex = null; - private byte[] rowIndexShifts = null; - - private int totalWords = 0; - private int uniqueWords = 0; - private int totalUniqueWordChars = 0; - - //private static final String spaces = " "; -} - diff --git a/icu4j/tools/misc/src/com/ibm/icu/dev/tool/rbbi/readme.html b/icu4j/tools/misc/src/com/ibm/icu/dev/tool/rbbi/readme.html deleted file mode 100644 index 5f62a9890e1..00000000000 --- a/icu4j/tools/misc/src/com/ibm/icu/dev/tool/rbbi/readme.html +++ /dev/null @@ -1,65 +0,0 @@ - - - - - README For RBBI Tools - - - -

What Are These Tools?

-This directory contains two tools, WriteTablesToFiles, which converts -the Java  BreakIterators into .brk files for ICU4C, and -BuildDictionaryFile, which builds the binary the Thai word break -dictionary from a Unicode text file containing a list of Thai words. -The rest of this document describes how to use these tools.
-

How To Build The ICU4C BreakIterator Files

-The RuleBasedBreakIterator code was originally developed for ICU4J, and -then ported to ICU4C. For various reasons, the code which compiled the -state tables from the rule text was hard to port. Instead the -WriteTablesToFiles tool was wirtten to read in the Java data and write -the .brk files which ICU4C reads. Later the RBBI code was re-written -for ICU4C, including the ability to compile the state tables from rules -stored in text files. This means that the WriteTablesToFiles tool is -now obsolete.
-
-

How To Build The Thai Word Break Dictionary

-The Thai word berak code was developed originally for ICU4J, and then -ported to ICU4C - the dictionary builder tool was never ported, so you -have to use the Java tool to build the dictionary file for ICU4C. On -the other hand, all of the rest of the ICU locale data was developed -originally for -ICU4C, and a tool was written to covert the ICU4C locale data to Java -resource bundles for use by ICU4J. Consequently, the process of -building the Thai -word break dictionary for ICU4C and -ICU4J is a bit convoluted. Here are the steps:
-
-
    -
  1. Download and build both ICU4C and ICU4J on a Big Endian machine.
    -
  2. -
  3. Run the following command line to build the Thai dictionary file:
    -java -classpath $icu4j_root/classes -com.ibm.icu.dev.tool.rbbi.BuildDictionaryFile -$icu4j_root/src/com/ibm/icu/dev/data/thai6.ucs Unicode -$icu_root/soruce/data/brkitr/thai_dict.brk
  4. -
  5. Rebuild the ICU4C resources.
  6. -
  7. Rebuild the ICU4J ICULocaleData.jar file. (See the ICU4J readme file for -instructions)
  8. -
  9. Move ICULocaleData.jar from $icu_root/source/data/locales/java to -$icu4j_root/src/com/ibm/icu/impl/data
  10. -
  11. Build ICU4J's _resources target to unjar the new files.
    -
  12. -
-
-In the above, $icu_root is the root of your ICU4C source tree, for -example -"~/dev/icu" and $icu4j_root is the root of your ICU4J source tree, for -example "~/dev/icu4j".
-
- -