ICU-12450 move com.ibm.icu.dev.util.PrettyPrinter to org.unicode.cldr.util.UnicodeSetPrettyPrinter

X-SVN-Rev: 38617
2025-04-08 06:53:45 +00:00 · 2016-04-14 22:29:35 +00:00 · 2016-04-14 22:29:35 +00:00 · eb3db2f0f8
commit eb3db2f0f8
parent c291532c83
2 changed files with 0 additions and 328 deletions
--- a/icu4j/main/tests/framework/src/com/ibm/icu/dev/util/PrettyPrinter.java
+++ b/icu4j/main/tests/framework/src/com/ibm/icu/dev/util/PrettyPrinter.java
@ -1,290 +0,0 @@
-/**
- *******************************************************************************
- * Copyright (C) 1996-2012, International Business Machines Corporation and    *
- * others. All Rights Reserved.                                                *
- **********************************************************************
- * Author: Mark Davis
- **********************************************************************
- */
-
-package com.ibm.icu.dev.util;
-
-import java.io.IOException;
-import java.text.FieldPosition;
-import java.util.Comparator;
-import java.util.TreeSet;
-
-import com.ibm.icu.impl.Utility;
-import com.ibm.icu.lang.UCharacter;
-import com.ibm.icu.text.StringTransform;
-import com.ibm.icu.text.UTF16;
-import com.ibm.icu.text.UTF16.StringComparator;
-import com.ibm.icu.text.UnicodeSet;
-import com.ibm.icu.text.UnicodeSetIterator;
-
-/** Provides more flexible formatting of UnicodeSet patterns.
- */
-public class PrettyPrinter {
-    private static final StringComparator CODEPOINT_ORDER = new UTF16.StringComparator(true,false,0);
-    private static final UnicodeSet PATTERN_WHITESPACE = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();
-    private static final UnicodeSet SORT_AT_END = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
-    private static final UnicodeSet QUOTED_SYNTAX = (UnicodeSet) new UnicodeSet("[\\[\\]\\-\\^\\&\\\\\\{\\}\\$\\:]").addAll(PATTERN_WHITESPACE).freeze();
-
-    private boolean first = true;
-    private StringBuffer target = new StringBuffer();
-    private int firstCodePoint = -2;
-    private int lastCodePoint = -2;
-    private boolean compressRanges = true;
-    private String lastString = "";
-    private UnicodeSet toQuote = new UnicodeSet(PATTERN_WHITESPACE);
-    private StringTransform quoter = null;
-
-    private Comparator<String> ordering;
-    private Comparator<String> spaceComp;
-
-    public PrettyPrinter() {
-    }
-
-    public StringTransform getQuoter() {
-        return quoter;
-    }
-
-    public PrettyPrinter setQuoter(StringTransform quoter) {
-        this.quoter = quoter;
-        return this; // for chaining
-    }
-
-    public boolean isCompressRanges() {
-        return compressRanges;
-    }
-
-    /**
-     * @param compressRanges if you want abcde instead of a-e, make this false
-     * @return
-     */
-    public PrettyPrinter setCompressRanges(boolean compressRanges) {
-        this.compressRanges = compressRanges;
-        return this;
-    }
-
-    public Comparator<String> getOrdering() {
-        return ordering;
-    }
-
-    /**
-     * @param ordering the resulting  ordering of the list of characters in the pattern
-     * @return
-     */
-    public PrettyPrinter setOrdering(Comparator ordering) {
-        this.ordering = ordering == null ? CODEPOINT_ORDER : new com.ibm.icu.impl.MultiComparator<String>(ordering, CODEPOINT_ORDER);
-        return this;
-    }
-
-    public Comparator<String> getSpaceComparator() {
-        return spaceComp;
-    }
-
-    /**
-     * @param spaceComp if the comparison returns non-zero, then a space will be inserted between characters
-     * @return this, for chaining
-     */
-    public PrettyPrinter setSpaceComparator(Comparator spaceComp) {
-        this.spaceComp = spaceComp;
-        return this;
-    }
-
-    public UnicodeSet getToQuote() {
-        return toQuote;
-    }
-
-    /**
-     * a UnicodeSet of extra characters to quote with \\uXXXX-style escaping (will automatically quote pattern whitespace)
-     * @param toQuote
-     */
-    public PrettyPrinter setToQuote(UnicodeSet toQuote) {
-        if (toQuote != null) {
-            toQuote = (UnicodeSet)toQuote.cloneAsThawed();
-            toQuote.addAll(PATTERN_WHITESPACE);
-            this.toQuote = toQuote;
-        }
-        return this;
-    }
-
-
-    /**
-     * Get the pattern for a particular set.
-     * @param uset
-     * @return formatted UnicodeSet
-     */
-    public String format(UnicodeSet uset) {
-        first = true;
-        UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(SORT_AT_END); // remove all the unassigned gorp for now
-        // make sure that comparison separates all strings, even canonically equivalent ones
-        TreeSet<String> orderedStrings = new TreeSet<String>(ordering);
-        for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {
-            if (it.codepoint == UnicodeSetIterator.IS_STRING) {
-                orderedStrings.add(it.string);
-            } else {
-                for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
-                    if (!putAtEnd.contains(i)) {
-                        orderedStrings.add(UTF16.valueOf(i));
-                    }
-                }
-            }
-        }
-        target.setLength(0);
-        target.append("[");
-        for (String item : orderedStrings) {
-            appendUnicodeSetItem(item);
-        }
-        for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp
-            appendUnicodeSetItem(it.codepoint); // we know that these are only codepoints, not strings, so this is safe
-        }
-        flushLast();
-        target.append("]");
-        String sresult = target.toString();
-
-        // double check the results. This can be removed once we have more tests.
-        //        try {
-        //            UnicodeSet  doubleCheck = new UnicodeSet(sresult);
-        //            if (!uset.equals(doubleCheck)) {
-        //                throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + Utility.LINE_SEPARATOR + " source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) +  Utility.LINE_SEPARATOR + " result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));
-        //            }
-        //        } catch (RuntimeException e) {
-        //            throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);
-        //        }
-        return sresult;
-    }
-
-    private PrettyPrinter appendUnicodeSetItem(String s) {
-        if (UTF16.hasMoreCodePointsThan(s, 1)) {
-            flushLast();
-            addSpaceAsNeededBefore(s);
-            appendQuoted(s);
-            lastString = s;
-        } else {
-            appendUnicodeSetItem(UTF16.charAt(s, 0));
-        }
-        return this;
-    }
-
-    private void appendUnicodeSetItem(int cp) {
-        if (!compressRanges)
-            flushLast();
-        if (cp == lastCodePoint + 1) {
-            lastCodePoint = cp; // continue range
-        } else { // start range
-            flushLast();
-            firstCodePoint = lastCodePoint = cp;
-        }
-    }
-    /**
-     * 
-     */
-    private void addSpaceAsNeededBefore(String s) {
-        if (first) {
-            first = false;
-        } else if (spaceComp != null && spaceComp.compare(s, lastString) != 0) {
-            target.append(' ');
-        } else {
-            int cp = UTF16.charAt(s,0);
-            if (!toQuote.contains(cp) && !QUOTED_SYNTAX.contains(cp)) {
-                int type = UCharacter.getType(cp);
-                if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
-                    target.append(' ');
-                } else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
-                    target.append(' '); // make sure we don't accidentally merge two surrogates
-                }
-            }
-        }
-    }
-
-    private void addSpaceAsNeededBefore(int codepoint) {
-        addSpaceAsNeededBefore(UTF16.valueOf(codepoint));
-    }
-
-    private void flushLast() {
-        if (lastCodePoint >= 0) {
-            addSpaceAsNeededBefore(firstCodePoint);
-            if (firstCodePoint != lastCodePoint) {
-                appendQuoted(firstCodePoint);
-                if (firstCodePoint + 1 != lastCodePoint) {
-                    target.append('-');
-                } else {
-                    addSpaceAsNeededBefore(lastCodePoint);
-                }
-            }
-            appendQuoted(lastCodePoint);
-            lastString = UTF16.valueOf(lastCodePoint);
-            firstCodePoint = lastCodePoint = -2;
-        }
-    }
-
-
-    private void appendQuoted(String s) {
-        if (toQuote.containsSome(s) && quoter != null) {
-            target.append(quoter.transform(s));
-        } else {
-            int cp;
-            target.append("{");
-            for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
-                appendQuoted(cp = UTF16.charAt(s, i));
-            }
-            target.append("}");
-        }
-    }
-
-    PrettyPrinter appendQuoted(int codePoint) {
-        if (toQuote.contains(codePoint)) {
-            if (quoter != null) {
-                target.append(quoter.transform(UTF16.valueOf(codePoint)));
-                return this;
-            }
-            if (codePoint > 0xFFFF) {
-                target.append("\\U");
-                target.append(Utility.hex(codePoint,8));
-            } else {
-                target.append("\\u");
-                target.append(Utility.hex(codePoint,4));                    
-            }
-            return this;
-        }
-        switch (codePoint) {
-        case '[': // SET_OPEN:
-        case ']': // SET_CLOSE:
-        case '-': // HYPHEN:
-        case '^': // COMPLEMENT:
-        case '&': // INTERSECTION:
-        case '\\': //BACKSLASH:
-        case '{':
-        case '}':
-        case '$':
-        case ':':
-            target.append('\\');
-            break;
-        default:
-            // Escape whitespace
-            if (PATTERN_WHITESPACE.contains(codePoint)) {
-                target.append('\\');
-            }
-            break;
-        }
-        UTF16.append(target, codePoint);
-        return this;
-    }        
-    //  Appender append(String s) {
-    //  target.append(s);
-    //  return this;
-    //  }
-    //  public String toString() {
-    //  return target.toString();
-    //  }
-
-    public Appendable format(UnicodeSet obj, Appendable toAppendTo, FieldPosition pos) {
-        try {
-            return toAppendTo.append(format(obj));
-        } catch (IOException e) {
-            throw new IllegalArgumentException(e);
-        }
-    }
-}
--- a/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/PrettyPrinterTest.java
+++ b/icu4j/main/tests/translit/src/com/ibm/icu/dev/test/translit/PrettyPrinterTest.java
@ -1,38 +0,0 @@
-/*
- **********************************************************************
- * Copyright (c) 2009-2012, Google, International Business Machines
- * Corporation and others.  All Rights Reserved.
- **********************************************************************
- */
-package com.ibm.icu.dev.test.translit;
-
-import java.text.Collator;
-import java.util.Locale;
-
-import com.ibm.icu.dev.test.TestFmwk;
-import com.ibm.icu.dev.util.PrettyPrinter;
-import com.ibm.icu.text.UnicodeSet;
-
-public class PrettyPrinterTest extends TestFmwk {
-    public static void main(String[] args) throws Exception {
-        new PrettyPrinterTest().run(args);
-    }
-    public static final UnicodeSet TO_QUOTE = new UnicodeSet("[[:z:][:me:][:mn:][:di:][:c:]-[\u0020]]");
-
-    public void TestBasicUnicodeSet() {
-
-        Collator spaceComp = Collator.getInstance(Locale.ENGLISH);
-        spaceComp.setStrength(Collator.PRIMARY);
-        
-        final PrettyPrinter PRETTY_PRINTER = new PrettyPrinter()
-        .setOrdering(Collator.getInstance(Locale.ENGLISH))
-        .setSpaceComparator(spaceComp)
-        .setToQuote(TO_QUOTE);
-
-        UnicodeSet expected = new UnicodeSet("[:L:]");
-        String formatted = PRETTY_PRINTER.format(expected);
-        logln(formatted);
-        UnicodeSet actual = new UnicodeSet(formatted);
-        assertEquals("PrettyPrinter preserves meaning", expected, actual);
-    }
-}