mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-08 06:53:45 +00:00
ICU-12450 move com.ibm.icu.dev.util.PrettyPrinter to org.unicode.cldr.util.UnicodeSetPrettyPrinter
X-SVN-Rev: 38617
This commit is contained in:
parent
c291532c83
commit
eb3db2f0f8
2 changed files with 0 additions and 328 deletions
|
@ -1,290 +0,0 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1996-2012, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
**********************************************************************
|
||||
* Author: Mark Davis
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
package com.ibm.icu.dev.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.FieldPosition;
|
||||
import java.util.Comparator;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import com.ibm.icu.impl.Utility;
|
||||
import com.ibm.icu.lang.UCharacter;
|
||||
import com.ibm.icu.text.StringTransform;
|
||||
import com.ibm.icu.text.UTF16;
|
||||
import com.ibm.icu.text.UTF16.StringComparator;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
|
||||
/** Provides more flexible formatting of UnicodeSet patterns.
|
||||
*/
|
||||
public class PrettyPrinter {
|
||||
private static final StringComparator CODEPOINT_ORDER = new UTF16.StringComparator(true,false,0);
|
||||
private static final UnicodeSet PATTERN_WHITESPACE = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();
|
||||
private static final UnicodeSet SORT_AT_END = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
|
||||
private static final UnicodeSet QUOTED_SYNTAX = (UnicodeSet) new UnicodeSet("[\\[\\]\\-\\^\\&\\\\\\{\\}\\$\\:]").addAll(PATTERN_WHITESPACE).freeze();
|
||||
|
||||
private boolean first = true;
|
||||
private StringBuffer target = new StringBuffer();
|
||||
private int firstCodePoint = -2;
|
||||
private int lastCodePoint = -2;
|
||||
private boolean compressRanges = true;
|
||||
private String lastString = "";
|
||||
private UnicodeSet toQuote = new UnicodeSet(PATTERN_WHITESPACE);
|
||||
private StringTransform quoter = null;
|
||||
|
||||
private Comparator<String> ordering;
|
||||
private Comparator<String> spaceComp;
|
||||
|
||||
public PrettyPrinter() {
|
||||
}
|
||||
|
||||
public StringTransform getQuoter() {
|
||||
return quoter;
|
||||
}
|
||||
|
||||
public PrettyPrinter setQuoter(StringTransform quoter) {
|
||||
this.quoter = quoter;
|
||||
return this; // for chaining
|
||||
}
|
||||
|
||||
public boolean isCompressRanges() {
|
||||
return compressRanges;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param compressRanges if you want abcde instead of a-e, make this false
|
||||
* @return
|
||||
*/
|
||||
public PrettyPrinter setCompressRanges(boolean compressRanges) {
|
||||
this.compressRanges = compressRanges;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Comparator<String> getOrdering() {
|
||||
return ordering;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ordering the resulting ordering of the list of characters in the pattern
|
||||
* @return
|
||||
*/
|
||||
public PrettyPrinter setOrdering(Comparator ordering) {
|
||||
this.ordering = ordering == null ? CODEPOINT_ORDER : new com.ibm.icu.impl.MultiComparator<String>(ordering, CODEPOINT_ORDER);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Comparator<String> getSpaceComparator() {
|
||||
return spaceComp;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param spaceComp if the comparison returns non-zero, then a space will be inserted between characters
|
||||
* @return this, for chaining
|
||||
*/
|
||||
public PrettyPrinter setSpaceComparator(Comparator spaceComp) {
|
||||
this.spaceComp = spaceComp;
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnicodeSet getToQuote() {
|
||||
return toQuote;
|
||||
}
|
||||
|
||||
/**
|
||||
* a UnicodeSet of extra characters to quote with \\uXXXX-style escaping (will automatically quote pattern whitespace)
|
||||
* @param toQuote
|
||||
*/
|
||||
public PrettyPrinter setToQuote(UnicodeSet toQuote) {
|
||||
if (toQuote != null) {
|
||||
toQuote = (UnicodeSet)toQuote.cloneAsThawed();
|
||||
toQuote.addAll(PATTERN_WHITESPACE);
|
||||
this.toQuote = toQuote;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the pattern for a particular set.
|
||||
* @param uset
|
||||
* @return formatted UnicodeSet
|
||||
*/
|
||||
public String format(UnicodeSet uset) {
|
||||
first = true;
|
||||
UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(SORT_AT_END); // remove all the unassigned gorp for now
|
||||
// make sure that comparison separates all strings, even canonically equivalent ones
|
||||
TreeSet<String> orderedStrings = new TreeSet<String>(ordering);
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {
|
||||
if (it.codepoint == UnicodeSetIterator.IS_STRING) {
|
||||
orderedStrings.add(it.string);
|
||||
} else {
|
||||
for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
|
||||
if (!putAtEnd.contains(i)) {
|
||||
orderedStrings.add(UTF16.valueOf(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
target.setLength(0);
|
||||
target.append("[");
|
||||
for (String item : orderedStrings) {
|
||||
appendUnicodeSetItem(item);
|
||||
}
|
||||
for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp
|
||||
appendUnicodeSetItem(it.codepoint); // we know that these are only codepoints, not strings, so this is safe
|
||||
}
|
||||
flushLast();
|
||||
target.append("]");
|
||||
String sresult = target.toString();
|
||||
|
||||
// double check the results. This can be removed once we have more tests.
|
||||
// try {
|
||||
// UnicodeSet doubleCheck = new UnicodeSet(sresult);
|
||||
// if (!uset.equals(doubleCheck)) {
|
||||
// throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + Utility.LINE_SEPARATOR + " source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) + Utility.LINE_SEPARATOR + " result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));
|
||||
// }
|
||||
// } catch (RuntimeException e) {
|
||||
// throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);
|
||||
// }
|
||||
return sresult;
|
||||
}
|
||||
|
||||
private PrettyPrinter appendUnicodeSetItem(String s) {
|
||||
if (UTF16.hasMoreCodePointsThan(s, 1)) {
|
||||
flushLast();
|
||||
addSpaceAsNeededBefore(s);
|
||||
appendQuoted(s);
|
||||
lastString = s;
|
||||
} else {
|
||||
appendUnicodeSetItem(UTF16.charAt(s, 0));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
private void appendUnicodeSetItem(int cp) {
|
||||
if (!compressRanges)
|
||||
flushLast();
|
||||
if (cp == lastCodePoint + 1) {
|
||||
lastCodePoint = cp; // continue range
|
||||
} else { // start range
|
||||
flushLast();
|
||||
firstCodePoint = lastCodePoint = cp;
|
||||
}
|
||||
}
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private void addSpaceAsNeededBefore(String s) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else if (spaceComp != null && spaceComp.compare(s, lastString) != 0) {
|
||||
target.append(' ');
|
||||
} else {
|
||||
int cp = UTF16.charAt(s,0);
|
||||
if (!toQuote.contains(cp) && !QUOTED_SYNTAX.contains(cp)) {
|
||||
int type = UCharacter.getType(cp);
|
||||
if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
|
||||
target.append(' ');
|
||||
} else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
|
||||
target.append(' '); // make sure we don't accidentally merge two surrogates
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void addSpaceAsNeededBefore(int codepoint) {
|
||||
addSpaceAsNeededBefore(UTF16.valueOf(codepoint));
|
||||
}
|
||||
|
||||
private void flushLast() {
|
||||
if (lastCodePoint >= 0) {
|
||||
addSpaceAsNeededBefore(firstCodePoint);
|
||||
if (firstCodePoint != lastCodePoint) {
|
||||
appendQuoted(firstCodePoint);
|
||||
if (firstCodePoint + 1 != lastCodePoint) {
|
||||
target.append('-');
|
||||
} else {
|
||||
addSpaceAsNeededBefore(lastCodePoint);
|
||||
}
|
||||
}
|
||||
appendQuoted(lastCodePoint);
|
||||
lastString = UTF16.valueOf(lastCodePoint);
|
||||
firstCodePoint = lastCodePoint = -2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void appendQuoted(String s) {
|
||||
if (toQuote.containsSome(s) && quoter != null) {
|
||||
target.append(quoter.transform(s));
|
||||
} else {
|
||||
int cp;
|
||||
target.append("{");
|
||||
for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
|
||||
appendQuoted(cp = UTF16.charAt(s, i));
|
||||
}
|
||||
target.append("}");
|
||||
}
|
||||
}
|
||||
|
||||
PrettyPrinter appendQuoted(int codePoint) {
|
||||
if (toQuote.contains(codePoint)) {
|
||||
if (quoter != null) {
|
||||
target.append(quoter.transform(UTF16.valueOf(codePoint)));
|
||||
return this;
|
||||
}
|
||||
if (codePoint > 0xFFFF) {
|
||||
target.append("\\U");
|
||||
target.append(Utility.hex(codePoint,8));
|
||||
} else {
|
||||
target.append("\\u");
|
||||
target.append(Utility.hex(codePoint,4));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
switch (codePoint) {
|
||||
case '[': // SET_OPEN:
|
||||
case ']': // SET_CLOSE:
|
||||
case '-': // HYPHEN:
|
||||
case '^': // COMPLEMENT:
|
||||
case '&': // INTERSECTION:
|
||||
case '\\': //BACKSLASH:
|
||||
case '{':
|
||||
case '}':
|
||||
case '$':
|
||||
case ':':
|
||||
target.append('\\');
|
||||
break;
|
||||
default:
|
||||
// Escape whitespace
|
||||
if (PATTERN_WHITESPACE.contains(codePoint)) {
|
||||
target.append('\\');
|
||||
}
|
||||
break;
|
||||
}
|
||||
UTF16.append(target, codePoint);
|
||||
return this;
|
||||
}
|
||||
// Appender append(String s) {
|
||||
// target.append(s);
|
||||
// return this;
|
||||
// }
|
||||
// public String toString() {
|
||||
// return target.toString();
|
||||
// }
|
||||
|
||||
public Appendable format(UnicodeSet obj, Appendable toAppendTo, FieldPosition pos) {
|
||||
try {
|
||||
return toAppendTo.append(format(obj));
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException(e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,38 +0,0 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (c) 2009-2012, Google, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
package com.ibm.icu.dev.test.translit;
|
||||
|
||||
import java.text.Collator;
|
||||
import java.util.Locale;
|
||||
|
||||
import com.ibm.icu.dev.test.TestFmwk;
|
||||
import com.ibm.icu.dev.util.PrettyPrinter;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
public class PrettyPrinterTest extends TestFmwk {
|
||||
public static void main(String[] args) throws Exception {
|
||||
new PrettyPrinterTest().run(args);
|
||||
}
|
||||
public static final UnicodeSet TO_QUOTE = new UnicodeSet("[[:z:][:me:][:mn:][:di:][:c:]-[\u0020]]");
|
||||
|
||||
public void TestBasicUnicodeSet() {
|
||||
|
||||
Collator spaceComp = Collator.getInstance(Locale.ENGLISH);
|
||||
spaceComp.setStrength(Collator.PRIMARY);
|
||||
|
||||
final PrettyPrinter PRETTY_PRINTER = new PrettyPrinter()
|
||||
.setOrdering(Collator.getInstance(Locale.ENGLISH))
|
||||
.setSpaceComparator(spaceComp)
|
||||
.setToQuote(TO_QUOTE);
|
||||
|
||||
UnicodeSet expected = new UnicodeSet("[:L:]");
|
||||
String formatted = PRETTY_PRINTER.format(expected);
|
||||
logln(formatted);
|
||||
UnicodeSet actual = new UnicodeSet(formatted);
|
||||
assertEquals("PrettyPrinter preserves meaning", expected, actual);
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue