From a18c8f83e3adbfd70b6b229c2afdb3f05e3586a6 Mon Sep 17 00:00:00 2001 From: Victor Chang Date: Fri, 12 May 2023 16:01:56 +0100 Subject: [PATCH] ICU-22390 Speed-up ICU4J Transliterator. It reduces the method runtime by approx. 60%. --- .../ibm/icu/impl/ICUResourceBundleReader.java | 12 ++++++------ .../com/ibm/icu/text/AnyTransliterator.java | 19 ++++++++++--------- .../ibm/icu/text/TransliteratorRegistry.java | 10 +++++----- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java index 746b1d9c1ac..7800f904c59 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/ICUResourceBundleReader.java @@ -14,6 +14,7 @@ import java.lang.ref.SoftReference; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.IntBuffer; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import com.ibm.icu.util.ICUException; @@ -444,13 +445,12 @@ public final class ICUResourceBundleReader { } private static String makeKeyStringFromBytes(byte[] keyBytes, int keyOffset) { - StringBuilder sb = new StringBuilder(); - byte b; - while((b = keyBytes[keyOffset]) != 0) { - ++keyOffset; - sb.append((char)b); + int end = keyOffset; + while(keyBytes[end] != 0) { + ++end; } - return sb.toString(); + int len = end - keyOffset; + return new String(keyBytes, keyOffset, len, StandardCharsets.ISO_8859_1); } private String getKey16String(int keyOffset) { if(keyOffset < localKeyLimit) { diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/AnyTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/AnyTransliterator.java index d7fbcb3b3f9..b051a6798e7 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/AnyTransliterator.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/AnyTransliterator.java @@ -52,6 +52,12 @@ class AnyTransliterator extends Transliterator { static final String NULL_ID = "Null"; static final String LATIN_PIVOT = "-Latin;Latin-"; + /** + * Special code for handling width characters + */ + private static final Transliterator WIDTH_FIX = + Transliterator.getInstance("[[:dt=Nar:][:dt=Wide:]] nfkd"); + /** * Cache mapping UScriptCode values to Transliterator*. */ @@ -67,11 +73,6 @@ class AnyTransliterator extends Transliterator { */ private int targetScript; - /** - * Special code for handling width characters - */ - private Transliterator widthFix = Transliterator.getInstance("[[:dt=Nar:][:dt=Wide:]] nfkd"); - /** * Implements {@link Transliterator#handleTransliterate}. */ @@ -176,7 +177,7 @@ class AnyTransliterator extends Transliterator { if (isWide(targetScript)) { return null; } else { - return widthFix; + return WIDTH_FIX; } } @@ -201,7 +202,7 @@ class AnyTransliterator extends Transliterator { if (t != null) { if (!isWide(targetScript)) { List v = new ArrayList(); - v.add(widthFix); + v.add(WIDTH_FIX); v.add(t); t = new CompoundTransliterator(v); } @@ -210,7 +211,7 @@ class AnyTransliterator extends Transliterator { t = prevCachedT; } } else if (!isWide(targetScript)) { - return widthFix; + return WIDTH_FIX; } } @@ -408,7 +409,7 @@ class AnyTransliterator extends Transliterator { if (filter != null && filter instanceof UnicodeSet) { filter = new UnicodeSet((UnicodeSet)filter); } - return new AnyTransliterator(getID(), filter, target, targetScript, widthFix, cache); + return new AnyTransliterator(getID(), filter, target, targetScript, WIDTH_FIX, cache); } /* (non-Javadoc) diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java index b39d8353c97..be6bda49630 100644 --- a/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java +++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/TransliteratorRegistry.java @@ -16,11 +16,13 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.MissingResourceException; import java.util.ResourceBundle; +import java.util.Set; import com.ibm.icu.impl.ICUData; import com.ibm.icu.impl.ICUResourceBundle; @@ -69,7 +71,7 @@ class TransliteratorRegistry { /** * Vector of public full IDs (CaseInsensitiveString objects). */ - private List availableIDs; + private final Set availableIDs; //---------------------------------------------------------------------- // class Spec @@ -293,7 +295,7 @@ class TransliteratorRegistry { public TransliteratorRegistry() { registry = Collections.synchronizedMap(new HashMap()); specDAG = Collections.synchronizedMap(new HashMap>>()); - availableIDs = new ArrayList(); + availableIDs = new LinkedHashSet<>(); } /** @@ -520,9 +522,7 @@ class TransliteratorRegistry { registry.put(ciID, arrayOfObj); if (visible) { registerSTV(source, target, variant); - if (!availableIDs.contains(ciID)) { - availableIDs.add(ciID); - } + availableIDs.add(ciID); } else { removeSTV(source, target, variant); availableIDs.remove(ciID);