From 035d116c8bd48483681251aebb3277ebb3ecf698 Mon Sep 17 00:00:00 2001
From: Doug Felt
Date: Mon, 10 Apr 2006 21:54:40 +0000
Subject: [PATCH] ICU-5156 perf enhancements

X-SVN-Rev: 19526
---
 .../src/com/ibm/icu/util/StringTokenizer.java | 89 +++++++++++++++----
 1 file changed, 72 insertions(+), 17 deletions(-)

diff --git a/icu4j/src/com/ibm/icu/util/StringTokenizer.java b/icu4j/src/com/ibm/icu/util/StringTokenizer.java
index 00fcfc584bd..5418443af5d 100644
--- a/icu4j/src/com/ibm/icu/util/StringTokenizer.java
+++ b/icu4j/src/com/ibm/icu/util/StringTokenizer.java
@@ -233,6 +233,7 @@ public final class StringTokenizer implements Enumeration
         if (delim != null && delim.length() > 0) {
             m_delimiters_ = new UnicodeSet();
             m_delimiters_.addAll(delim);
+            checkDelimiters();
         }
         m_coalesceDelimiters_ = coalescedelims;
         m_source_ = str;
@@ -318,7 +319,10 @@ public final class StringTokenizer implements Enumeration
             if (m_returnDelimiters_) {
                 int tokenlimit = 0;
                 int c = UTF16.charAt(m_source_, m_nextOffset_);
-                if (m_delimiters_.contains(c)) {
+                boolean contains = delims == null
+                    ? m_delimiters_.contains(c)
+                    : c < delims.length && delims[c];
+                if (contains) {
                     if (m_coalesceDelimiters_) {
                         tokenlimit = getNextNonDelimiter(m_nextOffset_);
                     } else {
@@ -417,6 +421,7 @@ public final class StringTokenizer implements Enumeration
     public String nextToken(UnicodeSet delim)
     {
         m_delimiters_ = delim;
+        checkDelimiters();
         m_tokenOffset_ = -1;
         m_tokenSize_ = -1;
         if (!m_returnDelimiters_) {
@@ -490,8 +495,11 @@ public final class StringTokenizer implements Enumeration
                 }
                 m_tokenStart_[result] = m_nextOffset_;
                 if (m_returnDelimiters_) {
-                    if (m_delimiters_.contains(UTF16.charAt(m_source_,
-                                                   m_nextOffset_))) {
+                    int c = UTF16.charAt(m_source_, m_nextOffset_);
+                    boolean contains = delims == null
+                        ? m_delimiters_.contains(c)
+                        : c < delims.length && delims[c];
+                    if (contains) {
                         m_tokenLimit_[result] = getNextNonDelimiter(
                                                            m_nextOffset_);
                     }
@@ -592,13 +600,23 @@ public final class StringTokenizer implements Enumeration
         if (offset >= 0) {
             int result = offset;
             int c = 0;
-            do {
-                c = UTF16.charAt(m_source_, result);
-                if (m_delimiters_.contains(c)) {
-                    break;
-                }
-                result ++;
-            } while (result < m_length_);
+            if (delims == null) {
+                do {
+                    c = UTF16.charAt(m_source_, result);
+                    if (m_delimiters_.contains(c)) {
+                        break;
+                    }
+                    result ++;
+                } while (result < m_length_);
+            } else {
+                do {
+                    c = UTF16.charAt(m_source_, result);
+                    if (c < delims.length && delims[c]) {
+                        break;
+                    }
+                    result ++;
+                } while (result < m_length_);
+            }
             if (result < m_length_) {
                 return result;
             }
@@ -618,17 +636,54 @@ public final class StringTokenizer implements Enumeration
         if (offset >= 0) {
             int result = offset;
             int c = 0;
-            do {
-                c = UTF16.charAt(m_source_, result);
-                if (!m_delimiters_.contains(c)) {
-                    break;
-                }
-                result ++;
-            } while (result < m_length_);
+            if (delims == null) {
+                do {
+                    c = UTF16.charAt(m_source_, result);
+                    if (!m_delimiters_.contains(c)) {
+                        break;
+                    }
+                    result ++;
+                } while (result < m_length_);
+            } else {
+                do {
+                    c = UTF16.charAt(m_source_, result);
+                    if (!(c < delims.length && delims[c])) {
+                        break;
+                    }
+                    result ++;
+                } while (result < m_length_);
+            }
             if (result < m_length_) {
                 return result;
             }
         }
         return -1 - m_length_;
     }
+
+    /**
+     * Rebuilds the ASCII lookup table {@code delims} from m_delimiters_.
+     * The table is empty when the set is null or has no code point ranges,
+     * indexed by code point when the largest delimiter is below 0x7f, and
+     * null otherwise, which makes callers use m_delimiters_.contains(c).
+     */
+    void checkDelimiters() {
+        // size() would also count multi-character strings, so a
+        // strings-only set would reach getRangeEnd(-1); test ranges.
+        if (m_delimiters_ == null || m_delimiters_.getRangeCount() == 0) {
+            delims = new boolean[0];
+        } else {
+            int maxChar = m_delimiters_.getRangeEnd(m_delimiters_.getRangeCount()-1);
+            if (maxChar < 0x7f) {
+                delims = new boolean[maxChar+1];
+                for (int i = 0, ch; -1 != (ch = m_delimiters_.charAt(i)); ++i) {
+                    delims[ch] = true;
+                }
+            } else {
+                delims = null;
+            }
+        }
+    }
+
+    /** ASCII delimiter table; null means consult m_delimiters_. */
+    private boolean[] delims;
 }