From ed4a0639ba50b9c498e13a8f9ecd665d113f2df4 Mon Sep 17 00:00:00 2001 From: Eric Mader Date: Fri, 20 May 2005 22:33:10 +0000 Subject: [PATCH] ICU-4060 Implement CharsetMatch.getReader(), CharsetMatch.getString(), CharsetDetector.getReader(), CharsetDetector.getString(), relationship between ISO-8859-x and Windows-125x. X-SVN-Rev: 17637 --- .../src/com/ibm/icu/text/CharsetDetector.java | 31 +++++++++++++++++-- icu4j/src/com/ibm/icu/text/CharsetMatch.java | 31 +++++++++++++++++-- .../com/ibm/icu/text/CharsetRecog_sbcs.java | 17 +++++----- 3 files changed, 64 insertions(+), 15 deletions(-) diff --git a/icu4j/src/com/ibm/icu/text/CharsetDetector.java b/icu4j/src/com/ibm/icu/text/CharsetDetector.java index ae034ba9ec3..3a145d439e6 100644 --- a/icu4j/src/com/ibm/icu/text/CharsetDetector.java +++ b/icu4j/src/com/ibm/icu/text/CharsetDetector.java @@ -212,7 +212,15 @@ public class CharsetDetector { * or null or an empty string if none is available. */ public Reader getReader(InputStream in, String declaredEncoding) { - return null; + fDeclaredEncoding = declaredEncoding; + + try { + setText(in); + + return detect().getReader(); + } catch (IOException e) { + return null; + } } /** @@ -231,7 +239,14 @@ public class CharsetDetector { */ public String getString(byte[] in, String declaredEncoding) { - return null; + fDeclaredEncoding = declaredEncoding; + + try { + setText(in); + return detect().getString(-1); + } catch (IOException e) { + return null; + } } @@ -343,7 +358,14 @@ public class CharsetDetector { for (srci=0; srci= 0) { + sb.append(buffer, 0, bytesRead); + max -= bytesRead; + } + + reader.close(); + + return sb.toString(); } else { result = new String(fRawInput, getName()); } diff --git a/icu4j/src/com/ibm/icu/text/CharsetRecog_sbcs.java b/icu4j/src/com/ibm/icu/text/CharsetRecog_sbcs.java index 69d42b80540..b7fe00f428c 100644 --- a/icu4j/src/com/ibm/icu/text/CharsetRecog_sbcs.java +++ b/icu4j/src/com/ibm/icu/text/CharsetRecog_sbcs.java @@ -148,17 +148,14 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer { } } - private int byteIndex; - - void reset() - { - byteIndex = 0; - } + protected boolean haveC1Bytes = false; int match(CharsetDetector det, int[] ngrams, byte[] byteMap) { NGramParser parser = new NGramParser(ngrams, byteMap); + haveC1Bytes = det.fC1Bytes; + return parser.parse(det); } @@ -201,7 +198,7 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer { public String getName() { - return "ISO-8859-1"; + return haveC1Bytes? "windows-1252" : "ISO-8859-1"; } } @@ -444,7 +441,7 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer { public String getName() { - return "ISO-8859-2"; + return haveC1Bytes? "windows-1250" : "ISO-8859-2"; } } @@ -630,7 +627,7 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer { public String getName() { - return "ISO-8859-7"; + return haveC1Bytes? "windows-1253" : "ISO-8859-7"; } } @@ -693,7 +690,7 @@ public abstract class CharsetRecog_sbcs extends CharsetRecognizer { public String getName() { - return "ISO-8859-9"; + return haveC1Bytes? "windows-1254" : "ISO-8859-9"; } }