From a50418ca17186290ab6c65d27d0d4d86ba95f702 Mon Sep 17 00:00:00 2001 From: Yoshito Umaoka Date: Fri, 10 Jan 2014 16:12:09 +0000 Subject: [PATCH] ICU-10583 Fixed a minor problem in illegal lead byte handling in the UTF-8 charset recognizer. X-SVN-Rev: 34857 --- .../core/src/com/ibm/icu/text/CharsetRecog_UTF8.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_UTF8.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_UTF8.java index 454e81c97c8..37357329b86 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_UTF8.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_UTF8.java @@ -1,6 +1,6 @@ /** ******************************************************************************* -* Copyright (C) 2005 - 2013, International Business Machines Corporation and * +* Copyright (C) 2005 - 2014, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ @@ -48,10 +48,7 @@ class CharsetRecog_UTF8 extends CharsetRecognizer { trailBytes = 3; } else { numInvalid++; - if (numInvalid > 5) { - break; - } - trailBytes = 0; + continue; } // Verify that we've got the right number of trail bytes in the sequence @@ -70,7 +67,6 @@ class CharsetRecog_UTF8 extends CharsetRecognizer { break; } } - } // Cook up some sort of confidence score, based on presense of a BOM