mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-10583 Fixed a minor problem in illegal lead byte handling in the UTF-8 charset recognizer.
X-SVN-Rev: 34857
This commit is contained in:
parent
13c6750194
commit
a50418ca17
1 changed file with 2 additions and 6 deletions
|
@ -1,6 +1,6 @@
|
|||
/**
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2005 - 2013, International Business Machines Corporation and *
|
||||
* Copyright (C) 2005 - 2014, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
@ -48,10 +48,7 @@ class CharsetRecog_UTF8 extends CharsetRecognizer {
|
|||
trailBytes = 3;
|
||||
} else {
|
||||
numInvalid++;
|
||||
if (numInvalid > 5) {
|
||||
break;
|
||||
}
|
||||
trailBytes = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Verify that we've got the right number of trail bytes in the sequence
|
||||
|
@ -70,7 +67,6 @@ class CharsetRecog_UTF8 extends CharsetRecognizer {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Cook up some sort of confidence score, based on presence of a BOM
|
||||
|
|
Loading…
Add table
Reference in a new issue