ICU-10012 Disable EBCDIC Arabic/Hebrew detectors by default. Added APIs to enable/disable each charset and get currently active charsets.

X-SVN-Rev: 34350
2025-04-14 17:24:01 +00:00 · 2013-09-17 06:57:53 +00:00 · 2013-09-17 06:57:53 +00:00 · 920dadff8d
commit 920dadff8d
parent 7b6fa12efe
5 changed files with 326 additions and 59 deletions
--- a/icu4c/source/i18n/csdetect.cpp
+++ b/icu4c/source/i18n/csdetect.cpp
@ -32,7 +32,21 @@
 #define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
 #define DELETE_ARRAY(array) uprv_free((void *) (array))

-static icu::CharsetRecognizer **fCSRecognizers = NULL;
+U_NAMESPACE_BEGIN
+
+/**
+ * Entry of the global recognizer table: one charset recognizer together with
+ * the flag saying whether it is enabled by default.
+ *
+ * The entry owns its recognizer and deletes it in the destructor.
+ */
+struct CSRecognizerInfo : public UMemory {
+    CSRecognizerInfo(CharsetRecognizer *recognizer, UBool isDefaultEnabled)
+        : recognizer(recognizer), isDefaultEnabled(isDefaultEnabled) {}

+    ~CSRecognizerInfo() {delete recognizer;}

+    CharsetRecognizer *recognizer;   // owned; released by the destructor
+    UBool isDefaultEnabled;          // TRUE if enabled without an explicit request

+private:
+    // Declared but not defined: a copy would share 'recognizer' and delete it twice.
+    CSRecognizerInfo(const CSRecognizerInfo &other);
+    CSRecognizerInfo &operator=(const CSRecognizerInfo &other);
+};
+
+U_NAMESPACE_END
+
+static icu::CSRecognizerInfo **fCSRecognizers = NULL;
 static icu::UInitOnce gCSRecognizersInitOnce;
 static int32_t fCSRecognizers_size = 0;

@ -70,47 +84,48 @@ charsetMatchComparator(const void * /*context*/, const void *left, const void *r
 static void U_CALLCONV initRecognizers(UErrorCode &status) {
    U_NAMESPACE_USE
    ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup);
-    CharsetRecognizer *tempArray[] = {
-        new CharsetRecog_UTF8(),
+    CSRecognizerInfo *tempArray[] = {
+        new CSRecognizerInfo(new CharsetRecog_UTF8(), TRUE),

-        new CharsetRecog_UTF_16_BE(),
-        new CharsetRecog_UTF_16_LE(),
-        new CharsetRecog_UTF_32_BE(),
-        new CharsetRecog_UTF_32_LE(),
+        new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), TRUE),

-        new CharsetRecog_8859_1(),
-        new CharsetRecog_8859_2(),
-        new CharsetRecog_8859_5_ru(),
-        new CharsetRecog_8859_6_ar(),
-        new CharsetRecog_8859_7_el(),
-        new CharsetRecog_8859_8_I_he(),
-        new CharsetRecog_8859_8_he(),
-        new CharsetRecog_windows_1251(),
-        new CharsetRecog_windows_1256(),
-        new CharsetRecog_KOI8_R(),
-        new CharsetRecog_8859_9_tr(),
-        new CharsetRecog_sjis(),
-        new CharsetRecog_gb_18030(),
-        new CharsetRecog_euc_jp(),
-        new CharsetRecog_euc_kr(),
-        new CharsetRecog_big5(),
+        new CSRecognizerInfo(new CharsetRecog_8859_1(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_2(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_5_ru(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_6_ar(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_7_el(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_8_I_he(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_8_he(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_windows_1251(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_windows_1256(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_KOI8_R(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_8859_9_tr(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_sjis(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_gb_18030(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_euc_jp(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_euc_kr(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),

-        new CharsetRecog_2022JP(),
-        new CharsetRecog_2022KR(),
-        new CharsetRecog_2022CN(),
+        new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
+        new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),

-        new CharsetRecog_IBM424_he_rtl(),
-        new CharsetRecog_IBM424_he_ltr(),
-        new CharsetRecog_IBM420_ar_rtl(),
-        new CharsetRecog_IBM420_ar_ltr()
+        new CSRecognizerInfo(new CharsetRecog_IBM424_he_rtl(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
+        new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
    };
    int32_t rCount = ARRAY_SIZE(tempArray);

-    fCSRecognizers = NEW_ARRAY(CharsetRecognizer *, rCount);
+    fCSRecognizers = NEW_ARRAY(CSRecognizerInfo *, rCount);

    if (fCSRecognizers == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
-    } else {
+    } 
+    else {
        fCSRecognizers_size = rCount;
        for (int32_t r = 0; r < rCount; r += 1) {
            fCSRecognizers[r] = tempArray[r];
@ -132,7 +147,8 @@ void CharsetDetector::setRecognizers(UErrorCode &status)

 CharsetDetector::CharsetDetector(UErrorCode &status)
  : textIn(new InputText(status)), resultArray(NULL),
-    resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE)
+    resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE),
+    fEnabledRecognizers(NULL)
 {
    if (U_FAILURE(status)) {
        return;
@ -170,6 +186,10 @@ CharsetDetector::~CharsetDetector()
    }

    uprv_free(resultArray);
+
+    if (fEnabledRecognizers) {
+        uprv_free(fEnabledRecognizers);
+    }
 }

 void CharsetDetector::setText(const char *in, int32_t len)
@ -234,7 +254,7 @@ const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound,
        // give a match quality > 0.
        resultCount = 0;
        for (i = 0; i < fCSRecognizers_size; i += 1) {
-            csr = fCSRecognizers[i];
+            csr = fCSRecognizers[i]->recognizer;
            if (csr->match(textIn, resultArray[resultCount])) {
                resultCount++;
            }
@ -251,6 +271,46 @@ const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound,
    return resultArray;
 }

+/**
+ * Enable or disable a single charset recognizer for this detector.
+ *
+ * The per-detector override array (fEnabledRecognizers) is created lazily:
+ * while every request matches the recognizer's default setting it stays NULL.
+ *
+ * @param encoding name of the charset; compared with each recognizer's getName().
+ * @param enabled  TRUE to enable the charset, FALSE to disable it.
+ * @param status   set to U_ILLEGAL_ARGUMENT_ERROR when encoding is NULL or
+ *                 matches no recognizer, and to U_MEMORY_ALLOCATION_ERROR
+ *                 when the override array cannot be allocated.
+ */
+void CharsetDetector::setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (encoding == NULL) {
+        // uprv_strcmp() below must never be handed a NULL name.
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }

+    // Locate the recognizer whose name matches the requested encoding.
+    int32_t modIdx = -1;
+    UBool isDefaultVal = FALSE;
+    for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+        CSRecognizerInfo *csrinfo = fCSRecognizers[i];
+        if (uprv_strcmp(csrinfo->recognizer->getName(), encoding) == 0) {
+            modIdx = i;
+            isDefaultVal = (csrinfo->isDefaultEnabled == enabled);
+            break;
+        }
+    }
+    if (modIdx < 0) {
+        // No matching encoding found
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }

+    if (fEnabledRecognizers == NULL && !isDefaultVal) {
+        // First request that differs from the defaults: create the override array.
+        fEnabledRecognizers = NEW_ARRAY(UBool, fCSRecognizers_size);
+        if (fEnabledRecognizers == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        // Start from the default setting of every recognizer.
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            fEnabledRecognizers[i] = fCSRecognizers[i]->isDefaultEnabled;
+        }
+    }

+    // If the array is still NULL the request equals the default; nothing to store.
+    if (fEnabledRecognizers != NULL) {
+        fEnabledRecognizers[modIdx] = enabled;
+    }
+}
+
 /*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const
 {
    if( index > fCSRecognizers_size-1 || index < 0) {
@ -267,6 +327,8 @@ U_NAMESPACE_END
 U_CDECL_BEGIN
 typedef struct {
    int32_t currIndex;
+    UBool all;
+    UBool *enabledRecognizers;
 } Context;


@ -281,27 +343,73 @@ enumClose(UEnumeration *en) {
 }

 static int32_t U_CALLCONV
-enumCount(UEnumeration *, UErrorCode *) {
-    return fCSRecognizers_size;
+enumCount(UEnumeration *en, UErrorCode *) {
+    if (((Context *)en->context)->all) {
+        // ucsdet_getAllDetectableCharsets, all charset detector names
+        return fCSRecognizers_size;
+    }
+
+    // Otherwise, ucsdet_getDetectableCharsets - only enabled ones
+    int32_t count = 0;
+    UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
+    if (enabledArray != NULL) {
+        // custom set
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            if (enabledArray[i]) {
+                count++;
+            }
+        }
+    } else {
+        // default set
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            if (fCSRecognizers[i]->isDefaultEnabled) {
+                count++;
+            }
+        }
+    }
+    return count;
 }

 static const char* U_CALLCONV
 enumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) {
-    if(((Context *)en->context)->currIndex >= fCSRecognizers_size) {
-        if(resultLength != NULL) {
-            *resultLength = 0;
+    const char *currName = NULL;
+
+    if (((Context *)en->context)->currIndex < fCSRecognizers_size) {
+        if (((Context *)en->context)->all) {
+            // ucsdet_getAllDetectableCharsets, all charset detector names
+            currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+            ((Context *)en->context)->currIndex++;
+        } else {
+            // ucsdet_getDetectableCharsets
+            UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
+            if (enabledArray != NULL) {
+                // custom set
+                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
+                    if (enabledArray[((Context *)en->context)->currIndex]) {
+                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+                    }
+                    ((Context *)en->context)->currIndex++;
+                }
+            } else {
+                // default set
+                while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
+                    if (fCSRecognizers[((Context *)en->context)->currIndex]->isDefaultEnabled) {
+                        currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
+                    }
+                    ((Context *)en->context)->currIndex++;
+                }
+            }
        }
-        return NULL;
    }
-    const char *currName = fCSRecognizers[((Context *)en->context)->currIndex]->getName();
+
    if(resultLength != NULL) {
-        *resultLength = (int32_t)uprv_strlen(currName);
+        *resultLength = currName == NULL ? 0 : (int32_t)uprv_strlen(currName);
    }
-    ((Context *)en->context)->currIndex++;

    return currName;
 }

+
 static void U_CALLCONV
 enumReset(UEnumeration *en, UErrorCode *) {
    ((Context *)en->context)->currIndex = 0;
@ -317,25 +425,61 @@ static const UEnumeration gCSDetEnumeration = {
    enumReset
 };

-U_CAPI  UEnumeration * U_EXPORT2
-ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
-{
-    U_NAMESPACE_USE
+U_CDECL_END

-    if(U_FAILURE(*status)) {
+U_NAMESPACE_BEGIN
+
+UEnumeration * CharsetDetector::getAllDetectableCharsets(UErrorCode &status)
+{
+
+    /* Initialize recognized charsets. */
+    setRecognizers(status);
+
+    if(U_FAILURE(status)) {
        return 0;
    }

-    /* Initialize recognized charsets. */
-    CharsetDetector::getDetectableCount();
-
    UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
+    if (en == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
    memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
    en->context = (void*)NEW_ARRAY(Context, 1);
+    if (en->context == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        DELETE_ARRAY(en);
+        return 0;
+    }
    uprv_memset(en->context, 0, sizeof(Context));
+    ((Context*)en->context)->all = TRUE;
    return en;
 }
-U_CDECL_END

-#endif
+/**
+ * Create an enumeration over the names of the charsets enabled for this detector.
+ *
+ * If this detector has a custom enabled set (fEnabledRecognizers != NULL), the
+ * flags are copied into the enumeration's own context, so the enumeration is a
+ * snapshot that stays valid after the detector is modified or destroyed.
+ * Otherwise the enumeration walks the default-enabled recognizers.
+ *
+ * @param status set to U_MEMORY_ALLOCATION_ERROR if an allocation fails.
+ * @return the new enumeration, or 0 on failure.
+ */
+UEnumeration * CharsetDetector::getDetectableCharsets(UErrorCode &status) const
+{
+    if(U_FAILURE(status)) {
+        return 0;
+    }

+    UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
+    if (en == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
+
+    // Reserve room for a private copy of the enabled flags directly behind the
+    // Context, so both live in one allocation and are released together when
+    // the context is freed. Pointing at fEnabledRecognizers itself would leave
+    // the enumeration dangling once the detector's destructor frees that array.
+    size_t snapshotSize = 0;
+    if (fEnabledRecognizers != NULL) {
+        snapshotSize = (size_t)fCSRecognizers_size * sizeof(UBool);
+    }
+    Context *context = (Context *)uprv_malloc(sizeof(Context) + snapshotSize);
+    if (context == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        DELETE_ARRAY(en);
+        return 0;
+    }
+    uprv_memset(context, 0, sizeof(Context));
+    context->all = FALSE;
+    if (fEnabledRecognizers != NULL) {
+        UBool *snapshot = (UBool *)(context + 1);
+        for (int32_t i = 0; i < fCSRecognizers_size; i++) {
+            snapshot[i] = fEnabledRecognizers[i];
+        }
+        context->enabledRecognizers = snapshot;
+    }
+    en->context = (void *)context;
+    return en;
+}
+
+U_NAMESPACE_END
+
+#endif
--- a/icu4c/source/i18n/csdetect.h
+++ b/icu4c/source/i18n/csdetect.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 2005-2006, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@ -28,6 +28,10 @@ private:
    UBool fFreshTextSet;
    static void setRecognizers(UErrorCode &status);

+    UBool *fEnabledRecognizers;  // If not null, the active set of charset recognizers has
+                                // been changed from the default. Array indices
+                                // correspond to those of fCSRecognizers. See setDetectableCharset().
+
 public:
    CharsetDetector(UErrorCode &status);

@ -47,7 +51,12 @@ public:

 //    const char *getCharsetName(int32_t index, UErrorCode& status) const;

-    static int32_t getDetectableCount(); 
+    static int32_t getDetectableCount();
+
+
+    static UEnumeration * getAllDetectableCharsets(UErrorCode &status);
+    UEnumeration * getDetectableCharsets(UErrorCode &status) const;
+    void setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status);
 };

 U_NAMESPACE_END
--- a/icu4c/source/i18n/ucsdet.cpp
+++ b/icu4c/source/i18n/ucsdet.cpp
@ -1,6 +1,6 @@
 /*
 ********************************************************************************
- *   Copyright (C) 2005-2007, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ********************************************************************************
 */
@ -11,6 +11,11 @@
 #include "unicode/ucsdet.h"
 #include "csdetect.h"
 #include "csmatch.h"
+#include "csrsbcs.h"
+#include "csrmbcs.h"
+#include "csrutf8.h"
+#include "csrucode.h"
+#include "csr2022.h"

 #include "cmemory.h"

@ -175,6 +180,26 @@ ucsdet_getUChars(const UCharsetMatch *ucsm,

    return ((CharsetMatch *) ucsm)->getUChars(buf, cap, status);
 }
+
+/*
+ * C API wrapper around CharsetDetector::setDetectableCharset().
+ * Does nothing when status is NULL or already holds a failure; sets
+ * U_ILLEGAL_ARGUMENT_ERROR when no detector is supplied.
+ */
+U_CAPI void U_EXPORT2
+ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status)
+{
+    if (status == NULL || U_FAILURE(*status)) {
+        return;
+    }
+    if (ucsd == NULL) {
+        // Calling a member function through a NULL detector is undefined behavior.
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    ((CharsetDetector *)ucsd)->setDetectableCharset(encoding, enabled, *status);
+}
+
+/*
+ * C API wrapper around CharsetDetector::getAllDetectableCharsets().
+ * The detector argument is not used. Returns NULL without doing any work
+ * when status is NULL or already holds a failure.
+ */
+U_CAPI  UEnumeration * U_EXPORT2
+ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
+{
+    if (status == NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    return CharsetDetector::getAllDetectableCharsets(*status);
+}
+
+/*
+ * C API wrapper around CharsetDetector::getDetectableCharsets().
+ * Returns NULL when status is NULL or already holds a failure; sets
+ * U_ILLEGAL_ARGUMENT_ERROR and returns NULL when no detector is supplied.
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status)
+{
+    if (status == NULL || U_FAILURE(*status)) {
+        return NULL;
+    }
+    if (ucsd == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    // getDetectableCharsets() is a const member, so constness need not be cast away.
+    return ((const CharsetDetector *)ucsd)->getDetectableCharsets(*status);
+}
+
 U_CDECL_END

+
 #endif
--- a/icu4c/source/i18n/unicode/ucsdet.h
+++ b/icu4c/source/i18n/unicode/ucsdet.h
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 2005-2010, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucsdet.h
@ -321,12 +321,21 @@ ucsdet_getUChars(const UCharsetMatch *ucsm,
  *  The returned UEnumeration provides access to the names of
  *  the charsets.
  *
+  *  <p>
  *  The state of the Charset detector that is passed in does not
  *  affect the result of this function, but requiring a valid, open
  *  charset detector as a parameter insures that the charset detection
  *  service has been safely initialized and that the required detection
  *  data is available.
  *
+  *  <p>
+  *  <b>Note:</b> Multiple different charset encodings in the same family may use
+  *  a single shared name in this implementation. For example, this function returns
+  *  an enumeration including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252"
+  *  (Windows Latin 1). However, the actual detection result could be "windows-1252"
+  *  when the input data matches Latin 1 code points plus any code points only available
+  *  in "windows-1252".
+  *
  *  @param ucsd a Charset detector.
  *  @param status  Any error conditions are reported back in this variable.
  *  @return an iterator providing access to the detectable charset names.
@ -335,7 +344,6 @@ ucsdet_getUChars(const UCharsetMatch *ucsm,
 U_STABLE  UEnumeration * U_EXPORT2
 ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status);

-
 /**
  *  Test whether input filtering is enabled for this charset detector.
  *  Input filtering removes text that appears to be HTML or xml
@ -346,6 +354,7 @@ ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *statu
  *  @return TRUE if filtering is enabled.
  *  @stable ICU 3.6
  */
+
 U_STABLE  UBool U_EXPORT2
 ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);

@ -364,6 +373,39 @@ ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
 U_STABLE  UBool U_EXPORT2
 ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);

+
+/**
+  *  Get an iterator over the set of detectable charsets -
+  *  over the charsets that are enabled by the specified charset detector.
+  *
+  *  The returned UEnumeration provides access to the names of
+  *  the charsets.
+  *
+  *  @param ucsd a Charset detector.
+  *  @param status  Any error conditions are reported back in this variable.
+  *  @return an iterator providing access to the detectable charset names by
+  *  the specified charset detector.
+  *  @internal
+  */
+U_DRAFT UEnumeration * U_EXPORT2
+ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *status);
+
+/**
+  * Enable or disable an individual charset encoding.
+  * The name of the charset encoding must be one of the names returned by
+  * ucsdet_getAllDetectableCharsets().
+  *
+  * @param ucsd a Charset detector.
+  * @param encoding the name of the charset encoding.
+  * @param enabled <code>TRUE</code> to enable, or <code>FALSE</code> to disable the
+  *   charset encoding.
+  * @param status receives the return status. When the name of charset encoding
+  *   is not supported, U_ILLEGAL_ARGUMENT_ERROR is set.
+  * @internal
+  */
+U_DRAFT void U_EXPORT2
+ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status);
+
 #endif
 #endif   /* __UCSDET_H */

--- a/icu4c/source/test/intltest/csdetest.cpp
+++ b/icu4c/source/test/intltest/csdetest.cpp
@ -1,6 +1,6 @@
 /*
 **********************************************************************
- *   Copyright (C) 2005-2012, International Business Machines
+ *   Copyright (C) 2005-2013, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@ -275,6 +275,45 @@ void CharsetDetectionTest::ConstructionTest()
        printf("%s\n", name);
 #endif
    }
+
+    const char* defDisabled[] = {
+        "IBM420_rtl", "IBM420_ltr",
+        "IBM424_rtl", "IBM424_ltr",
+        0
+    };
+
+    LocalUEnumerationPointer eActive(ucsdet_getDetectableCharsets(csd.getAlias(), status));
+    const char *activeName = NULL;
+
+    while (activeName = uenum_next(eActive.getAlias(), NULL, status)) {
+        // the charset must be included in all list
+        UBool found = FALSE;
+
+        const char *name = NULL;
+        uenum_reset(e.getAlias(), status);
+        while (name = uenum_next(e.getAlias(), NULL, status)) {
+            if (strcmp(activeName, name) == 0) {
+                found = TRUE;
+                break;
+            }
+        }
+
+        if (!found) {
+            errln(UnicodeString(activeName) + " is not included in the all charset list.");
+        }
+
+        // some charsets are disabled by default
+        found = FALSE;
+        for (int32_t i = 0; defDisabled[i] != 0; i++) {
+            if (strcmp(activeName, defDisabled[i]) == 0) {
+                found = TRUE;
+                break;
+            }
+        }
+        if (found) {
+            errln(UnicodeString(activeName) + " should not be included in the default charset list.");
+        }
+    }
 }

 void CharsetDetectionTest::UTF8Test()
@ -597,6 +636,10 @@ void CharsetDetectionTest::IBM424Test()
    char *bytes_r = extractBytes(s2, "IBM424", brLength);
    
    UCharsetDetector *csd = ucsdet_open(&status);
+	ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status);
    if (U_FAILURE(status)) {
        errln("Error opening charset detector. - %s", u_errorName(status));
    }
@ -684,6 +727,10 @@ void CharsetDetectionTest::IBM420Test()
    if (U_FAILURE(status)) {
        errln("Error opening charset detector. - %s", u_errorName(status));
    }
+	ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status);
+	ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status);
    const UCharsetMatch *match;
    const char *name;