ICU-5138 Separate the casing data from normalization data and data loading.

X-SVN-Rev: 19499
2025-04-08 23:10:40 +00:00 · 2006-03-31 05:29:06 +00:00 · 2006-03-31 05:29:06 +00:00 · 7d382500f6
commit 7d382500f6
parent f47dea2b53
8 changed files with 271 additions and 247 deletions
--- a/icu4c/source/common/Makefile.in
+++ b/icu4c/source/common/Makefile.in
@@ -74,7 +74,7 @@ utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_w
 normlzr.o unorm.o unormcmp.o unorm_it.o chariter.o schriter.o uchriter.o uiter.o \
 uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
 uscript.o usc_impl.o unames.o \
-utrie.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
+utrie.o utrie_swap.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
 uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \
 rbbi.o rbbidata.o rbbinode.o rbbirb.o rbbiscan.o rbbisetb.o rbbistbl.o rbbitblb.o \
 serv.o servnotf.o servls.o servlk.o servlkf.o servrbf.o servslkf.o \
--- a/icu4c/source/common/common.vcproj
+++ b/icu4c/source/common/common.vcproj
@@ -460,6 +460,9 @@
 			<File
 				RelativePath=".\utrie.h">
 			</File>
+			<File
+				RelativePath=".\utrie_swap.c">
+			</File>
 			<File
 				RelativePath=".\uvector.cpp">
 			</File>
--- a/icu4c/source/common/ucase.c
+++ b/icu4c/source/common/ucase.c
@@ -196,7 +196,9 @@ ucase_openBinary(const uint8_t *bin, int32_t length, UErrorCode *pErrorCode) {
 U_CAPI void U_EXPORT2
 ucase_close(UCaseProps *csp) {
    if(csp!=NULL) {
+#if !UCASE_HARDCODE_DATA
        udata_close(csp->mem);
+#endif
        uprv_free(csp);
    }
 }
@@ -1482,3 +1484,116 @@ ucase_toFullFolding(const UCaseProps *csp, UChar32 c,

    return (result==c) ? ~result : result;
 }
+
+/* case mapping properties API ---------------------------------------------- */
+
+/* get the UCaseProps singleton, or else its dummy, once and for all */
+static const UCaseProps *
+getCaseProps() {
+    /*
+     * This lazy initialization with double-checked locking (without mutex protection for
+     * the initial check) is transiently unsafe under certain circumstances.
+     * Check the readme and use u_init() if necessary.
+     */
+
+    /* the initial check is performed by the GET_CASE_PROPS() macro */
+    const UCaseProps *csp;
+    UErrorCode errorCode=U_ZERO_ERROR;
+
+    csp=ucase_getSingleton(&errorCode);
+    if(U_FAILURE(errorCode)) {
+        errorCode=U_ZERO_ERROR;
+        csp=ucase_getDummy(&errorCode);
+        if(U_FAILURE(errorCode)) {
+            return NULL;
+        }
+    }
+
+    return csp;
+}
+
+/*
+ * In ICU 3.0, most Unicode properties were loaded from uprops.icu.
+ * ICU 3.2 adds ucase.icu for case mapping properties.
+ * ICU 3.4 adds ubidi.icu for bidi/shaping properties and
+ * removes case/bidi/shaping properties from uprops.icu.
+ *
+ * Loading of uprops.icu was never mutex-protected and required u_init()
+ * for thread safety.
+ * In order to maintain performance for all such properties,
+ * ucase.icu and ubidi.icu are loaded lazily, without mutexing.
+ * u_init() will try to load them for thread safety,
+ * but u_init() will not fail if they are missing.
+ *
+ * uchar.c maintains a tri-state flag for (not loaded/loaded/failed to load)
+ * and an error code for load failure.
+ * Instead, here we try to load at most once.
+ * If it works, we use the resulting singleton object.
+ * If it fails, then we get a dummy object, which always works unless
+ * we are seriously out of memory.
+ * After the first try, we have a never-changing pointer to either the
+ * real singleton or the dummy.
+ *
+ * This method is used in Unicode properties APIs (uchar.h) that
+ * do not have a service object and also do not have an error code parameter.
+ * Other API implementations get the singleton themselves
+ * (with mutexing), store it in the service object, and report errors.
+ */
+#define GET_CASE_PROPS() (gCsp!=NULL ? gCsp : getCaseProps())
+
+/* public API (see uchar.h) */
+
+U_CAPI UBool U_EXPORT2
+u_isULowercase(UChar32 c) {
+    return (UBool)(UCASE_LOWER==ucase_getType(GET_CASE_PROPS(), c));
+}
+
+U_CAPI UBool U_EXPORT2
+u_isUUppercase(UChar32 c) {
+    return (UBool)(UCASE_UPPER==ucase_getType(GET_CASE_PROPS(), c));
+}
+
+/* Transforms the Unicode character to its lower case equivalent.*/
+U_CAPI UChar32 U_EXPORT2
+u_tolower(UChar32 c) {
+    return ucase_tolower(GET_CASE_PROPS(), c);
+}
+    
+/* Transforms the Unicode character to its upper case equivalent.*/
+U_CAPI UChar32 U_EXPORT2
+u_toupper(UChar32 c) {
+    return ucase_toupper(GET_CASE_PROPS(), c);
+}
+
+/* Transforms the Unicode character to its title case equivalent.*/
+U_CAPI UChar32 U_EXPORT2
+u_totitle(UChar32 c) {
+    return ucase_totitle(GET_CASE_PROPS(), c);
+}
+
+/* return the simple case folding mapping for c */
+U_CAPI UChar32 U_EXPORT2
+u_foldCase(UChar32 c, uint32_t options) {
+    return ucase_fold(GET_CASE_PROPS(), c, options);
+}
+
+U_CFUNC int32_t U_EXPORT2
+ucase_hasBinaryProperty(UChar32 c, UProperty which) {
+    /* case mapping properties */
+    const UCaseProps *csp=GET_CASE_PROPS();
+    if(csp==NULL) {
+        return FALSE;
+    }
+    switch(which) {
+    case UCHAR_LOWERCASE:
+        return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
+    case UCHAR_UPPERCASE:
+        return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
+    case UCHAR_SOFT_DOTTED:
+        return ucase_isSoftDotted(csp, c);
+    case UCHAR_CASE_SENSITIVE:
+        return ucase_isCaseSensitive(csp, c);
+    default:
+        return FALSE;
+    }
+}
--- a/icu4c/source/common/ucase.h
+++ b/icu4c/source/common/ucase.h
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2004-2005, International Business Machines
+*   Copyright (C) 2004-2006, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -238,6 +238,9 @@ ucase_toFullFolding(const UCaseProps *csp, UChar32 c,
                    const UChar **pString,
                    uint32_t options);

+U_CFUNC int32_t U_EXPORT2
+ucase_hasBinaryProperty(UChar32 c, UProperty which);
+
 /* file definitions --------------------------------------------------------- */

 #define UCASE_DATA_NAME "ucase"
--- a/icu4c/source/common/uprops.c
+++ b/icu4c/source/common/uprops.c
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2002-2005, International Business Machines
+*   Copyright (C) 2002-2006, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -36,115 +36,13 @@

 /* cleanup ------------------------------------------------------------------ */

-static const UCaseProps *gCsp=NULL;
 static const UBiDiProps *gBdp=NULL;

 static UBool U_CALLCONV uprops_cleanup(void) {
-    gCsp=NULL;
    gBdp=NULL;
    return TRUE;
 }

-/* case mapping properties API ---------------------------------------------- */
-
-/* get the UCaseProps singleton, or else its dummy, once and for all */
-static const UCaseProps *
-getCaseProps() {
-    /*
-     * This lazy intialization with double-checked locking (without mutex protection for
-     * the initial check) is transiently unsafe under certain circumstances.
-     * Check the readme and use u_init() if necessary.
-     */
-
-    /* the initial check is performed by the GET_CASE_PROPS() macro */
-    const UCaseProps *csp;
-    UErrorCode errorCode=U_ZERO_ERROR;
-
-    csp=ucase_getSingleton(&errorCode);
-    if(U_FAILURE(errorCode)) {
-        errorCode=U_ZERO_ERROR;
-        csp=ucase_getDummy(&errorCode);
-        if(U_FAILURE(errorCode)) {
-            return NULL;
-        }
-    }
-
-    umtx_lock(NULL);
-    if(gCsp==NULL) {
-        gCsp=csp;
-        csp=NULL;
-        ucln_common_registerCleanup(UCLN_COMMON_UPROPS, uprops_cleanup);
-    }
-    umtx_unlock(NULL);
-
-    return gCsp;
-}
-
-/*
- * In ICU 3.0, most Unicode properties were loaded from uprops.icu.
- * ICU 3.2 adds ucase.icu for case mapping properties.
- * ICU 3.4 adds ubidi.icu for bidi/shaping properties and
- * removes case/bidi/shaping properties from uprops.icu.
- *
- * Loading of uprops.icu was never mutex-protected and required u_init()
- * for thread safety.
- * In order to maintain performance for all such properties,
- * ucase.icu and ubidi.icu are loaded lazily, without mutexing.
- * u_init() will try to load them for thread safety,
- * but u_init() will not fail if they are missing.
- *
- * uchar.c maintains a tri-state flag for (not loaded/loaded/failed to load)
- * and an error code for load failure.
- * Instead, here we try to load at most once.
- * If it works, we use the resulting singleton object.
- * If it fails, then we get a dummy object, which always works unless
- * we are seriously out of memory.
- * After the first try, we have a never-changing pointer to either the
- * real singleton or the dummy.
- *
- * This method is used in Unicode properties APIs (uchar.h) that
- * do not have a service object and also do not have an error code parameter.
- * Other API implementations get the singleton themselves
- * (with mutexing), store it in the service object, and report errors.
- */
-#define GET_CASE_PROPS() (gCsp!=NULL ? gCsp : getCaseProps())
-
-/* public API (see uchar.h) */
-
-U_CAPI UBool U_EXPORT2
-u_isULowercase(UChar32 c) {
-    return (UBool)(UCASE_LOWER==ucase_getType(GET_CASE_PROPS(), c));
-}
-
-U_CAPI UBool U_EXPORT2
-u_isUUppercase(UChar32 c) {
-    return (UBool)(UCASE_UPPER==ucase_getType(GET_CASE_PROPS(), c));
-}
-
-/* Transforms the Unicode character to its lower case equivalent.*/
-U_CAPI UChar32 U_EXPORT2
-u_tolower(UChar32 c) {
-    return ucase_tolower(GET_CASE_PROPS(), c);
-}
-    
-/* Transforms the Unicode character to its upper case equivalent.*/
-U_CAPI UChar32 U_EXPORT2
-u_toupper(UChar32 c) {
-    return ucase_toupper(GET_CASE_PROPS(), c);
-}
-
-/* Transforms the Unicode character to its title case equivalent.*/
-U_CAPI UChar32 U_EXPORT2
-u_totitle(UChar32 c) {
-    return ucase_totitle(GET_CASE_PROPS(), c);
-}
-
-/* return the simple case folding mapping for c */
-U_CAPI UChar32 U_EXPORT2
-u_foldCase(UChar32 c, uint32_t options) {
-    return ucase_fold(GET_CASE_PROPS(), c, options);
-}
-
 /* bidi/shaping properties API ---------------------------------------------- */

 /* get the UBiDiProps singleton, or else its dummy, once and for all */
@@ -261,23 +159,7 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
            return (u_getUnicodeProperties(c, column)&mask)!=0;
        } else {
            if(column==UPROPS_SRC_CASE) {
-                /* case mapping properties */
-                const UCaseProps *csp=GET_CASE_PROPS();
-                if(csp==NULL) {
-                    return FALSE;
-                }
-                switch(which) {
-                case UCHAR_LOWERCASE:
-                    return (UBool)(UCASE_LOWER==ucase_getType(csp, c));
-                case UCHAR_UPPERCASE:
-                    return (UBool)(UCASE_UPPER==ucase_getType(csp, c));
-                case UCHAR_SOFT_DOTTED:
-                    return ucase_isSoftDotted(csp, c);
-                case UCHAR_CASE_SENSITIVE:
-                    return ucase_isCaseSensitive(csp, c);
-                default:
-                    break;
-                }
+                return ucase_hasBinaryProperty(c, which);
            } else if(column==UPROPS_SRC_NORM) {
 #if !UCONFIG_NO_NORMALIZATION
                /* normalization properties from unorm.icu */
--- a/icu4c/source/common/utrie.c
+++ b/icu4c/source/common/utrie.c
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2001-2005, International Business Machines
+*   Copyright (C) 2001-2006, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@@ -23,7 +23,6 @@
 #endif

 #include "unicode/utypes.h"
-#include "udataswp.h"
 #include "cmemory.h"
 #include "utrie.h"

@@ -686,55 +685,6 @@ utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) {

 /* serialization ------------------------------------------------------------ */

-/**
- * Trie data structure in serialized form:
- *
- * UTrieHeader header;
- * uint16_t index[header.indexLength];
- * uint16_t data[header.dataLength];
- */
-struct UTrieHeader {
-    /** "Trie" in big-endian US-ASCII (0x54726965) */
-    uint32_t signature;
-
-    /**
-     * options bit field:
-     *     9    1=Latin-1 data is stored linearly at data+UTRIE_DATA_BLOCK_LENGTH
-     *     8    0=16-bit data, 1=32-bit data
-     *  7..4    UTRIE_INDEX_SHIFT   // 0..UTRIE_SHIFT
-     *  3..0    UTRIE_SHIFT         // 1..9
-     */
-    uint32_t options;
-
-    /** indexLength is a multiple of UTRIE_SURROGATE_BLOCK_COUNT */
-    int32_t indexLength;
-
-    /** dataLength>=UTRIE_DATA_BLOCK_LENGTH */
-    int32_t dataLength;
-};
-
-typedef struct UTrieHeader UTrieHeader;
-
-/**
- * Constants for use with UTrieHeader.options.
- */
-enum {
-    /** Mask to get the UTRIE_SHIFT value from options. */
-    UTRIE_OPTIONS_SHIFT_MASK=0xf,
-
-    /** Shift options right this much to get the UTRIE_INDEX_SHIFT value. */
-    UTRIE_OPTIONS_INDEX_SHIFT=4,
-
-    /** If set, then the data (stage 2) array is 32 bits wide. */
-    UTRIE_OPTIONS_DATA_IS_32_BIT=0x100,
-
-    /**
-     * If set, then Latin-1 data (for U+0000..U+00ff) is stored in the data (stage 2) array
-     * as a simple, linear array at data+UTRIE_DATA_BLOCK_LENGTH.
-     */
-    UTRIE_OPTIONS_LATIN1_IS_LINEAR=0x200
-};
-
 /*
 * Default function for the folding value:
 * Just store the offset (16 bits) if there is any non-initial-value entry.
@@ -1077,79 +1027,6 @@ utrie_unserializeDummy(UTrie *trie,
    return actualLength;
 }

-/* swapping ----------------------------------------------------------------- */
-
-U_CAPI int32_t U_EXPORT2
-utrie_swap(const UDataSwapper *ds,
-           const void *inData, int32_t length, void *outData,
-           UErrorCode *pErrorCode) {
-    const UTrieHeader *inTrie;
-    UTrieHeader trie;
-    int32_t size;
-    UBool dataIs32;
-
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-    if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    /* setup and swapping */
-    if(length>=0 && length<sizeof(UTrieHeader)) {
-        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-        return 0;
-    }
-
-    inTrie=(const UTrieHeader *)inData;
-    trie.signature=ds->readUInt32(inTrie->signature);
-    trie.options=ds->readUInt32(inTrie->options);
-    trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
-    trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
-
-    if( trie.signature!=0x54726965 ||
-        (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
-        ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
-        trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
-        (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
-        trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
-        (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
-        ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
-    ) {
-        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
-        return 0;
-    }
-
-    dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
-    size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
-
-    if(length>=0) {
-        UTrieHeader *outTrie;
-
-        if(length<size) {
-            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-            return 0;
-        }
-
-        outTrie=(UTrieHeader *)outData;
-
-        /* swap the header */
-        ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
-
-        /* swap the index and the data */
-        if(dataIs32) {
-            ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
-            ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
-                                     (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
-        } else {
-            ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
-        }
-    }
-
-    return size;
-}
-
 /* enumeration -------------------------------------------------------------- */

 /* default UTrieEnumValue() returns the input value itself */
--- a/icu4c/source/common/utrie.h
+++ b/icu4c/source/common/utrie.h
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2001-2005, International Business Machines
+*   Copyright (C) 2001-2006, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@@ -736,6 +736,57 @@ utrie_swap(const UDataSwapper *ds,
           const void *inData, int32_t length, void *outData,
           UErrorCode *pErrorCode);

+/* serialization ------------------------------------------------------------ */
+
+/**
+ * Trie data structure in serialized form:
+ *
+ * UTrieHeader header;
+ * uint16_t index[header.indexLength];
+ * uint16_t data[header.dataLength];
+ * @internal
+ */
+typedef struct UTrieHeader {
+    /** "Trie" in big-endian US-ASCII (0x54726965) */
+    uint32_t signature;
+
+    /**
+     * options bit field:
+     *     9    1=Latin-1 data is stored linearly at data+UTRIE_DATA_BLOCK_LENGTH
+     *     8    0=16-bit data, 1=32-bit data
+     *  7..4    UTRIE_INDEX_SHIFT   // 0..UTRIE_SHIFT
+     *  3..0    UTRIE_SHIFT         // 1..9
+     */
+    uint32_t options;
+
+    /** indexLength is a multiple of UTRIE_SURROGATE_BLOCK_COUNT */
+    int32_t indexLength;
+
+    /** dataLength>=UTRIE_DATA_BLOCK_LENGTH */
+    int32_t dataLength;
+} UTrieHeader;
+
+/**
+ * Constants for use with UTrieHeader.options.
+ * @internal
+ */
+enum {
+    /** Mask to get the UTRIE_SHIFT value from options. */
+    UTRIE_OPTIONS_SHIFT_MASK=0xf,
+
+    /** Shift options right this much to get the UTRIE_INDEX_SHIFT value. */
+    UTRIE_OPTIONS_INDEX_SHIFT=4,
+
+    /** If set, then the data (stage 2) array is 32 bits wide. */
+    UTRIE_OPTIONS_DATA_IS_32_BIT=0x100,
+
+    /**
+     * If set, then Latin-1 data (for U+0000..U+00ff) is stored in the data (stage 2) array
+     * as a simple, linear array at data+UTRIE_DATA_BLOCK_LENGTH.
+     */
+    UTRIE_OPTIONS_LATIN1_IS_LINEAR=0x200
+};
+
 U_CDECL_END

 #endif
--- a/icu4c/source/common/utrie_swap.c
+++ b/icu4c/source/common/utrie_swap.c
@@ -0,0 +1,93 @@
+/*
+******************************************************************************
+*
+*   Copyright (C) 2001-2006, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  utrie_swap.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created by: Markus W. Scherer
+*
+* This performs data swapping for a folded trie (see utrie.c for details).
+*/
+
+#include "udataswp.h"
+#include "utrie.h"
+
+/* swapping ----------------------------------------------------------------- */
+
+U_CAPI int32_t U_EXPORT2
+utrie_swap(const UDataSwapper *ds,
+           const void *inData, int32_t length, void *outData,
+           UErrorCode *pErrorCode) {
+    const UTrieHeader *inTrie;
+    UTrieHeader trie;
+    int32_t size;
+    UBool dataIs32;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* setup and swapping */
+    if(length>=0 && length<sizeof(UTrieHeader)) {
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    inTrie=(const UTrieHeader *)inData;
+    trie.signature=ds->readUInt32(inTrie->signature);
+    trie.options=ds->readUInt32(inTrie->options);
+    trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
+    trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
+
+    if( trie.signature!=0x54726965 ||
+        (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
+        ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
+        trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
+        (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
+        trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
+        (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
+        ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
+    ) {
+        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
+        return 0;
+    }
+
+    dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);
+    size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
+
+    if(length>=0) {
+        UTrieHeader *outTrie;
+
+        if(length<size) {
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        outTrie=(UTrieHeader *)outData;
+
+        /* swap the header */
+        ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
+
+        /* swap the index and the data */
+        if(dataIs32) {
+            ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
+            ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
+                                     (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
+        } else {
+            ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
+        }
+    }
+
+    return size;
+}
+