ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable)

X-SVN-Rev: 734
2025-04-07 14:31:31 +00:00 · 2000-02-08 23:41:16 +00:00 · 2000-02-08 23:41:16 +00:00 · 42e5acace0
commit 42e5acace0
parent 139685dd3f
9 changed files with 287 additions and 438 deletions
--- a/icu4c/source/common/ucnv.c
+++ b/icu4c/source/common/ucnv.c
@ -113,13 +113,10 @@ void ucnv_close (UConverter * converter)
 {
  if (converter == NULL)
    return;
-  /* ### this cleanup would be cleaner in a function in UConverterImpl */
-  if ((converter->sharedData->conversionType == UCNV_ISO_2022) &&
-      (converter->mode == UCNV_SO))
-    {
-      ucnv_close (((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter);
-      uprv_free (converter->extraInfo);
-    }
+
+  if (converter->sharedData->impl->close != NULL) {
+    converter->sharedData->impl->close(converter);
+  }

  if (converter->sharedData->referenceCounter != ~0) {
    umtx_lock (NULL);
@ -341,18 +338,11 @@ void  ucnv_reset (UConverter * converter)
  converter->fromUnicodeStatus = 0;
  converter->UCharErrorBufferLength = 0;
  converter->charErrorBufferLength = 0;
-  if ((converter->sharedData->conversionType == UCNV_ISO_2022) &&
-      (converter->mode == UCNV_SO))
-    {
-      converter->charErrorBufferLength = 3;
-      converter->charErrorBuffer[0] = 0x1b;
-      converter->charErrorBuffer[1] = 0x25;
-      converter->charErrorBuffer[2] = 0x42;
-      ucnv_close (((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter);
-      ((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter = NULL;
-      ((UConverterDataISO2022 *) (converter->extraInfo))->escSeq2022Length = 0;
-    }
-  converter->mode = UCNV_SI;
+  if (converter->sharedData->impl->reset != NULL) {
+    converter->sharedData->impl->reset(converter);
+  } else {
+    converter->mode = UCNV_SI;
+  }

  return;
 }
@ -442,7 +432,6 @@ void   ucnv_fromUnicode (UConverter * _this,
 			 bool_t flush,
 			 UErrorCode * err)
 {
-  UConverterType myConvType;
  /*
   * Check parameters in for all conversions
   */
@ -471,43 +460,46 @@ void   ucnv_fromUnicode (UConverter * _this,
      *target += myTargetIndex;
      if (U_FAILURE (*err)) return;
    }
-  
-  myConvType = _this->sharedData->conversionType;  
-  if (offsets) 
-    {
-       int32_t targetSize = targetLimit - *target;
-       int32_t i;
-       switch (myConvType)
-	 {
-	 case UCNV_LATIN_1: case UCNV_SBCS : 
-	   {
-	     for (i=0; i<targetSize; i++) offsets[i] = i;
-	     break;
-	   }
-	 case UCNV_UTF16_LittleEndian: case UCNV_UTF16_BigEndian: case UCNV_DBCS: 
-	   {
-	     --targetSize;
-	     for (i=0; i<targetSize; i+=2) 
-	       {
-		 offsets[i] = i;
-		 offsets[i+1] = i;
-	       }
-	     break;
-	   }
-	 default:
-	   {
-	     _this->sharedData->impl->fromUnicodeWithOffsets(_this,
-								     target,
-								     targetLimit,
-								     source,
-								     sourceLimit,
-								     offsets,
-								     flush,
-								     err);
-	     return;
-	   }
-	 };    
+
+  if (offsets) {
+    if (_this->sharedData->impl->fromUnicodeWithOffsets != NULL) {
+	   _this->sharedData->impl->fromUnicodeWithOffsets(_this,
+								   target,
+								   targetLimit,
+								   source,
+								   sourceLimit,
+								   offsets,
+								   flush,
+								   err);
+       return;
+    } else {
+      /* all code points are of the same length */
+      int32_t targetSize = targetLimit - *target;
+      int32_t i, bytesPerChar = _this->sharedData->maxBytesPerChar;
+
+      if(bytesPerChar == 1) {
+        for (i=0; i<targetSize; i++) {
+          offsets[i] = i;
+        }
+      } else if(bytesPerChar == 2) {
+        for (i=0; i<targetSize; i++) {
+          offsets[i] = i>>1;
+        }
+      } else {
+        int32_t j = 0, k = bytesPerChar;
+
+        for (i=0; i<targetSize; i++) {
+          /* offsets[i] = i/bytesPerChar; -- without division */
+          offsets[i] = j;
+          if(--k == 0) {
+            k = bytesPerChar;
+            ++j;
+          }
+        }
+      }
    }
+  }
+
  /*calls the specific conversion routines */
  _this->sharedData->impl->fromUnicode(_this,
 					   target,
@ -535,7 +527,6 @@ void   ucnv_toUnicode (UConverter * _this,
  /*
   * Check parameters in for all conversions
   */
-  UConverterType myConvType;
  if (U_FAILURE (*err))   return;
  if ((_this == NULL) || ((UChar *) targetLimit < *target) || (sourceLimit < *source))
    {
@ -543,7 +534,6 @@ void   ucnv_toUnicode (UConverter * _this,
      return;
    }

-  myConvType = _this->sharedData->conversionType;
  /*
   * Deal with stored carry over data.  This is done in the common location
   * to avoid doing it for each conversion.
@ -563,40 +553,38 @@ void   ucnv_toUnicode (UConverter * _this,
 	return;
    }

-  if (offsets) 
-    {
+  if (offsets) {
+    if (_this->sharedData->impl->toUnicodeWithOffsets != NULL) {
+	  _this->sharedData->impl->toUnicodeWithOffsets(_this,
+								target,
+								targetLimit,
+								source,
+								sourceLimit,
+								offsets,
+								flush,
+								err);
+	  return;
+    } else {
+      /* all code points are of the same length */
      int32_t targetSize = targetLimit - *target;
-      int32_t i;
+      int32_t i, bytesPerChar = _this->sharedData->maxBytesPerChar;

-      switch (myConvType)
-	{
-	case UCNV_LATIN_1: case UCNV_SBCS : 
-	  {
-	    for (i=0; i<targetSize; i++) offsets[i] = i;
-	    break;
-	  }
-	case UCNV_UTF16_LittleEndian: case UCNV_UTF16_BigEndian: case UCNV_DBCS: 
-	  {
-	    for (i=0; i<targetSize; i++) 
-	      {
-		offsets[i] = i*2;
-	      }
-	    break;
-	  }
-	default:
-	  {
-	    _this->sharedData->impl->toUnicodeWithOffsets(_this,
-								  target,
-								  targetLimit,
-								  source,
-								  sourceLimit,
-								  offsets,
-								  flush,
-								  err);
-	    return;
-	  }
-	};
+      if(bytesPerChar == 1) {
+        for (i=0; i<targetSize; i++) {
+          offsets[i] = i;
+        }
+      } else if(bytesPerChar == 2) {
+        for (i=0; i<targetSize; i++) {
+          offsets[i] = i<<1;
+        }
+      } else {
+        for (i=0; i<targetSize; i++) {
+          offsets[i] = i*bytesPerChar;
+        }
+      }
    }
+  }
+
  /*calls the specific conversion routines */
  _this->sharedData->impl->toUnicode(_this,
 					  target,
--- a/icu4c/source/common/ucnv2022.c
+++ b/icu4c/source/common/ucnv2022.c
@ -120,6 +120,45 @@ UCNV_TableStates_2022 getKey_2022(char source,
                                  int32_t* key,
                                  int32_t* offset);

+/*
+ * Open hook for ISO-2022 converters.
+ * Stores the three bytes 0x1b 0x25 0x42 (ESC % B) in charErrorBuffer and
+ * allocates the per-converter UConverterDataISO2022 state in extraInfo,
+ * starting with no current sub-converter and an empty escape sequence.
+ * name and locale are not used here.
+ * If the allocation fails, *errorCode is set to U_MEMORY_ALLOCATION_ERROR
+ * and extraInfo is left NULL.
+ */
+static void
+_ISO2022Open(UConverter *cnv, const char *name, const char *locale, UErrorCode *errorCode) {
+    UConverterDataISO2022 *myData;
+
+    cnv->charErrorBuffer[0] = 0x1b;
+    cnv->charErrorBuffer[1] = 0x25;
+    cnv->charErrorBuffer[2] = 0x42;
+    cnv->charErrorBufferLength = 3;
+
+    cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
+    myData = (UConverterDataISO2022 *) cnv->extraInfo;
+    if(myData == NULL) {
+        *errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    myData->currentConverter = NULL;
+    myData->escSeq2022Length = 0;
+}
+
+/*
+ * Close hook for ISO-2022 converters.
+ * Closes the currently selected sub-converter (if any) and frees the
+ * UConverterDataISO2022 state that _ISO2022Open allocated in extraInfo.
+ *
+ * The state is released independently of converter->mode: it is allocated
+ * for every ISO-2022 converter when it is opened, so freeing it only in
+ * UCNV_SO mode would leak it for converters closed in any other mode.
+ * Assumes extraInfo is either NULL or the block set up by _ISO2022Open.
+ */
+static void
+_ISO2022Close(UConverter *converter) {
+    UConverterDataISO2022 *myData = (UConverterDataISO2022 *) (converter->extraInfo);
+
+    /* extraInfo stays NULL when the allocation in _ISO2022Open failed */
+    if (myData != NULL) {
+        /* ucnv_close() returns immediately for a NULL converter */
+        ucnv_close (myData->currentConverter);
+        uprv_free (myData);
+        converter->extraInfo = NULL;
+    }
+}
+
+/*
+ * Reset hook for ISO-2022 converters.
+ * Does nothing unless the converter is in UCNV_SO mode. In that mode it
+ * reloads charErrorBuffer with the bytes 0x1b 0x25 0x42, closes and clears
+ * the current sub-converter, empties the stored escape sequence and
+ * switches the converter back to UCNV_SI.
+ */
+static void
+_ISO2022Reset(UConverter *converter) {
+    UConverterDataISO2022 *myData;
+
+    if (converter->mode != UCNV_SO) {
+        return;
+    }
+
+    converter->charErrorBuffer[0] = 0x1b;
+    converter->charErrorBuffer[1] = 0x25;
+    converter->charErrorBuffer[2] = 0x42;
+    converter->charErrorBufferLength = 3;
+
+    myData = (UConverterDataISO2022 *) (converter->extraInfo);
+    ucnv_close (myData->currentConverter);
+    myData->currentConverter = NULL;
+    myData->escSeq2022Length = 0;
+
+    converter->mode = UCNV_SI;
+}
+
 void T_UConverter_fromUnicode_ISO_2022(UConverter* _this,
                                       char** target,
                                       const char* targetLimit,
@ -632,6 +671,13 @@ UChar T_UConverter_getNextUChar_ISO_2022(UConverter* converter,
 static UConverterImpl _ISO2022Impl={
    UCNV_ISO_2022,

+    NULL,
+    NULL,
+
+    _ISO2022Open,
+    _ISO2022Close,
+    _ISO2022Reset,
+
    T_UConverter_toUnicode_ISO_2022,
    T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,
    T_UConverter_fromUnicode_ISO_2022,
@ -648,6 +694,12 @@ extern UConverterSharedData _ISO2022Data={

 /* EBCDICStateful ----------------------------------------------------------- */

+U_CFUNC void
+_DBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode);
+
+U_CFUNC void
+_DBCSUnload(UConverterSharedData *sharedData);
+
 void T_UConverter_toUnicode_EBCDIC_STATEFUL (UConverter * _this,
                                             UChar ** target,
                                             const UChar * targetLimit,
@ -1223,6 +1275,13 @@ UChar T_UConverter_getNextUChar_EBCDIC_STATEFUL(UConverter* converter,
 static UConverterImpl _EBCDICStatefulImpl={
    UCNV_EBCDIC_STATEFUL,

+    _DBCSLoad,
+    _DBCSUnload,
+
+    NULL,
+    NULL,
+    NULL,
+
    T_UConverter_toUnicode_EBCDIC_STATEFUL,
    T_UConverter_toUnicode_EBCDIC_STATEFUL_OFFSETS_LOGIC,
    T_UConverter_fromUnicode_EBCDIC_STATEFUL,
--- a/icu4c/source/common/ucnv_bld.c
+++ b/icu4c/source/common/ucnv_bld.c
@ -62,22 +62,6 @@ static struct {
 */
 static UConverter *createConverterFromFile (const char *converterName, UErrorCode * err);

-/*Given a file returns a newly allocated CompactByteArray based on the a serialized one */
-static CompactByteArray *createCompactByteArrayFromFile (FileStream * infile, UErrorCode * err);
-
-/*Given a file returns a newly allocated CompactShortArray based on the a serialized one */
-static CompactShortArray *createCompactShortArrayFromFile (FileStream * infile, UErrorCode * err);
-
-/*Currently we have function to take us from a codepage name to
- *a platform type and a codepage number
- *assuming the following
- *codepage name = $PLATFORM-#CODEPAGE
- *e.g. ibm-949 = platform type = UCNV_IBM and codepage # = 949
- *the functions below implement that
- */
-static UConverterPlatform getPlatformFromName (char *name);
-static int32_t getCodepageNumberFromName (char *name);
-
 static const UConverterSharedData *getAlgorithmicTypeFromName (const char *realName);


@ -86,10 +70,10 @@ static const UConverterSharedData *getAlgorithmicTypeFromName (const char *realN
 */
 static void initializeDataConverter (UConverter * myConverter);
 static void initializeAlgorithmicConverter (UConverter * myConverter);
+
 /**
 *hash function for UConverterSharedData
 */
-
 static int32_t uhash_hashSharedData (void *sharedData);

 /*Defines the struct of a UConverterSharedData the immutable, shared part of
@ -123,12 +107,11 @@ UConverterSharedData_1_4;
 /**
 * Un flatten shared data from a UDATA..
 */
-U_CAPI  UConverterSharedData* U_EXPORT2 ucnv_data_unFlattenClone(const UConverterSharedData_1_4 *data, UErrorCode *status);
+U_CAPI  UConverterSharedData* U_EXPORT2 ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *status);


 /*initializes some global variables */
 UHashtable *SHARED_DATA_HASHTABLE = NULL;
-UHashtable *ALGORITHMIC_CONVERTERS_HASHTABLE = NULL;

 /*Returns uppercased string */
 char *
@ -142,167 +125,6 @@ char *
  return name;
 }

-/* Returns true in c is a in set 'setOfChars', false otherwise
- */
-bool_t 
-  isInSet (char c, const char *setOfChars)
-{
-  uint8_t i = 0;
-
-  while (setOfChars[i] != '\0')
-    {
-      if (c == setOfChars[i++])
-        return TRUE;
-    }
-
-  return FALSE;
-}
-
-/* Returns pointer to the next non-whitespace (or non-separator)
- */
-int32_t 
-  nextTokenOffset (const char *line, const char *separators)
-{
-  int32_t i = 0;
-
-  while (line[i] && isInSet (line[i], separators))
-    i++;
-
-  return i;
-}
-
-/* Returns pointer to the next token based on the set of separators
- */
-char *
-  getToken (char *token, char *line, const char *separators)
-{
-  int32_t i = nextTokenOffset (line, separators);
-  int8_t j = 0;
-
-  while (line[i] && (!isInSet (line[i], separators)))
-    token[j++] = line[i++];
-  token[j] = '\0';
-
-  return line + i;
-}
-
-int32_t uhash_hashIString(const void* name)
-{
-  char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
-  uprv_strcpy(myName, (char*)name);
-  strtoupper(myName);
-
-  return uhash_hashString(myName);
-}
-
-CompactShortArray*  createCompactShortArrayFromFile (FileStream * infile, UErrorCode * err)
-{
-  int32_t i = 0;
-  int16_t *myShortArray = NULL;
-  uint16_t *myIndexArray = NULL;
-  int32_t myValuesCount = 0;
-  int32_t myIndexCount = 0;
-  int32_t myBlockShift = 0;
-
-  if (U_FAILURE (*err))
-    return NULL;
-
-  /*reads in the lengths of the 2 serialized array */
-  T_FileStream_read (infile, &myValuesCount, sizeof (int32_t));
-  T_FileStream_read (infile, &myIndexCount, sizeof (int32_t));
-  T_FileStream_read (infile, &myBlockShift, sizeof (int32_t));
-
-  if (myValuesCount < 0)
-    {
-      *err = U_INVALID_TABLE_FILE;
-      return NULL;
-    }
-  myShortArray = (int16_t *) uprv_malloc (myValuesCount * sizeof (int16_t));
-  if (myShortArray == NULL)
-    {
-      *err = U_MEMORY_ALLOCATION_ERROR;
-      return NULL;
-    }
-  /*reads in the first array */
-  T_FileStream_read (infile, myShortArray, myValuesCount * sizeof (int16_t));
-
-  if (myIndexCount < 0)
-    {
-      uprv_free (myShortArray);
-      *err = U_INVALID_TABLE_FILE;
-      return NULL;
-    }
-
-  myIndexArray = (uint16_t *) uprv_malloc (myIndexCount * sizeof (uint16_t));
-  if (myIndexArray == NULL)
-    {
-      uprv_free (myShortArray);
-      *err = U_MEMORY_ALLOCATION_ERROR;
-      return NULL;
-    }
-
-  /*reads in the second array */
-  T_FileStream_read (infile, myIndexArray, myIndexCount * sizeof (uint16_t));
-
-  /*create a compact array from the data just read
-   *that adopts our newly created arrays
-   */
-  return ucmp16_openAdoptWithBlockShift (myIndexArray, myShortArray, myValuesCount, 0, myBlockShift);
-}
-
-CompactByteArray*  createCompactByteArrayFromFile (FileStream * infile,
-                                                   UErrorCode * err)
-{
-  int32_t i = 0;
-  int8_t *myByteArray = NULL;
-  uint16_t *myIndexArray = NULL;
-  int32_t myValuesCount = 0;
-  int32_t myIndexCount = 0;
-
-  if (U_FAILURE (*err))
-    return NULL;
-
-  /*reads in the lengths of the 2 serialized array */
-  T_FileStream_read (infile, &myValuesCount, sizeof (int32_t));
-  T_FileStream_read (infile, &myIndexCount, sizeof (int32_t));
-
-  if (myValuesCount < 0)
-    {
-      *err = U_INVALID_TABLE_FILE;
-      return NULL;
-    }
-  myByteArray = (int8_t *) uprv_malloc (myValuesCount * sizeof (int8_t));
-  if (myByteArray == NULL)
-    {
-      *err = U_MEMORY_ALLOCATION_ERROR;
-      return NULL;
-    }
-  /*reads in the first array */
-  T_FileStream_read (infile, myByteArray, myValuesCount * sizeof (int8_t));
-
-  if (myIndexCount < 0)
-    {
-      uprv_free (myByteArray);
-      *err = U_INVALID_TABLE_FILE;
-      return NULL;
-    }
-  myIndexArray = (uint16_t *) uprv_malloc (myIndexCount * sizeof (uint16_t));
-  if (myIndexArray == NULL)
-    {
-      uprv_free (myByteArray);
-      *err = U_MEMORY_ALLOCATION_ERROR;
-      return NULL;
-    }
-  /*reads in the second array */
-  T_FileStream_read (infile, myIndexArray, myIndexCount * sizeof (uint16_t));
-
-  /*create a compact array from the data just read
-   *that adopts our newly created arrays
-   */
-  return ucmp8_openAdopt (myIndexArray, myByteArray, myValuesCount);
-}
-
-
 static bool_t
 isCnvAcceptable(void *context,
             const char *type, const char *name,
@ -323,20 +145,12 @@ isCnvAcceptable(void *context,

 UConverter*  createConverterFromFile (const char *fileName, UErrorCode * err)
 {
-  int32_t i = 0;
-  const int8_t *myByteArray = NULL;
-  const uint16_t *myIndexArray = NULL;
-  int32_t myValuesCount = 0;
-  int32_t myIndexCount = 0;
  UConverter *myConverter = NULL;
-  int8_t errorLevel = 0;
-
  UDataMemory *data;

  if (err == NULL || U_FAILURE (*err)) {
    return NULL;
  }
-  

  data = udata_openChoice(NULL, DATA_TYPE, fileName, isCnvAcceptable, NULL, err);
  if(U_FAILURE(*err))
@ -352,40 +166,19 @@ UConverter*  createConverterFromFile (const char *fileName, UErrorCode * err)
      return NULL;
    }

-  myConverter->sharedData =
-    (UConverterSharedData *) udata_getMemory(data);
-
-  if (myConverter->sharedData == NULL)
-    {
-      udata_close(data);
-      uprv_free (myConverter);
-      *err = U_MEMORY_ALLOCATION_ERROR;
-      return NULL;
-    }
-
-  /* clone it. OK to drop the original sharedData */
-  myConverter->sharedData = ucnv_data_unFlattenClone((UConverterSharedData_1_4 *)myConverter->sharedData, err);
-
-  myConverter->sharedData->dataMemory = (void*)data; /* for future use */
-
-
+  myConverter->sharedData = ucnv_data_unFlattenClone(data, err);
  if(U_FAILURE(*err))
    {
      udata_close(data);
      uprv_free (myConverter);
-      *err = U_MEMORY_ALLOCATION_ERROR;
      return NULL;
    }

-  if (U_SUCCESS (*err))
-    {
-      initializeDataConverter (myConverter);
-    }
+  initializeDataConverter (myConverter);

  return myConverter;
 }

-
 void 
  copyPlatformString (char *platformString, UConverterPlatform pltfrm)
 {
@ -420,35 +213,6 @@ const UConverterSharedData *
  return NULL;
 }

-
-UConverterPlatform 
-  getPlatformFromName (char *name)
-{
-  char myPlatform[10];
-  char mySeparators[2] = {'-', '\0'};
-
-  getToken (myPlatform, name, mySeparators);
-  strtoupper (myPlatform);
-
-  if (uprv_strcmp (myPlatform, "IBM") == 0)
-    return UCNV_IBM;
-  else
-    return UCNV_UNKNOWN;
-}
-
-int32_t 
-  getCodepageNumberFromName (char *name)
-{
-  char myNumber[10];
-  char mySeparators[2] = {'-', '\0'};
-  char *line = NULL;
-
-  line = getToken (myNumber, name, mySeparators);
-  getToken (myNumber, line, mySeparators);
-
-  return T_CString_stringToInteger (myNumber, 10);
-}
-
 int32_t uhash_hashSharedData (void *sharedData)
 {
  return uhash_hashIString(((UConverterSharedData *) sharedData)->name);
@ -487,16 +251,13 @@ UConverterSharedData *getSharedConverterData (const char *name)
  /*special case when no Table has yet been created we return NULL */
  if (SHARED_DATA_HASHTABLE == NULL)    return NULL;
  else
-    /*    return (UConverterSharedData *) uhash_get (SHARED_DATA_HASHTABLE, uhash_hashString (name));*/
    {
-      UConverterSharedData *i = (UConverterSharedData*)uhash_get (SHARED_DATA_HASHTABLE, uhash_hashIString (name));
-      return i;
+      return (UConverterSharedData*)uhash_get (SHARED_DATA_HASHTABLE, uhash_hashIString (name));
    }
 }

 /*frees the string of memory blocks associates with a sharedConverter
 *if and only if the referenceCounter == 0
- * ### this cleanup would be cleaner in a function in UConverterImpl
 */
 bool_t   deleteSharedConverterData (UConverterSharedData * deadSharedData)
 {
@ -510,36 +271,15 @@ bool_t   deleteSharedConverterData (UConverterSharedData * deadSharedData)
       
       When we have an API to simply 'init' a ucmp8, then no action at all will
       need to happen.   --srl 
+
+       This means that the compact arrays would have to be static fields in
+       UConverterSharedData, not pointers to allocated structures.
+       Markus
    */
-    
-    switch (deadSharedData->conversionType)
-    {
-    case UCNV_SBCS:
-    {
-        ucmp8_close (deadSharedData->table->sbcs.fromUnicode);
-        uprv_free (deadSharedData->table);
-    };
-    break;
-    
-    case UCNV_MBCS:
-    {
-        ucmp16_close (deadSharedData->table->mbcs.fromUnicode);
-        ucmp16_close (deadSharedData->table->mbcs.toUnicode);
-	    uprv_free (deadSharedData->table);
-    };
-    break;

-    case UCNV_DBCS:
-    case UCNV_EBCDIC_STATEFUL:
-    {
-        ucmp16_close (deadSharedData->table->dbcs.fromUnicode);
-        ucmp16_close (deadSharedData->table->dbcs.toUnicode);
-	    uprv_free (deadSharedData->table);
-    };
-    break;
-
-    default: ; /* semicolon makes MSVC happy */
-    };
+    if (deadSharedData->impl->unload != NULL) {
+        deadSharedData->impl->unload(deadSharedData);
+    }

    if(deadSharedData->dataMemory != NULL)
    {
@ -647,13 +387,6 @@ UConverter *
    }
  else
    {
-      /* ### we have an algorithmic converter, it does not need to be cached?! */
-      if (getSharedConverterData (realName) == NULL)
-        {
-          /* put the shared object in shared table */
-          shareConverterData (mySharedConverterData);
-        }
-
      myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
      if (myUConverter == NULL)
        {
@ -714,30 +447,19 @@ void
  myConverter->subCharLen = myConverter->sharedData->defaultConverterValues.subCharLen;
  uprv_memcpy (myConverter->subChar, myConverter->sharedData->defaultConverterValues.subChar, UCNV_MAX_SUBCHAR_LEN);

-  /* ### it would be cleaner to have the following in a function in UConverterImpl, with a UErrorCode */
-  switch (myConverter->sharedData->conversionType)
-    {
-    case UCNV_ISO_2022:
-      {
-        myConverter->charErrorBuffer[0] = 0x1b;
-        myConverter->charErrorBuffer[1] = 0x25;
-        myConverter->charErrorBuffer[2] = 0x42;
-        myConverter->charErrorBufferLength = 3;
-        myConverter->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
-        /* ### check for extraInfo==NULL !! does this need to be allocated at all? */
-        ((UConverterDataISO2022 *) myConverter->extraInfo)->currentConverter = NULL;
-        ((UConverterDataISO2022 *) myConverter->extraInfo)->escSeq2022Length = 0;
-        break;
-      }
-    default:
-      break;
-    };
+  if(myConverter->sharedData->impl->open != NULL) {
+    UErrorCode errorCode = U_ZERO_ERROR;
+
+    /* ### pass in real parameters, and use the error code */
+    myConverter->sharedData->impl->open(myConverter, NULL, NULL, &errorCode);
+  }
 }

-UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData_1_4 *source, UErrorCode *status)
+UConverterSharedData* ucnv_data_unFlattenClone(UDataMemory *pData, UErrorCode *status)
 {
-    const uint8_t *raw, *oldraw;
-    UConverterSharedData *data = NULL;
+    const uint8_t *raw = (const uint8_t *)udata_getMemory(pData);
+    const UConverterSharedData_1_4 *source = (const UConverterSharedData_1_4 *) raw;
+    UConverterSharedData *data;
    UConverterType type = source->conversionType;

    if(U_FAILURE(*status))
@ -769,6 +491,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData_1_4 *s
    }

    /* fill in fields from the loaded data */
+    data->dataMemory = (void*)pData; /* for future use */
    data->name = source->name; /* ### this could/should come from the caller - should be the same as the canonical name?!! */
    data->codepage = source->codepage;
    data->platform = source->platform;
@ -776,51 +499,12 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData_1_4 *s
    data->maxBytesPerChar = source->maxBytesPerChar;
    uprv_memcpy(&data->defaultConverterValues, &source->defaultConverterValues, sizeof(data->defaultConverterValues));

-    raw = (uint8_t*)source + source->structSize;
-
-    /* the checks above made sure that the type is valid for a data-based converter */
-  switch (data->conversionType)
-    {
-    case UCNV_SBCS:
-      data->table->sbcs.toUnicode = (UChar*)raw;
-      raw += sizeof(UChar)*256;
-
-      data->table->sbcs.fromUnicode = ucmp8_cloneFromData(&raw, status);
-
-      break;
-
-    case UCNV_EBCDIC_STATEFUL:
-    case UCNV_DBCS:
-      oldraw = raw;
-
-      data->table->dbcs.toUnicode=ucmp16_cloneFromData(&raw, status);
-
-      /* pad to 4 */
-      if(((raw-oldraw)&3)!=0) {
-          raw+=4-((raw-oldraw)&3);
-      }
-
-      data->table->dbcs.fromUnicode =ucmp16_cloneFromData(&raw, status);
-
-      break;
-
-    case UCNV_MBCS:
-      data->table->mbcs.starters = (bool_t*)raw;
-      raw += sizeof(bool_t)*256;
-      
-      oldraw = raw;
-
-      data->table->mbcs.toUnicode   = ucmp16_cloneFromData(&raw, status);
-
-      /* pad to 4 */
-      if(((raw-oldraw)&3)!=0) {
-          raw+=4-((raw-oldraw)&3);
-      }
-
-      data->table->mbcs.fromUnicode = ucmp16_cloneFromData(&raw, status);
-
-      break;
+    if(data->impl->load != NULL) {
+        data->impl->load(data, raw + source->structSize, status);
+        if(U_FAILURE(*status)) {
+            uprv_free(data);
+            return NULL;
+        }
    }
-  
-  return data;
+    return data;
 }
--- a/icu4c/source/common/ucnv_cnv.h
+++ b/icu4c/source/common/ucnv_cnv.h
@ -105,6 +105,14 @@ U_CDECL_BEGIN
                  for (;My_i < myTargetIndex;My_i++) {offsets[My_i] += currentOffset  ;   } \
                }

+/*
+ * Optional per-converter-type hooks stored in UConverterImpl.
+ * Callers test each pointer against NULL before invoking it.
+ */
+
+/* Fills in sharedData from the raw converter data that follows the header. */
+typedef void (*UConverterLoad) (UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode);
+/* Releases whatever the matching load hook set up in sharedData. */
+typedef void (*UConverterUnload) (UConverterSharedData *sharedData);
+
+/* Per-converter initialization; reports failure through pErrorCode. */
+typedef void (*UConverterOpen) (UConverter *cnv, const char *name, const char *locale, UErrorCode *pErrorCode);
+/* Per-converter cleanup, called from ucnv_close(). */
+typedef void (*UConverterClose) (UConverter *cnv);
+
+/* Type-specific reset, called from ucnv_reset() instead of the default mode reset. */
+typedef void (*UConverterReset) (UConverter *cnv);
+
 typedef void (*T_ToUnicodeFunction) (UConverter *,
 				     UChar **,
 				     const UChar *,
@ -147,6 +155,13 @@ void flushInternalCharBuffer (UConverter * _this,
 struct UConverterImpl {
    UConverterType type;

+    UConverterLoad load;
+    UConverterUnload unload;
+
+    UConverterOpen open;
+    UConverterClose close;
+    UConverterReset reset;
+
    T_ToUnicodeFunction toUnicode;
    T_ToUnicodeFunction toUnicodeWithOffsets;
    T_FromUnicodeFunction fromUnicode;
--- a/icu4c/source/common/ucnv_utf.c
+++ b/icu4c/source/common/ucnv_utf.c
@ -696,6 +696,13 @@ UChar T_UConverter_getNextUChar_UTF8(UConverter* converter,
 static UConverterImpl _UTF8Impl={
    UCNV_UTF8,

+    NULL,
+    NULL,
+
+    NULL,
+    NULL,
+    NULL,
+
    T_UConverter_toUnicode_UTF8,
    T_UConverter_toUnicode_UTF8_OFFSETS_LOGIC,
    T_UConverter_fromUnicode_UTF8,
@ -861,6 +868,13 @@ UChar T_UConverter_getNextUChar_UTF16_BE(UConverter* converter,
 static UConverterImpl _UTF16BEImpl={
    UCNV_UTF16_BigEndian,

+    NULL,
+    NULL,
+
+    NULL,
+    NULL,
+    NULL,
+
    T_UConverter_toUnicode_UTF16_BE,
    NULL,
    T_UConverter_fromUnicode_UTF16_BE,
@ -1030,6 +1044,13 @@ UChar T_UConverter_getNextUChar_UTF16_LE(UConverter* converter,
 static UConverterImpl _UTF16LEImpl={
    UCNV_UTF16_LittleEndian,

+    NULL,
+    NULL,
+
+    NULL,
+    NULL,
+    NULL,
+
    T_UConverter_toUnicode_UTF16_LE,
    NULL,
    T_UConverter_fromUnicode_UTF16_LE,
--- a/icu4c/source/common/ucnvlat1.c
+++ b/icu4c/source/common/ucnvlat1.c
@ -144,6 +144,13 @@ UChar T_UConverter_getNextUChar_LATIN_1(UConverter* converter,
 static UConverterImpl _Latin1Impl={
    UCNV_LATIN_1,

+    NULL,
+    NULL,
+
+    NULL,
+    NULL,
+    NULL,
+
    T_UConverter_toUnicode_LATIN_1,
    NULL,
    T_UConverter_fromUnicode_LATIN_1,
--- a/icu4c/source/common/ucnvmbcs.c
+++ b/icu4c/source/common/ucnvmbcs.c
@ -13,6 +13,7 @@
 */

 #include "unicode/utypes.h"
+#include "cmemory.h"
 #include "ucmp16.h"
 #include "ucmp8.h"
 #include "unicode/ucnv_bld.h"
@ -21,6 +22,27 @@

 /* MBCS --------------------------------------------------------------------- */

+/*
+ * Load hook for MBCS shared data.
+ * raw points at the converter data: first 256 bool_t "starters" flags,
+ * then the serialized toUnicode table, padding up to a multiple of 4 bytes
+ * (counted from the end of the starters), then the fromUnicode table.
+ * The starters are referenced in place; both tables are created with
+ * ucmp16_cloneFromData(), which reports errors through pErrorCode.
+ */
+static void
+_MBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
+    const uint8_t *tablesStart;
+
+    sharedData->table->mbcs.starters = (bool_t*)raw;
+    raw += sizeof(bool_t)*256;
+    tablesStart = raw;
+
+    sharedData->table->mbcs.toUnicode = ucmp16_cloneFromData(&raw, pErrorCode);
+
+    /* pad to 4 */
+    while(((raw-tablesStart)&3)!=0) {
+        ++raw;
+    }
+
+    sharedData->table->mbcs.fromUnicode = ucmp16_cloneFromData(&raw, pErrorCode);
+}
+
+/*
+ * Unload hook for MBCS shared data: closes the fromUnicode and toUnicode
+ * tables with ucmp16_close() and frees the table structure itself.
+ */
+static void
+_MBCSUnload(UConverterSharedData *sharedData) {
+    ucmp16_close (sharedData->table->mbcs.fromUnicode);
+    ucmp16_close (sharedData->table->mbcs.toUnicode);
+	uprv_free (sharedData->table);
+}
+
 void T_UConverter_toUnicode_MBCS (UConverter * _this,
                               UChar ** target,
                               const UChar * targetLimit,
@ -537,6 +559,13 @@ UChar T_UConverter_getNextUChar_MBCS(UConverter* converter,
 static UConverterImpl _MBCSImpl={
    UCNV_MBCS,

+    _MBCSLoad,
+    _MBCSUnload,
+
+    NULL,
+    NULL,
+    NULL,
+
    T_UConverter_toUnicode_MBCS,
    T_UConverter_toUnicode_MBCS_OFFSETS_LOGIC,
    T_UConverter_fromUnicode_MBCS,
--- a/icu4c/source/common/ucnvsbcs.c
+++ b/icu4c/source/common/ucnvsbcs.c
@ -13,6 +13,7 @@
 */

 #include "unicode/utypes.h"
+#include "cmemory.h"
 #include "ucmp16.h"
 #include "ucmp8.h"
 #include "unicode/ucnv_bld.h"
@ -21,6 +22,19 @@

 /* SBCS --------------------------------------------------------------------- */

+/*
+ * Load hook for SBCS shared data.
+ * raw points at the converter data: 256 UChars that serve as the toUnicode
+ * table (referenced in place), followed by the serialized fromUnicode table,
+ * which is created with ucmp8_cloneFromData(). Errors are reported through
+ * pErrorCode.
+ */
+static void
+_SBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
+    const uint8_t *fromUnicodeData = raw + sizeof(UChar)*256;
+
+    sharedData->table->sbcs.toUnicode = (UChar*)raw;
+    sharedData->table->sbcs.fromUnicode = ucmp8_cloneFromData(&fromUnicodeData, pErrorCode);
+}
+
+/*
+ * Unload hook for SBCS shared data: closes the fromUnicode table with
+ * ucmp8_close() and frees the table structure itself.
+ */
+static void
+_SBCSUnload(UConverterSharedData *sharedData) {
+    ucmp8_close (sharedData->table->sbcs.fromUnicode);
+    uprv_free (sharedData->table);
+}
+
 void T_UConverter_toUnicode_SBCS (UConverter * _this,
                                  UChar ** target,
                                  const UChar * targetLimit,
@ -211,6 +225,13 @@ UChar T_UConverter_getNextUChar_SBCS(UConverter* converter,
 static UConverterImpl _SBCSImpl={
    UCNV_SBCS,

+    _SBCSLoad,
+    _SBCSUnload,
+
+    NULL,
+    NULL,
+    NULL,
+
    T_UConverter_toUnicode_SBCS,
    NULL,
    T_UConverter_fromUnicode_SBCS,
@ -227,6 +248,23 @@ extern UConverterSharedData _SBCSData={

 /* DBCS --------------------------------------------------------------------- */

+/*
+ * Load hook for DBCS shared data (also used by the EBCDIC stateful
+ * converter implementation).
+ * raw points at the serialized toUnicode table, followed by padding up to a
+ * multiple of 4 bytes (counted from raw) and the serialized fromUnicode
+ * table. Both are created with ucmp16_cloneFromData(), which reports errors
+ * through pErrorCode.
+ */
+U_CFUNC void
+_DBCSLoad(UConverterSharedData *sharedData, const uint8_t *raw, UErrorCode *pErrorCode) {
+    const uint8_t *tablesStart = raw;
+
+    sharedData->table->dbcs.toUnicode = ucmp16_cloneFromData(&raw, pErrorCode);
+
+    /* pad to 4 */
+    while(((raw-tablesStart)&3)!=0) {
+        ++raw;
+    }
+
+    sharedData->table->dbcs.fromUnicode = ucmp16_cloneFromData(&raw, pErrorCode);
+}
+
+/*
+ * Unload hook for DBCS shared data (also used by the EBCDIC stateful
+ * converter implementation): closes the fromUnicode and toUnicode tables
+ * with ucmp16_close() and frees the table structure itself.
+ */
+U_CFUNC void
+_DBCSUnload(UConverterSharedData *sharedData) {
+    ucmp16_close (sharedData->table->dbcs.fromUnicode);
+    ucmp16_close (sharedData->table->dbcs.toUnicode);
+	uprv_free (sharedData->table);
+}
+
 void   T_UConverter_toUnicode_DBCS (UConverter * _this,
                                    UChar ** target,
                                    const UChar * targetLimit,
@ -471,6 +509,13 @@ UChar T_UConverter_getNextUChar_DBCS(UConverter* converter,
 static UConverterImpl _DBCSImpl={
    UCNV_DBCS,

+    _DBCSLoad,
+    _DBCSUnload,
+
+    NULL,
+    NULL,
+    NULL,
+
    T_UConverter_toUnicode_DBCS,
    NULL,
    T_UConverter_fromUnicode_DBCS,
--- a/icu4c/source/common/unicode/ucnv_bld.h
+++ b/icu4c/source/common/unicode/ucnv_bld.h
@ -153,6 +153,10 @@ typedef struct UConverterImpl UConverterImpl;
 * useful state fields in UConverter that are reserved for the callbacks,
 * and directly included structures instead of pointers to allocated
 * memory, like for UConverterTable and its variant fields.
+ *
+ * Also, with the more C++-like converter implementation,
+ * the conversionType does not need to be in UConverterSharedData any more:
+ * it is in UConverterImpl and hardly used.
 */

 /*
@ -254,7 +258,4 @@ UConverterDataISO2022;

 #define CONVERTER_FILE_EXTENSION ".cnv"

-/*case insensitive hash key*/
-U_CAPI int32_t U_EXPORT2 uhash_hashIString(const void* name);
-
 #endif /* _UCNV_BLD */