ICU-1220 Report position at which errors occur.

Fix memory dumps in messages by explicetly adding U+0000 at the end of the strings that we getBuffer() to u_wmsg(). Add --fallback and --no-fallback options to control use of fallback. X-SVN-Rev: 7421
2025-04-21 04:29:31 +00:00 · 2002-01-09 20:42:19 +00:00 · 2002-01-09 20:42:19 +00:00 · ca88544452
commit ca88544452
parent da1e186728
3 changed files with 89 additions and 43 deletions
--- a/icu4c/source/extra/uconv/root.txt
+++ b/icu4c/source/extra/uconv/root.txt
@ -1,6 +1,6 @@
 // -*- Coding: utf-8; -*-  [all uconv resource files]
 // Copyright (c) 2000 IBM, Inc. and Others.
-// $Revision: 1.18 $
+// $Revision: 1.19 $
 //
 // Root translation file for uconv messages.
 // So you want to translate this file??? Great!
@ -28,7 +28,7 @@ root

  lcUsageWord { "usage" }
  ucUsageWord { "Usage" }
-  usage { "{0}: {1} [ -h, -?, --help ] [ -V, --version ] [ -s, --silent ] [ -v, --verbose ] [ -l, --list | --list-code code | --default-code | -L, --list-transliterators ] [ --canon ] [ -x transliterator ] [ --to-callback callback | -c ] [ --from-callback callback | -i ] [ --callback callback ] -f, --from-code code -t, --to-code code [ file ... ] [ -o, --output file ]\n" }
+  usage { "{0}: {1} [ -h, -?, --help ] [ -V, --version ] [ -s, --silent ] [ -v, --verbose ] [ -l, --list | --list-code code | --default-code | -L, --list-transliterators ] [ --canon ] [ -x transliterator ] [ --to-callback callback | -c ] [ --from-callback callback | -i ] [ --callback callback ] [ --fallback | --no-fallback ] -f, --from-code code -t, --to-code code [ file ... ] [ -o, --output file ]\n" }

  help {  "Options:  -h, --help                    print this message\n"
          "          -V, --version                 print the program version\n"
@ -45,6 +45,8 @@ root
 	  "          --from-callback callback      use callback on original encoding\n"
 	  "          -i                            ignore invalid sequences in the input\n"
          "          --callback callback           use callback on both encodings\n"
+	  "          --fallback                    use fallback mapping\n"
+	  "          --no-fallback                 do not use fallback mapping\n"
 	  "          -f, --from-code code          set the original encoding\n"
 	  "          -t, --to-code code            set the destination encoding\n" 
          "          -o, --output file             write output to file\n"
@ -74,11 +76,11 @@ root
  cantWrite       { "The converted text couldn't be written: {0}.\n" } // 0: OS error string
  cantRead        { "Error reading from input file {0}.\n" } // 0: OS error string

-  premEnd         { "Premature end of Unicode conversion to codepage\n" }
-  premEndInput    { "Premature end of input, when converting from codepage to Unicode\n" }
+  premEnd         { "Premature end of Unicode conversion to codepage at or near position {0}.\n" }
+  premEndInput    { "Premature end of input when converting from codepage to Unicode at or near position {0}.\n" }

-  problemCvtToU   { "Conversion to Unicode from codepage failed: {0}.\n" } // 0: err
-  problemCvtFromU { "Problem converting from Unicode to codepage: {0}.\n"} // 0: err
+  problemCvtToU   { "Conversion to Unicode from codepage failed at or near position {0}: {1}.\n" } // 0: position, 1: err
+  problemCvtFromU { "Conversion from Unicode to codepage failed at or near position {0}: {1}.\n"} // 0: err

 // ICU errors - used by u_wmsg_errorName()

--- a/icu4c/source/extra/uconv/uconv.1.in
+++ b/icu4c/source/extra/uconv/uconv.1.in
@ -52,6 +52,11 @@
 [
 .BI "\-\-callback" " callback"
 ]
+[
+.BI "\-\-fallback"
+|
+.BI "\-\-no\-fallback"
+]
 .BI "\-f\fP, \fB\-\-from\-code" " encoding"
 .BI "\-t\fP, \fB\-\-to\-code" " encoding"
 [
@ -165,6 +170,15 @@ encoding. See section
 .B CALLBACKS
 for details on valid callbacks.
 .TP
+.BI "\-\-fallback"
+Use the fallback mapping when transcoding from
+Unicode to the destination encoding.
+.TP
+.BI "\-\-no\-fallback"
+Do not use the fallback mapping when transcoding from Unicode to the
+destination encoding.
+This is the default.
+.TP
 .BI "\-f\fP, \fB\-\-from\-code" " encoding"
 Set the original encoding of the data to 
 .IR encoding .
--- a/icu4c/source/extra/uconv/uconv.cpp
+++ b/icu4c/source/extra/uconv/uconv.cpp
@ -1,23 +1,11 @@
 /******************************************************************************
 *
-*   Copyright (C) 1999-2000, International Business Machines
+*   Copyright (C) 1999-2002, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************/
 //
-// uconv demonstration example of ICU and codepage conversion
-// Purpose is to be a similar tool as the UNIX iconv program.
-//
-// Usage: uconv [flag] [file]
-// -f [codeset]  Convert file from this codeset
-// -t [codeset]  Convert file to this code set
-// -l            Display all available converters
-// -x [transliterator]  Run everything through a transliterator
-// -L            Display all available transliterators
-// If no file is given, uconv tries to read from stdin
-// 
-// To compile: c++ -o uconv -I${ICUHOME}/include -Wall -g uconv.cpp -L${ICUHOME}/lib -licuuc -licui18n
-//
+// uconv: an iconv(1)-like converter using ICU.
 // Original contributor was Jonas Utterström <jonas.utterstrom@vittran.norrnod.se> in 1999
 // Converted to the C conversion API and many improvements by Yves Arrouye <yves@realnames.com>. 
 //
@ -194,7 +182,7 @@ static int printConverters(const char *pname, const char *lookfor, int canon)
            if (U_FAILURE(err)) {
                printf("%s", name);
                    
-                UnicodeString str(name);
+                UnicodeString str(name, strlen(name) + 1);
                putchar('\t');
                u_wmsg("cantGetAliases", str.getBuffer(), u_wmsg_errorName(err));
                return -1;
@ -205,7 +193,7 @@ static int printConverters(const char *pname, const char *lookfor, int canon)
                    const char *alias = ucnv_getAlias(name, a, &err);
                        
                    if (U_FAILURE(err)) {
-                        UnicodeString str(name);
+                        UnicodeString str(name, strlen(name) + 1);
                        putchar('\t');
                        u_wmsg("cantGetAliases", str.getBuffer(), u_wmsg_errorName(err));
                        return -1;
@ -303,15 +291,20 @@ static int printTransliterators(const char *pname, int canon) {
    return 0;
 }

+// Compute the offset of data in its source
+static int32_t dataOffset(const int32_t *fromoffsets, int32_t whereto, const int32_t *tooffsets) {
+    return fromoffsets[tooffsets[whereto]];
+}

 // Convert a file from one encoding to another
 static UBool convertFile(const char *pname,
-                         const char* fromcpage, 
+                         const char* fromcpage,
                         UConverterToUCallback toucallback,
                         const void *touctxt,
                         const char* tocpage,
                         UConverterFromUCallback fromucallback,
                         const void *fromuctxt,
+                         int fallback,
                         const char *translit,
                         const char* infilestr, 
                         FILE* outfile,
@ -337,6 +330,7 @@ static UBool convertFile(const char *pname,
    const UChar* cuniiter;
    UChar* uniiter;
    UChar* unibuff = 0;
+    int32_t *fromoffsets = 0, *tooffsets = 0;

    size_t rd, totbuffsize;

@ -380,6 +374,7 @@ static UBool convertFile(const char *pname,
        UnicodeString str(translit);
        t = Transliterator::createInstance(str, UTRANS_FORWARD, err);
        if (U_FAILURE(err)) {
+            str.append((UChar32) 0);
            initMsg(pname);
            u_wmsg("cantOpenTranslit", str.getBuffer(), u_wmsg_errorName(err));
            if (t) {
@ -397,7 +392,7 @@ static UBool convertFile(const char *pname,
    convfrom = ucnv_open(fromcpage, &err);
    if (U_FAILURE(err))
    {
-      UnicodeString str(fromcpage,"");
+      UnicodeString str(fromcpage, strlen(fromcpage) + 1);
      initMsg(pname);
      u_wmsg("cantOpenFromCodeset",str.getBuffer(),
             u_wmsg_errorName(err));
@ -414,7 +409,7 @@ static UBool convertFile(const char *pname,
    convto = ucnv_open(tocpage, &err);
    if (U_FAILURE(err))
    {
-        UnicodeString str(tocpage,"");
+        UnicodeString str(tocpage, strlen(tocpage) + 1);
        initMsg(pname);
        u_wmsg("cantOpenToCodeset",str.getBuffer(),
               u_wmsg_errorName(err));
@ -427,6 +422,7 @@ static UBool convertFile(const char *pname,
        u_wmsg("cantSetCallback", u_wmsg_errorName(err));
        goto error_exit;
    }
+    ucnv_setFallback(convto, fallback);

    // To ensure that the buffer always is of enough size, we
    // must take the worst case scenario, that is the character in the codepage
@ -435,6 +431,9 @@ static UBool convertFile(const char *pname,
    totbuffsize = buffsize * ucnv_getMaxCharSize(convto);
    buff = new char[totbuffsize];
    unibuff = new UChar[buffsize];
+    
+    fromoffsets = new int32_t[buffsize];
+    tooffsets = new int32_t[totbuffsize];

    // OK, we can convert now.

@ -443,7 +442,8 @@ static UBool convertFile(const char *pname,
        rd = fread(buff, 1, readsize, infile);
        if (ferror(infile) != 0)
        {
-            UnicodeString str(strerror(errno), "");
+            UnicodeString str(strerror(errno));
+            str.append((UChar32) 0);
            initMsg(pname);
            u_wmsg("cantRead",str.getBuffer());
            goto error_exit;
@ -461,14 +461,17 @@ static UBool convertFile(const char *pname,
        uniiter = unibuff;
        cbuffiter = buff;
        flush = rd!=readsize;
-        ucnv_toUnicode(convfrom, &uniiter, uniiter + buffsize, &cbuffiter, cbuffiter + rd, 0, flush, &err);
+        ucnv_toUnicode(convfrom, &uniiter, uniiter + buffsize, &cbuffiter, cbuffiter + rd, fromoffsets, flush, &err);
          
-        foffset += uniiter - unibuff;
+        foffset += cbuffiter - buff - 1;

        if (U_FAILURE(err))
        {
+            char pos[32];
+            sprintf(pos, "%u", foffset);
+            UnicodeString str(pos, strlen(pos) + 1);
            initMsg(pname);
-            u_wmsg("problemCvtToU", u_wmsg_errorName(err));
+            u_wmsg("problemCvtToU", str.getBuffer(), u_wmsg_errorName(err));
            goto error_exit;
        }
            
@ -476,8 +479,11 @@ static UBool convertFile(const char *pname,
        // of chars read.
        if (flush && cbuffiter!=(buff+rd))
        {
+            char pos[32];
+            sprintf(pos, "%u", foffset);
+            UnicodeString str(pos, strlen(pos) + 1);
            initMsg(pname);
-            u_wmsg("premEndInput");
+            u_wmsg("premEndInput", str.getBuffer());
            goto error_exit;
        }
            
@ -499,12 +505,18 @@ static UBool convertFile(const char *pname,
            
          }

-        ucnv_fromUnicode(convto, &buffiter, buffiter + totbuffsize, &cuniiter, cuniiter + (size_t) (uniiter - unibuff), 0, flush, &err);
+        ucnv_fromUnicode(convto, &buffiter, buffiter + totbuffsize, &cuniiter, cuniiter + (size_t) (uniiter - unibuff), tooffsets, flush, &err);
            
        if (U_FAILURE(err))
        {
+            char pos[32];
+
+            uint32_t erroffset = dataOffset(fromoffsets, buffiter - buff, tooffsets);
+         
+            sprintf(pos, "%u", foffset - (uniiter - unibuff) + erroffset);
+            UnicodeString str(pos, strlen(pos) + 1);
            initMsg(pname);
-            u_wmsg("problemCvtFromU", u_wmsg_errorName(err));
+            u_wmsg("problemCvtFromU", str.getBuffer(), u_wmsg_errorName(err));
            goto error_exit;
        }
                        
@ -512,8 +524,11 @@ static UBool convertFile(const char *pname,
        // of consumed characters.
        if (flush && cuniiter!=(unibuff+(size_t)(uniiter-unibuff)))
        {
+            char pos[32];
+            sprintf(pos, "%u", foffset);
+            UnicodeString str(pos, strlen(pos) + 1);
            initMsg(pname);
-            u_wmsg("premEnd");
+            u_wmsg("premEnd", str.getBuffer());
            goto error_exit;
        }
            
@ -530,18 +545,24 @@ static UBool convertFile(const char *pname,
    } while (!flush); // Stop when we have flushed the converters (this means that it's the end of output)

    goto normal_exit;
+
  error_exit:
    ret = FALSE;
+
  normal_exit:
+    // Close the created converters
+
    if (convfrom) ucnv_close(convfrom);
    if (convto) ucnv_close(convto);

    if ( t ) delete t;

-    // Close the created converters
    if (buff) delete [] buff;
    if (unibuff) delete [] unibuff;

+    if (fromoffsets) delete [] fromoffsets;
+    if (tooffsets) delete [] tooffsets;
+
    if (infile != stdin) {
        fclose(infile);
    }
@ -557,21 +578,21 @@ static void usage(const char *pname, int ecode)
   
  initMsg(pname);
  msg = ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord", &msgLen, &err);
-  UnicodeString upname(pname);
-  UnicodeString mname(msg, msgLen);
+  UnicodeString upname(pname, strlen(pname) + 1);
+  UnicodeString mname(msg, msgLen + 1);

  u_wmsg("usage", mname.getBuffer(), upname.getBuffer());
  if (!ecode) {
-    putchar('\n');
+    fputc('\n', stderr);
    u_wmsg("help");

    /* Now dump callbacks and finish. */

    int i, count = sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
    for (i = 0; i < count; ++i) {
-        printf(" %s", transcode_callbacks[i].name);
+        fprintf(stderr, " %s", transcode_callbacks[i].name);
    }
-    putchar('\n');
+    fputc('\n', stderr);
  }

  exit(ecode);
@ -587,6 +608,7 @@ int main(int argc, char** argv)
    const char* tocpage = 0;
    const char *translit = 0;
    const char* outfilestr = 0;
+    int fallback = 0;

    UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
    const void *fromuctxt = 0;
@ -602,7 +624,7 @@ int main(int argc, char** argv)
    const char *printName = 0;
    int printTranslits = 0;

-    int silent = 0, verbose = 0;
+    int verbose = 0;

    // Prettify pname.
    for (pname = *argv + strlen(*argv) - 1; pname != *argv && *pname != U_FILE_SEP_CHAR; --pname);
@ -636,6 +658,10 @@ int main(int argc, char** argv)
                translit = *iter;
            else
                usage(pname, 1);
+        } else if (!strcmp("--fallback", *iter)) {
+            fallback = 1;
+        } else if (!strcmp("--no-fallback", *iter)) {
+            fallback = 0;
        }
        else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter))
        {
@ -743,7 +769,7 @@ int main(int argc, char** argv)
            }
        }
        else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
-            silent = 1;
+            verbose = 0;
        } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
            verbose = 1;
        } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
@ -827,6 +853,10 @@ int main(int argc, char** argv)
        else if (strcmp("-x", *iter) == 0)
        {
            iter++;
+        } else if (!strcmp("--fallback", *iter)) {
+            ;
+        } else if (!strcmp("--no-fallback", *iter)) {
+            ;
        }
        else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter))
        {
@ -875,14 +905,14 @@ int main(int argc, char** argv)
            ++iter;
        } else {
            seenf = 1;
-            if (!convertFile(pname, fromcpage, toucallback, touctxt, tocpage, fromucallback, fromuctxt, translit, *iter, outfile, verbose)) {
+            if (!convertFile(pname, fromcpage, toucallback, touctxt, tocpage, fromucallback, fromuctxt, fallback, translit, *iter, outfile, verbose)) {
                goto error_exit;
            }
        }
    }

    if (!seenf) {
-        if (!convertFile(pname, fromcpage, toucallback, touctxt, tocpage, fromucallback, fromuctxt, translit, 0, outfile, verbose)) {
+        if (!convertFile(pname, fromcpage, toucallback, touctxt, tocpage, fromucallback, fromuctxt, fallback, translit, 0, outfile, verbose)) {
            goto error_exit;
        }
    }