mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-21 04:29:31 +00:00
ICU-1220 Report position at which errors occur.
Fix memory dumps in messages by explicitly appending U+0000 to the end of the strings whose getBuffer() result we pass to u_wmsg(). Add --fallback and --no-fallback options to control the use of fallback mappings. X-SVN-Rev: 7421
This commit is contained in:
parent
da1e186728
commit
ca88544452
3 changed files with 89 additions and 43 deletions
|
@ -1,6 +1,6 @@
|
|||
// -*- Coding: utf-8; -*- [all uconv resource files]
|
||||
// Copyright (c) 2000 IBM, Inc. and Others.
|
||||
// $Revision: 1.18 $
|
||||
// $Revision: 1.19 $
|
||||
//
|
||||
// Root translation file for uconv messages.
|
||||
// So you want to translate this file??? Great!
|
||||
|
@ -28,7 +28,7 @@ root
|
|||
|
||||
lcUsageWord { "usage" }
|
||||
ucUsageWord { "Usage" }
|
||||
usage { "{0}: {1} [ -h, -?, --help ] [ -V, --version ] [ -s, --silent ] [ -v, --verbose ] [ -l, --list | --list-code code | --default-code | -L, --list-transliterators ] [ --canon ] [ -x transliterator ] [ --to-callback callback | -c ] [ --from-callback callback | -i ] [ --callback callback ] -f, --from-code code -t, --to-code code [ file ... ] [ -o, --output file ]\n" }
|
||||
usage { "{0}: {1} [ -h, -?, --help ] [ -V, --version ] [ -s, --silent ] [ -v, --verbose ] [ -l, --list | --list-code code | --default-code | -L, --list-transliterators ] [ --canon ] [ -x transliterator ] [ --to-callback callback | -c ] [ --from-callback callback | -i ] [ --callback callback ] [ --fallback | --no-fallback ] -f, --from-code code -t, --to-code code [ file ... ] [ -o, --output file ]\n" }
|
||||
|
||||
help { "Options: -h, --help print this message\n"
|
||||
" -V, --version print the program version\n"
|
||||
|
@ -45,6 +45,8 @@ root
|
|||
" --from-callback callback use callback on original encoding\n"
|
||||
" -i ignore invalid sequences in the input\n"
|
||||
" --callback callback use callback on both encodings\n"
|
||||
" --fallback use fallback mapping\n"
|
||||
" --no-fallback do not use fallback mapping\n"
|
||||
" -f, --from-code code set the original encoding\n"
|
||||
" -t, --to-code code set the destination encoding\n"
|
||||
" -o, --output file write output to file\n"
|
||||
|
@ -74,11 +76,11 @@ root
|
|||
cantWrite { "The converted text couldn't be written: {0}.\n" } // 0: OS error string
|
||||
cantRead { "Error reading from input file {0}.\n" } // 0: OS error string
|
||||
|
||||
premEnd { "Premature end of Unicode conversion to codepage\n" }
|
||||
premEndInput { "Premature end of input, when converting from codepage to Unicode\n" }
|
||||
premEnd { "Premature end of Unicode conversion to codepage at or near position {0}.\n" }
|
||||
premEndInput { "Premature end of input when converting from codepage to Unicode at or near position {0}.\n" }
|
||||
|
||||
problemCvtToU { "Conversion to Unicode from codepage failed: {0}.\n" } // 0: err
|
||||
problemCvtFromU { "Problem converting from Unicode to codepage: {0}.\n"} // 0: err
|
||||
problemCvtToU { "Conversion to Unicode from codepage failed at or near position {0}: {1}.\n" } // 0: position, 1: err
|
||||
problemCvtFromU { "Conversion from Unicode to codepage failed at or near position {0}: {1}.\n"} // 0: position, 1: err
|
||||
|
||||
// ICU errors - used by u_wmsg_errorName()
|
||||
|
||||
|
|
|
@ -52,6 +52,11 @@
|
|||
[
|
||||
.BI "\-\-callback" " callback"
|
||||
]
|
||||
[
|
||||
.BI "\-\-fallback"
|
||||
|
|
||||
.BI "\-\-no\-fallback"
|
||||
]
|
||||
.BI "\-f\fP, \fB\-\-from\-code" " encoding"
|
||||
.BI "\-t\fP, \fB\-\-to\-code" " encoding"
|
||||
[
|
||||
|
@ -165,6 +170,15 @@ encoding. See section
|
|||
.B CALLBACKS
|
||||
for details on valid callbacks.
|
||||
.TP
|
||||
.BI "\-\-fallback"
|
||||
Use the fallback mapping when transcoding from
|
||||
Unicode to the destination encoding.
|
||||
.TP
|
||||
.BI "\-\-no\-fallback"
|
||||
Do not use the fallback mapping when transcoding from Unicode to the
|
||||
destination encoding.
|
||||
This is the default.
|
||||
.TP
|
||||
.BI "\-f\fP, \fB\-\-from\-code" " encoding"
|
||||
Set the original encoding of the data to
|
||||
.IR encoding .
|
||||
|
|
|
@ -1,23 +1,11 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2000, International Business Machines
|
||||
* Copyright (C) 1999-2002, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************/
|
||||
//
|
||||
// uconv demonstration example of ICU and codepage conversion
|
||||
// Purpose is to be a similar tool as the UNIX iconv program.
|
||||
//
|
||||
// Usage: uconv [flag] [file]
|
||||
// -f [codeset] Convert file from this codeset
|
||||
// -t [codeset] Convert file to this code set
|
||||
// -l Display all available converters
|
||||
// -x [transliterator] Run everything through a transliterator
|
||||
// -L Display all available transliterators
|
||||
// If no file is given, uconv tries to read from stdin
|
||||
//
|
||||
// To compile: c++ -o uconv -I${ICUHOME}/include -Wall -g uconv.cpp -L${ICUHOME}/lib -licuuc -licui18n
|
||||
//
|
||||
// uconv: an iconv(1)-like converter using ICU.
|
||||
// Original contributor was Jonas Utterström <jonas.utterstrom@vittran.norrnod.se> in 1999
|
||||
// Converted to the C conversion API and many improvements by Yves Arrouye <yves@realnames.com>.
|
||||
//
|
||||
|
@ -194,7 +182,7 @@ static int printConverters(const char *pname, const char *lookfor, int canon)
|
|||
if (U_FAILURE(err)) {
|
||||
printf("%s", name);
|
||||
|
||||
UnicodeString str(name);
|
||||
UnicodeString str(name, strlen(name) + 1);
|
||||
putchar('\t');
|
||||
u_wmsg("cantGetAliases", str.getBuffer(), u_wmsg_errorName(err));
|
||||
return -1;
|
||||
|
@ -205,7 +193,7 @@ static int printConverters(const char *pname, const char *lookfor, int canon)
|
|||
const char *alias = ucnv_getAlias(name, a, &err);
|
||||
|
||||
if (U_FAILURE(err)) {
|
||||
UnicodeString str(name);
|
||||
UnicodeString str(name, strlen(name) + 1);
|
||||
putchar('\t');
|
||||
u_wmsg("cantGetAliases", str.getBuffer(), u_wmsg_errorName(err));
|
||||
return -1;
|
||||
|
@ -303,15 +291,20 @@ static int printTransliterators(const char *pname, int canon) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Compute the offset of data in its source.
//
// Reads tooffsets[whereto] and uses that value as an index into fromoffsets,
// returning the entry found there. In convertFile(), fromoffsets is the
// offsets array handed to ucnv_toUnicode() and tooffsets is the one handed to
// ucnv_fromUnicode(); whereto is passed as (buffiter - buff).
//
// No bounds checking is done on either lookup; the caller must make sure that
// whereto is a valid index into tooffsets and that the value stored there is a
// valid index into fromoffsets.
// NOTE(review): presumably the result is an input byte offset relative to the
// start of the current read buffer -- confirm against the ICU offsets contract.
|
||||
static int32_t dataOffset(const int32_t *fromoffsets, int32_t whereto, const int32_t *tooffsets) {
|
||||
return fromoffsets[tooffsets[whereto]];
|
||||
}
|
||||
|
||||
// Convert a file from one encoding to another
|
||||
static UBool convertFile(const char *pname,
|
||||
const char* fromcpage,
|
||||
const char* fromcpage,
|
||||
UConverterToUCallback toucallback,
|
||||
const void *touctxt,
|
||||
const char* tocpage,
|
||||
UConverterFromUCallback fromucallback,
|
||||
const void *fromuctxt,
|
||||
int fallback,
|
||||
const char *translit,
|
||||
const char* infilestr,
|
||||
FILE* outfile,
|
||||
|
@ -337,6 +330,7 @@ static UBool convertFile(const char *pname,
|
|||
const UChar* cuniiter;
|
||||
UChar* uniiter;
|
||||
UChar* unibuff = 0;
|
||||
int32_t *fromoffsets = 0, *tooffsets = 0;
|
||||
|
||||
size_t rd, totbuffsize;
|
||||
|
||||
|
@ -380,6 +374,7 @@ static UBool convertFile(const char *pname,
|
|||
UnicodeString str(translit);
|
||||
t = Transliterator::createInstance(str, UTRANS_FORWARD, err);
|
||||
if (U_FAILURE(err)) {
|
||||
str.append((UChar32) 0);
|
||||
initMsg(pname);
|
||||
u_wmsg("cantOpenTranslit", str.getBuffer(), u_wmsg_errorName(err));
|
||||
if (t) {
|
||||
|
@ -397,7 +392,7 @@ static UBool convertFile(const char *pname,
|
|||
convfrom = ucnv_open(fromcpage, &err);
|
||||
if (U_FAILURE(err))
|
||||
{
|
||||
UnicodeString str(fromcpage,"");
|
||||
UnicodeString str(fromcpage, strlen(fromcpage) + 1);
|
||||
initMsg(pname);
|
||||
u_wmsg("cantOpenFromCodeset",str.getBuffer(),
|
||||
u_wmsg_errorName(err));
|
||||
|
@ -414,7 +409,7 @@ static UBool convertFile(const char *pname,
|
|||
convto = ucnv_open(tocpage, &err);
|
||||
if (U_FAILURE(err))
|
||||
{
|
||||
UnicodeString str(tocpage,"");
|
||||
UnicodeString str(tocpage, strlen(tocpage) + 1);
|
||||
initMsg(pname);
|
||||
u_wmsg("cantOpenToCodeset",str.getBuffer(),
|
||||
u_wmsg_errorName(err));
|
||||
|
@ -427,6 +422,7 @@ static UBool convertFile(const char *pname,
|
|||
u_wmsg("cantSetCallback", u_wmsg_errorName(err));
|
||||
goto error_exit;
|
||||
}
|
||||
ucnv_setFallback(convto, fallback);
|
||||
|
||||
// To ensure that the buffer always is of enough size, we
|
||||
// must take the worst case scenario, that is the character in the codepage
|
||||
|
@ -435,6 +431,9 @@ static UBool convertFile(const char *pname,
|
|||
totbuffsize = buffsize * ucnv_getMaxCharSize(convto);
|
||||
buff = new char[totbuffsize];
|
||||
unibuff = new UChar[buffsize];
|
||||
|
||||
fromoffsets = new int32_t[buffsize];
|
||||
tooffsets = new int32_t[totbuffsize];
|
||||
|
||||
// OK, we can convert now.
|
||||
|
||||
|
@ -443,7 +442,8 @@ static UBool convertFile(const char *pname,
|
|||
rd = fread(buff, 1, readsize, infile);
|
||||
if (ferror(infile) != 0)
|
||||
{
|
||||
UnicodeString str(strerror(errno), "");
|
||||
UnicodeString str(strerror(errno));
|
||||
str.append((UChar32) 0);
|
||||
initMsg(pname);
|
||||
u_wmsg("cantRead",str.getBuffer());
|
||||
goto error_exit;
|
||||
|
@ -461,14 +461,17 @@ static UBool convertFile(const char *pname,
|
|||
uniiter = unibuff;
|
||||
cbuffiter = buff;
|
||||
flush = rd!=readsize;
|
||||
ucnv_toUnicode(convfrom, &uniiter, uniiter + buffsize, &cbuffiter, cbuffiter + rd, 0, flush, &err);
|
||||
ucnv_toUnicode(convfrom, &uniiter, uniiter + buffsize, &cbuffiter, cbuffiter + rd, fromoffsets, flush, &err);
|
||||
|
||||
foffset += uniiter - unibuff;
|
||||
foffset += cbuffiter - buff - 1;
|
||||
|
||||
if (U_FAILURE(err))
|
||||
{
|
||||
char pos[32];
|
||||
sprintf(pos, "%u", foffset);
|
||||
UnicodeString str(pos, strlen(pos) + 1);
|
||||
initMsg(pname);
|
||||
u_wmsg("problemCvtToU", u_wmsg_errorName(err));
|
||||
u_wmsg("problemCvtToU", str.getBuffer(), u_wmsg_errorName(err));
|
||||
goto error_exit;
|
||||
}
|
||||
|
||||
|
@ -476,8 +479,11 @@ static UBool convertFile(const char *pname,
|
|||
// of chars read.
|
||||
if (flush && cbuffiter!=(buff+rd))
|
||||
{
|
||||
char pos[32];
|
||||
sprintf(pos, "%u", foffset);
|
||||
UnicodeString str(pos, strlen(pos) + 1);
|
||||
initMsg(pname);
|
||||
u_wmsg("premEndInput");
|
||||
u_wmsg("premEndInput", str.getBuffer());
|
||||
goto error_exit;
|
||||
}
|
||||
|
||||
|
@ -499,12 +505,18 @@ static UBool convertFile(const char *pname,
|
|||
|
||||
}
|
||||
|
||||
ucnv_fromUnicode(convto, &buffiter, buffiter + totbuffsize, &cuniiter, cuniiter + (size_t) (uniiter - unibuff), 0, flush, &err);
|
||||
ucnv_fromUnicode(convto, &buffiter, buffiter + totbuffsize, &cuniiter, cuniiter + (size_t) (uniiter - unibuff), tooffsets, flush, &err);
|
||||
|
||||
if (U_FAILURE(err))
|
||||
{
|
||||
char pos[32];
|
||||
|
||||
uint32_t erroffset = dataOffset(fromoffsets, buffiter - buff, tooffsets);
|
||||
|
||||
sprintf(pos, "%u", foffset - (uniiter - unibuff) + erroffset);
|
||||
UnicodeString str(pos, strlen(pos) + 1);
|
||||
initMsg(pname);
|
||||
u_wmsg("problemCvtFromU", u_wmsg_errorName(err));
|
||||
u_wmsg("problemCvtFromU", str.getBuffer(), u_wmsg_errorName(err));
|
||||
goto error_exit;
|
||||
}
|
||||
|
||||
|
@ -512,8 +524,11 @@ static UBool convertFile(const char *pname,
|
|||
// of consumed characters.
|
||||
if (flush && cuniiter!=(unibuff+(size_t)(uniiter-unibuff)))
|
||||
{
|
||||
char pos[32];
|
||||
sprintf(pos, "%u", foffset);
|
||||
UnicodeString str(pos, strlen(pos) + 1);
|
||||
initMsg(pname);
|
||||
u_wmsg("premEnd");
|
||||
u_wmsg("premEnd", str.getBuffer());
|
||||
goto error_exit;
|
||||
}
|
||||
|
||||
|
@ -530,18 +545,24 @@ static UBool convertFile(const char *pname,
|
|||
} while (!flush); // Stop when we have flushed the converters (this means that it's the end of output)
|
||||
|
||||
goto normal_exit;
|
||||
|
||||
error_exit:
|
||||
ret = FALSE;
|
||||
|
||||
normal_exit:
|
||||
// Close the created converters
|
||||
|
||||
if (convfrom) ucnv_close(convfrom);
|
||||
if (convto) ucnv_close(convto);
|
||||
|
||||
if ( t ) delete t;
|
||||
|
||||
// Close the created converters
|
||||
if (buff) delete [] buff;
|
||||
if (unibuff) delete [] unibuff;
|
||||
|
||||
if (fromoffsets) delete [] fromoffsets;
|
||||
if (tooffsets) delete [] tooffsets;
|
||||
|
||||
if (infile != stdin) {
|
||||
fclose(infile);
|
||||
}
|
||||
|
@ -557,21 +578,21 @@ static void usage(const char *pname, int ecode)
|
|||
|
||||
initMsg(pname);
|
||||
msg = ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord", &msgLen, &err);
|
||||
UnicodeString upname(pname);
|
||||
UnicodeString mname(msg, msgLen);
|
||||
UnicodeString upname(pname, strlen(pname) + 1);
|
||||
UnicodeString mname(msg, msgLen + 1);
|
||||
|
||||
u_wmsg("usage", mname.getBuffer(), upname.getBuffer());
|
||||
if (!ecode) {
|
||||
putchar('\n');
|
||||
fputc('\n', stderr);
|
||||
u_wmsg("help");
|
||||
|
||||
/* Now dump callbacks and finish. */
|
||||
|
||||
int i, count = sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
|
||||
for (i = 0; i < count; ++i) {
|
||||
printf(" %s", transcode_callbacks[i].name);
|
||||
fprintf(stderr, " %s", transcode_callbacks[i].name);
|
||||
}
|
||||
putchar('\n');
|
||||
fputc('\n', stderr);
|
||||
}
|
||||
|
||||
exit(ecode);
|
||||
|
@ -587,6 +608,7 @@ int main(int argc, char** argv)
|
|||
const char* tocpage = 0;
|
||||
const char *translit = 0;
|
||||
const char* outfilestr = 0;
|
||||
int fallback = 0;
|
||||
|
||||
UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
|
||||
const void *fromuctxt = 0;
|
||||
|
@ -602,7 +624,7 @@ int main(int argc, char** argv)
|
|||
const char *printName = 0;
|
||||
int printTranslits = 0;
|
||||
|
||||
int silent = 0, verbose = 0;
|
||||
int verbose = 0;
|
||||
|
||||
// Prettify pname.
|
||||
for (pname = *argv + strlen(*argv) - 1; pname != *argv && *pname != U_FILE_SEP_CHAR; --pname);
|
||||
|
@ -636,6 +658,10 @@ int main(int argc, char** argv)
|
|||
translit = *iter;
|
||||
else
|
||||
usage(pname, 1);
|
||||
} else if (!strcmp("--fallback", *iter)) {
|
||||
fallback = 1;
|
||||
} else if (!strcmp("--no-fallback", *iter)) {
|
||||
fallback = 0;
|
||||
}
|
||||
else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter))
|
||||
{
|
||||
|
@ -743,7 +769,7 @@ int main(int argc, char** argv)
|
|||
}
|
||||
}
|
||||
else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
|
||||
silent = 1;
|
||||
verbose = 0;
|
||||
} else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
|
||||
verbose = 1;
|
||||
} else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
|
||||
|
@ -827,6 +853,10 @@ int main(int argc, char** argv)
|
|||
else if (strcmp("-x", *iter) == 0)
|
||||
{
|
||||
iter++;
|
||||
} else if (!strcmp("--fallback", *iter)) {
|
||||
;
|
||||
} else if (!strcmp("--no-fallback", *iter)) {
|
||||
;
|
||||
}
|
||||
else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter))
|
||||
{
|
||||
|
@ -875,14 +905,14 @@ int main(int argc, char** argv)
|
|||
++iter;
|
||||
} else {
|
||||
seenf = 1;
|
||||
if (!convertFile(pname, fromcpage, toucallback, touctxt, tocpage, fromucallback, fromuctxt, translit, *iter, outfile, verbose)) {
|
||||
if (!convertFile(pname, fromcpage, toucallback, touctxt, tocpage, fromucallback, fromuctxt, fallback, translit, *iter, outfile, verbose)) {
|
||||
goto error_exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!seenf) {
|
||||
if (!convertFile(pname, fromcpage, toucallback, touctxt, tocpage, fromucallback, fromuctxt, translit, 0, outfile, verbose)) {
|
||||
if (!convertFile(pname, fromcpage, toucallback, touctxt, tocpage, fromucallback, fromuctxt, fallback, translit, 0, outfile, verbose)) {
|
||||
goto error_exit;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue