diff --git a/icu4c/source/tools/makeconv/misc/canonucm.c b/icu4c/source/tools/makeconv/misc/canonucm.c index 5999192484e..a37f8ddf74a 100644 --- a/icu4c/source/tools/makeconv/misc/canonucm.c +++ b/icu4c/source/tools/makeconv/misc/canonucm.c @@ -24,148 +24,6 @@ * On Windows: cl canonucm.c */ -#include -#include -#include +#error File moved to charset/source/ucmtools/ on 2002-nov-06 -typedef struct Mapping { - unsigned long u, b, f; -} Mapping; - -static Mapping -mappings[200000]; - -/* lexically compare Mappings for sorting */ -static int -compareMappings(const void *left, const void *right) { - const Mapping *l=(const Mapping *)left, *r=(const Mapping *)right; - long result; - - /* shift right 16 with sign-extend to take care of int possibly being 16 bits wide */ - result=(long)(l->u-r->u); - if(result!=0) { - return (int)(result>>16)|1; - } - result=(long)(l->b-r->b); - if(result!=0) { - return (int)(result>>16)|1; - } - return (int)(l->f-r->f); -} - -extern int -main(int argc, const char *argv[]) { - char line[200]; - char *s, *end; - unsigned long b, i, mappingsTop=0; - - /* parse the input file from stdin */ - /* read and copy header */ - do { - if(gets(line)==NULL) { - fprintf(stderr, "error: no mapping section"); - return 1; - } - puts(line); - } while(0!=strcmp(line, "CHARMAP")); - - /* copy empty and comment lines before the first mapping */ - for(;;) { - if(gets(line)==NULL) { - fprintf(stderr, "error: no mappings"); - return 1; - } - if(line[0]!=0 && line[0]!='#') { - break; - } - puts(line); - } - - /* process the charmap section, start with the line read above */ - for(;;) { - /* ignore empty and comment lines */ - if(line[0]!=0 && line[0]!='#') { - if(0!=strcmp(line, "END CHARMAP")) { - if(mappingsTop==sizeof(mappings)/sizeof(mappings[0])) { - fprintf(stderr, "too many mappings\n"); - return 1; - } - /* parse mapping */ - if(line[0]!='<' || line[1]!='U') { - fprintf(stderr, "parse error (does not start with \"0x10ffff || *end!='>') { - fprintf(stderr, "parse error (Unicode code point) in mapping line \"%s\"\n", line); - return 1; - } - /* skip white space */ - s=end+1; - while(*s==' ' || *s=='\t') { - ++s; - } - /* parse codepage bytes */ - b=0; - for(;;) { - if(*s!='\\') { - break; - } - if(s[1]!='x') { - fprintf(stderr, "parse error (no 'x' in \"\\xXX\") in mapping line \"%s\"\n", line); - return 1; - } - s+=2; - b=(b<<8)|strtoul(s, &end, 16); - if(end!=s+2) { - fprintf(stderr, "parse error (codepage byte) in mapping line \"%s\"\n", line); - return 1; - } - s+=2; - } - mappings[mappingsTop].b=b; - /* skip everything until the fallback indicator */ - while(*s!='|') { - if(*s==0) { - fprintf(stderr, "parse error (missing '|' fallback indicator) in mapping line \"%s\"\n", line); - return 1; - } - ++s; - } - /* parse fallback indicator */ - i=s[1]-'0'; - if(i>3) { - fprintf(stderr, "parse error (fallback indicator not 0..3) in mapping line \"%s\"\n", line); - return 1; - } - mappings[mappingsTop++].f=i; - } else { - /* sort and write all mappings */ - if(mappingsTop>0) { - qsort(mappings, mappingsTop, sizeof(Mapping), compareMappings); - for(i=0; i \\x%02lX |%lu\n", mappings[i].u, b, mappings[i].f); - } else if(b<=0xffff) { - printf(" \\x%02lX\\x%02lX |%lu\n", mappings[i].u, b>>8, b&0xff, mappings[i].f); - } else if(b<=0xffffff) { - printf(" \\x%02lX\\x%02lX\\x%02lX |%lu\n", mappings[i].u, b>>16, (b>>8)&0xff, b&0xff, mappings[i].f); - } else { - printf(" \\x%02lX\\x%02lX\\x%02lX\\x%02lX |%lu\n", mappings[i].u, b>>24, (b>>16)&0xff, (b>>8)&0xff, b&0xff, mappings[i].f); - } - } - } - /* output "END CHARMAP" */ - puts(line); - return 0; - } - } - /* read the next line */ - if(gets(line)==NULL) { - fprintf(stderr, "incomplete charmap section\n"); - return 1; - } - } -} +/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */ diff --git a/icu4c/source/tools/makeconv/misc/rptp2ucm.c b/icu4c/source/tools/makeconv/misc/rptp2ucm.c index da0dc0c5f57..7ec83209200 100644 --- a/icu4c/source/tools/makeconv/misc/rptp2ucm.c +++ b/icu4c/source/tools/makeconv/misc/rptp2ucm.c @@ -26,921 +26,6 @@ * On Windows: cl rptp2ucm.c */ -#include -#include -#include -#include +#error File moved to charset/source/ucmtools/ on 2002-nov-06 -typedef struct UCMSubchar { - const char *name; - unsigned long subchar, subchar1; -} UCMSubchar; - -static const UCMSubchar -knownSubchars[]={ - "274_P100", 0x3f, 0, - "850_P100", 0x7f, 0, - "913_P100", 0x1a, 0, - "1047_P100", 0x3f, 0 -}; - -typedef struct CCSIDStateTable { - unsigned int ccsid; - const char *table; -} CCSIDStateTable; - -/*Year when the ucm files were produced using this tool*/ -#define YEAR "2002" -/**/ -#define japanesePCDBCSStates " 0-ff:2, 81-9f:1, a0-fc:1\n"\ - " 40-7e, 80-fc\n"\ - "\n" - -static const CCSIDStateTable -knownStateTables[]={ - - 301, " 0-ff:2, 81-9f:1, e0-fc:1\n" - " 40-7e, 80-fc\n" - "\n", - 367, " 0-7f\n", - - 927, japanesePCDBCSStates, - - 926, japanesePCDBCSStates, - - 928, japanesePCDBCSStates, - - 932, " 0-7f,80,81-9f:1,a0-df,fd-ff, e0-fc:1\n" - " 40-7e, 80-fc\n", - - - 941, japanesePCDBCSStates, - - 942, " 0-80, 81-9f:1, a0-df, e0-fc:1, fd-ff\n" - " 40-7e, 80-fc\n", - - 943, " 0-7f, 81-9f:1, a0-df, e0-fc:1\n" - " 40-7e, 80-fc\n", - - 944, " 0-80, 81-bf:1, c0-ff\n" - " 40-7e, 80-fe\n", - - 946, " 0-80, 81-fb:1,fc:2,fd-ff\n" - " 40-7e, 80-fe\n" - " 80-fe.u,fc", - - 947, " 0-7f, 80-fe:1\n" - " 40-7e, 80-fe\n", - - 948, " 0-80, 81-fb:1,fc:2,fd-fe\n" - " 40-7e, 80-fe\n" - " 80-fe.u,fc\n", - - 949, " 0-84, 8f-fe:1\n" - " 40-7e, 80-fe\n", - - 950, " 0-7f, 81-fe:1\n" - " 40-7e, 81-fe\n", - - 954, " 0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n" - " a1-fe\n" - " a1-e4\n" - " a1-fe:1, a1:4\n" - " a1-fe.u\n", - - 955, " 0-20:2, 21-7e:1, 7f-ff:2\n" - " 21-7e\n" - "\n", - - 963, " 0-20:2, 21-7e:1, 7f-ff:2\n" - " 21-7e\n" - "\n", - - 964, " 0-8d, 8e:2, 90-9f, a1-fe:1, aa-c1:5, c3:5, fe:5\n" - " a1-fe\n" - " a1-b0:3, a1:4, a2:8, a3-ab:4, ac:7, ad:6, ae-b0:4\n" - " a1-fe:1\n" - " a1-fe:5\n" - " a1-fe.u\n" - " a1-a4:1, a5-fe:5\n" - " a1-e2:1, e3-fe:5\n" - " a1-f2:1, f3-fe:5\n", - - 970, " 0-9f, a1-fe:1\n" - " a1-fe\n", - - 1363, " 0-7f, 81-fe:1\n" - " 40-7e, 80-fe\n", - 1350, " 0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n" - " a1-fe\n" - " a1-e4\n" - " a1-fe:1, a1:4, a3-a5:4, a8:4, ac-af:4, ee-f2:4\n" - " a1-fe.u\n", - - 1351, " 0-ff:2, 81-9f:1, e0-fc:1\n" - " 40-7e, 80-fc\n" - "\n", - - 1370, " 0-80, 81-fe:1\n" - " 40-7e, 81-fe\n", - - 1381, " 0-84, 8c-fe:1\n" - " a1-fe\n", - - 1383, " 0-9f, a1-fe:1\n" - " a1-fe\n", - - 1385, " 0-ff:2,81-fe:1\n" - " 40-7e, 80-fe\n" - "\n", - - 1386, " 0-80, 81-fe:1\n" /* Was 0-7f, 81-fe:1 */ - " 40-7e, 80-fe\n", - - 5039, " 0-80, 81-9f:1, a0-df, e0-fc:1, fd-ff\n" - " 40-7e, 80-fc\n", - - 5050, " 0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n" - " a1-fe\n" - " a1-e4\n" - " a1-fe:1, a1:4, a3-af:4, b6:4, d6:4, da-db:4, ed-f2:4\n" - " a1-fe.u\n", - 5067, " 0-ff:2, 21-7e:1\n" - " 21-7e\n" - "\n", - - 5478, " 0-ff:2, 21-7e:1\n" - " 21-7e\n" - "\n", - - 21427, " 0-80:2, 81-fe:1, ff:2\n" - " 40-7e, 80-fe\n" - "\n", - 25546, " 0-7f, e:1.s, f:0.s\n" - " initial, 0-20:3, e:1.s, f:0.s, 21-7e:2, 7f-ff:3\n" - " 0-20:1.i, 21-7e:1., 7f-ff:1.i\n" - " 0-ff:1.i\n", - - 33722, " 0-8d, 8e:2, 8f:3, 90-9f, a1-fe:1\n" - " a1-fe\n" - " a1-e4\n" - " a1-fe:1, a1:4, a3-af:4, b6:4, d6:4, da-db:4, ed-f2:4\n" - " a1-fe.u\n" - - -}; - -typedef struct Mapping { - /* - * u bits: - * 31..24 fallback indicator - * 0 roundtrip - * 1 Unicode->codepage - * 3 codepage->Unicode - * 23.. 0 Unicode code point - * - * b: codepage bytes with leading zeroes - */ - unsigned long u, b; -} Mapping; - -#define MAX_MAPPINGS_COUNT 200000 - -static Mapping -fromUMappings[MAX_MAPPINGS_COUNT], toUMappings[MAX_MAPPINGS_COUNT]; - -static long fromUMappingsTop, toUMappingsTop; - -static unsigned long subchar, subchar1; -static unsigned int ccsid; - -enum { - ASCII, - EBCDIC, - UNKNOWN -}; - -static char -minCharLength, -maxCharLength, -charsetFamily, -usesPUA, -variantLF, -variantASCII, -variantControls, -variantSUB, -is7Bit; - -static void -init() { - fromUMappingsTop=toUMappingsTop=0; - - subchar=subchar1=0; - ccsid=0; - - minCharLength=4; - maxCharLength=0; - charsetFamily=UNKNOWN; - usesPUA=0; - variantLF=0; - variantASCII=0; - variantControls=0; - variantSUB=0; - is7Bit=0; -} - -/* lexically compare Mappings for sorting */ -static int -compareMappings(const void *left, const void *right) { - const Mapping *l=(const Mapping *)left, *r=(const Mapping *)right; - long result; - - /* the code points use fewer than 32 bits, just cast them to signed values and subtract */ - result=(long)(l->u&0xffffff)-(long)(r->u&0xffffff); - if(result!=0) { - /* shift right 16 with sign-extend to take care of int possibly being 16 bits wide */ - return (int)(result>>16)|1; - } - - /* the b fields may use all 32 bits as unsigned long, so result=(long)(l->b-r->b) would not work (try l->b=0x80000000 and r->b=1) */ - if(l->bb) { - return -1; - } else if(l->b>r->b) { - return 1; - } - - return (int)(l->u>>24)-(int)(r->u>>24); -} - -static const char * -skipWhitespace(const char *s) { - while(*s==' ' || *s=='\t') { - ++s; - } - return s; -} - -static long -parseMappings(FILE *f, Mapping *mappings) { - char line[200]; - Mapping *oldMappings; - char *s, *end; - long mappingsTop=0; - long lineNum=0; - - oldMappings=mappings; - while(fgets(line, sizeof(line), f)!=NULL) { - s=(char *)skipWhitespace(line); - lineNum++; - - /* skip empty lines */ - if(*s==0 || *s=='\n' || *s=='\r') { - continue; - } - - /* explicit end of table */ - if(memcmp(s, "END CHARMAP", 11)==0) { - break; - } - - /* comment lines, parse substitution characters, otherwise skip them */ - if(*s=='#' || *s=='*') { - /* get subchar1 */ - s=strstr(line, "for U+00xx"); - if(s!=NULL) { - s=strstr(line, "x'"); - if(s!=NULL) { - s+=2; - subchar1=strtoul(s, &end, 16); - if(end!=s+2 || *end!='\'') { - fprintf(stderr, "error parsing subchar1 from \"%s\"\n", line); - exit(2); - } - continue; - } else { - fprintf(stderr, "error finding subchar1 on \"%s\"\n", line); - exit(2); - } - } - - /* get subchar */ - s=strstr(line, "for U+xxxx"); - if(s!=NULL) { - s=strstr(line, "x'"); - if(s!=NULL) { - s+=2; - subchar=strtoul(s, &end, 16); - if(endb=strtoul(s, &end, 16); - if(s==end || (*end!=' ' && *end!='\t')) { - if((s+1)==end && *end=='-' && (mappings->b<=3)) { - /* this is a special EUC format where the code set number prepends the bytes */ - unsigned long prefix; - - switch(mappings->b) { - case 0: - prefix=0; - break; - case 1: - prefix=0; - break; - case 2: - prefix=0x8e; - break; - case 3: - prefix=0x8f; - break; - default: - /* never occurs because of above check */ - break; - } - - s+=2; - mappings->b=strtoul(s, &end, 16); - if(s==end || ((end-s)&1) || (*end!=' ' && *end!='\t')) { - fprintf(stderr, "error parsing EUC codepage bytes on \"%s\"\n", line); - exit(2); - } - mappings->b|=prefix<<(4*(end-s)); - } else { - fprintf(stderr, "%d: error parsing codepage bytes on \"%s\"\n", lineNum, line); - exit(2); - } - } - - s=(char *)skipWhitespace(end); - mappings->u=strtoul(s, &end, 16); - if(s==end || (*end!=' ' && *end!='\t' && *end!='\n' && *end!='\r' && *end!=0)) { - if(strncmp(s, "????", 4)==0 || strstr(s, "UNASSIGNED")!=NULL) { - /* this is a non-entry, do not add it to the mapping table */ - continue; - } - fprintf(stderr, "error parsing Unicode code point on \"%s\"\n", line); - exit(2); - } - - ++mappings; - if(++mappingsTop>=MAX_MAPPINGS_COUNT) { - fprintf(stderr, "error: too many mappings at \"%s\"\n", line); - exit(2); - } - } - - /* sort the mappings */ - qsort(oldMappings, mappingsTop, sizeof(Mapping), compareMappings); - - return mappingsTop; -} - -/* merge the mappings into fromUMappings and add fallback indicator values to Mapping.u bits 31..24 */ -static void -mergeMappings() { - long fromUIndex, toUIndex, newFromUMappingsTop=fromUMappingsTop; - int cmp; - - fromUIndex=toUIndex=0; - while(fromUIndexcodepage - */ - if(fromUMappings[fromUIndex].b!=subchar && fromUMappings[fromUIndex].b!=subchar1) { - fromUMappings[fromUIndex++].u|=0x1000000; - } else { - fromUMappings[fromUIndex++].u|=0x2000000; - } - } else { - /* - * the toU mapping does not have a fromU counterpart: - * (reverse) fallback codepage->Unicode, copy it to the fromU table - */ - fromUMappings[newFromUMappingsTop].u=toUMappings[toUIndex].u|=0x3000000; - fromUMappings[newFromUMappingsTop++].b=toUMappings[toUIndex++].b; - } - } - - /* either one or both tables are exhausted */ - while(fromUIndex>24; - u=fromUMappings[i].u&0xffffff; - b=fromUMappings[i].b; - - oredBytes|=b; - - /* character length? */ - if(b<=0xff) { - length=1; - } else if(b<=0xffff) { - length=2; - if(bmaxTwoByte) { - maxTwoByte=b; - } - } else if(b<=0xffffff) { - length=3; - } else { - length=4; - } - if(lengthmaxCharLength) { - maxCharLength=length; - } - - /* PUA used? */ - if((unsigned long)(u-0xe000)<0x1900 || (unsigned long)(u-0xf0000)<0x20000) { - usesPUA=1; - } - - /* only consider roundtrip mappings for the rest */ - if(f!=0) { - continue; - } - - /* ASCII or EBCDIC? */ - if(u==0x41) { - if(b==0x41) { - charsetFamily=ASCII; - } else if(b==0xc1) { - charsetFamily=EBCDIC; - } - } else if(u==0xa) { - if(b==0xa) { - charsetFamily=ASCII; - } else if(b==0x25) { - charsetFamily=EBCDIC; - variantLF=0; - } else if(b==0x15) { - charsetFamily=EBCDIC; - variantLF=1; - } - } - - /* US-ASCII? */ - if((unsigned long)(u-0x21)<94) { - if(u==b) { - ++countASCII; - } else { - variantASCII=1; - } - } else if(u<0x20 || u==0x7f) { - /* non-ISO C0 controls? */ - if(u!=b) { - /* IBM PC rotation of SUB and other controls: 0x1a->0x7f->0x1c->0x1a */ - if(u==0x1a && b==0x7f || u==0x1c && b==0x1a || u==0x7f && b==0x1c) { - charsetFamily=ASCII; - variantSUB=1; - } else { - variantControls=1; - } - } - } - } - - is7Bit= oredBytes<=0x7f; - - if(charsetFamily==UNKNOWN) { - if(minCharLength==2 && maxCharLength==2) { - /* guess the charset family for DBCS according to typical byte distributions */ - if( ((0x2020<=minTwoByte || minTwoByte<=0x217e) && maxTwoByte<=0x7e7e) || - ((0xa0a0<=minTwoByte || minTwoByte<=0xa1fe) && maxTwoByte<=0xfefe) || - ((0x8140<=minTwoByte || minTwoByte<=0x81fe) && maxTwoByte<=0xfefe) - ) { - charsetFamily=ASCII; - } else if((minTwoByte==0x4040 || (0x4141<=minTwoByte && minTwoByte<=0x41fe)) && maxTwoByte<=0xfefe) { - charsetFamily=EBCDIC; - } - } - if(charsetFamily==UNKNOWN) { - fprintf(stderr, "error: unable to determine the charset family\n"); - exit(3); - } - } - - /* reset variant indicators if they do not apply */ - if(charsetFamily!=ASCII || minCharLength!=1) { - variantASCII=variantSUB=variantControls=0; - } else if(countASCII!=94) { - /* if there are not 94 mappings for ASCII graphic characters, then set variantASCII */ - variantASCII=1; - } - - if(charsetFamily!=EBCDIC || minCharLength!=1) { - variantLF=0; - } -} - -static int -getSubchar(const char *name) { - int i; - - for(i=0; i", 9)==0) { - s=(char *)skipWhitespace(s+9); - p=&subchar; - } else if(memcmp(s, "", 10)==0) { - s=(char *)skipWhitespace(s+10); - p=&subchar1; - } else if(memcmp(s, "#", 11)==0) { - s=(char *)skipWhitespace(s+11); - p=&subchar1; - } else { - continue; - } - - /* get the value and store it in *p */ - bytes=0; - while(s[0]=='\\' && s[1]=='x') { - value=strtoul(s+2, &end, 16); - s+=4; - if(end!=s) { - fprintf(stderr, "error parsing UPMAP subchar from \"%s\"\n", line); - exit(2); - } - bytes=(bytes<<8)|value; - } - *p=bytes; - } -} - -static const char * -getStateTable() { - int i; - - for(i=0; i>8, b&0xff); - } else if(b<=0xffffff) { - sprintf(s, "\\x%02lX\\x%02lX\\x%02lX", b>>16, (b>>8)&0xff, b&0xff); - } else { - sprintf(s, "\\x%02lX\\x%02lX\\x%02lX\\x%02lX", b>>24, (b>>16)&0xff, (b>>8)&0xff, b&0xff); - } -} - -static void -writeUCM(FILE *f, const char *ucmname, const char *rpname, const char *tpname) { - char buffer[100]; - const char *s; - long i; - - /* write the header */ - fprintf(f, - "# *******************************************************************************\n" - "# *\n" - "# * Copyright (C) 1995-2001, International Business Machines\n" - "# * Corporation and others. All Rights Reserved.\n" - "# *\n" - "# *******************************************************************************\n" - "#\n" - "# File created by rptp2ucm (compiled on %s)\n" - "# from source files %s and %s\n" - "#\n", __DATE__, rpname, tpname); - - /* ucmname does not have a path or .ucm */ - fprintf(f, " \"%s\"\n", ucmname); - - fputs(" \"AXXXX\"\n", f); - fprintf(f, " %u\n", maxCharLength); - fprintf(f, " %u\n", minCharLength); - - if(maxCharLength==1) { - fputs(" \"SBCS\"\n", f); - } else if(maxCharLength==2) { - if(minCharLength==1) { - if(charsetFamily==EBCDIC) { - fputs(" \"EBCDIC_STATEFUL\"\n", f); - } else { - fputs(" \"MBCS\"\n", f); - } - } else if(minCharLength==2) { - fputs(" \"DBCS\"\n", f); - } else { - fputs(" \"MBCS\"\n", f); - } - } else { - fputs(" \"MBCS\"\n", f); - } - - if(subchar!=0) { - writeBytes(buffer, subchar); - fprintf(f, " %s\n", buffer); - } - - if(subchar1!=0) { - fprintf(f, " \\x%02X\n", subchar1); - } - - /* write charset family */ - if(charsetFamily==ASCII) { - fputs(" \"ASCII\"\n", f); - } else { - fputs(" \"EBCDIC\"\n", f); - } - - /* write alias describing the codepage */ - sprintf(buffer, " \"ibm-%u", ccsid); - if(!usesPUA && !variantLF && !variantASCII && !variantControls && !variantSUB) { - strcat(buffer, "_STD\"\n\n"); - } else { - /* add variant indicators in alphabetic order */ - if(variantASCII) { - strcat(buffer, "_VASCII"); - } - if(variantControls) { - strcat(buffer, "_VGCTRL"); - } - if(variantLF) { - strcat(buffer, "_VLF"); - } - if(variantSUB) { - strcat(buffer, "_VSUB"); - } - if(usesPUA) { - strcat(buffer, "_VPUA"); - } - strcat(buffer, "\"\n\n"); - } - fputs(buffer, f); - - /* write the state table - */ - s=getStateTable(); - if(s!=NULL) { - fputs(s, f); - fputs("\n", f); - } else if(is7Bit) { - fputs(" 0-7f\n\n", f); - } - - /* write the mappings */ - fputs("CHARMAP\n", f); - for(i=0; i %s |%lu\n", fromUMappings[i].u&0xffffff, buffer, fromUMappings[i].u>>24); - } - fputs("END CHARMAP\n", f); -} - -static void -processTable(const char *arg) { - char filename[1024], tpname[32]; - const char *basename, *s; - FILE *rpmap, *tpmap, *ucm; - unsigned long value, unicode; - int length; - - init(); - - /* separate path and basename */ - basename=strrchr(arg, '/'); - if(basename==NULL) { - basename=strrchr(arg, '\\'); - if(basename==NULL) { - basename=arg; - } else { - ++basename; - } - } else { - ++basename; - s=strrchr(arg, '\\'); - if(s!=NULL && ++s>basename) { - basename=s; - } - } - - /* is this a standard RPMAP filename? */ - value=strtoul(basename, (char **)&s, 16); - if( strlen(basename)!=17 || - (memcmp(basename+9, "RPMAP", 5)!=0 && memcmp(basename+9, "rpmap", 5)!=0 && - memcmp(basename+9, "RXMAP", 5)!=0 && memcmp(basename+9, "rxmap", 5)!=0) || - (s-basename)!=8 || - *s!='.' - ) { - fprintf(stderr, "error: \"%s\" is not a standard RPMAP filename\n", basename); - exit(1); - } - - /* is this really a Unicode conversion table? - get the CCSID */ - unicode=value&0xffff; - if(unicode==13488 || unicode==17584) { - ccsid=(unsigned int)(value>>16); - } else { - unicode=value>>16; - if(unicode==13488 || unicode==17584) { - ccsid=(unsigned int)(value&0xffff); - } else { - fprintf(stderr, "error: \"%s\" is not a Unicode conversion table\n", basename); - exit(1); - } - } - - /* try to open the RPMAP file */ - rpmap=fopen(arg, "r"); - if(rpmap==NULL) { - fprintf(stderr, "error: unable to open \"%s\"\n", arg); - exit(1); - } - - /* try to open the TPMAP file */ - strcpy(filename, arg); - length=strlen(filename); - - /* guess the TPMAP filename; note that above we have checked the format of the basename */ - /* replace the R in RPMAP by T, keep upper- or lowercase */ - if(filename[length-8]=='R') { - filename[length-8]='T'; - } else { - filename[length-8]='t'; - } - - /* reverse the CCSIDs */ - memcpy(filename+length-17, basename+4, 4); - memcpy(filename+length-13, basename, 4); - - /* first, keep the same suffix */ - tpmap=fopen(filename, "r"); - if(tpmap==NULL) { - /* next, try reducing the second to last digit by 1 */ - --filename[length-2]; - tpmap=fopen(filename, "r"); - if(tpmap==NULL) { - /* there is no TPMAP */ - fprintf(stderr, "error: unable to find the TPMAP file for \"%s\"\n", arg); - exit(1); - } - } - puts(filename); - strcpy(tpname, filename+length-17); - - /* parse both files */ - fromUMappingsTop=parseMappings(rpmap, fromUMappings); - toUMappingsTop=parseMappings(tpmap, toUMappings); - fclose(tpmap); - fclose(rpmap); - - /* if there is no subchar, then try to get it from the corresponding UPMAP */ - if(subchar==0) { - FILE *f; - - /* restore the RPMAP filename and just replace the R by U */ - strcpy(filename+length-17, basename); - if(filename[length-8]=='R') { - filename[length-8]='U'; - } else { - filename[length-8]='u'; - } - - f=fopen(filename, "r"); - if(f==NULL) { - /* try reversing the CCSIDs */ - memcpy(filename+length-17, basename+4, 4); - memcpy(filename+length-13, basename, 4); - f=fopen(filename, "r"); - } - if(f!=NULL) { - getSubcharFromUPMAP(f); - fclose(f); - } - } - - /* generate the .ucm filename - necessary before getSubchar() */ - length=sprintf(filename, "ibm-%u_", ccsid); - - /* uppercase and append the suffix */ - filename[length++]=toupper(basename[10]); /* P or X */ - filename[length++]=toupper(basename[14]); /* last 3 suffix characters */ - filename[length++]=toupper(basename[15]); - filename[length++]=toupper(basename[16]); - filename[length++]='-'; - filename[length]=0; - /*concatenate year*/ - strcat(filename,YEAR); - /* find the subchar if still necessary - necessary before merging for correct |2 */ - if(subchar==0 && !getSubchar(filename+4)) { - fprintf(stderr, "warning: missing subchar in \"%s\" (CCSID=0x%04X)\n", filename, ccsid); - } - - /* merge the mappings */ - mergeMappings(); - - /* analyze the conversion table */ - analyzeTable(); - - /* open the .ucm file */ - strcat(filename, ".ucm"); - ucm=fopen(filename, "w"); - if(ucm==NULL) { - fprintf(stderr, "error: unable to open output file \"%s\"\n", filename); - exit(4); - } - - /* remove the .ucm from the filename for the following processing */ - filename[strlen(filename)-4]=0; - - /* write the .ucm file */ - writeUCM(ucm, filename, basename, tpname); - fclose(ucm); -} - -extern int -main(int argc, const char *argv[]) { - if(argc<2) { - fprintf(stderr, - "usage: %s { rpmap/rxmap-filename }+\n", - argv[0]); - exit(1); - } - - while(--argc>0) { - puts(*++argv); - processTable(*argv); - } - - return 0; -} +/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */ diff --git a/icu4c/source/tools/makeconv/misc/ucmmerge.c b/icu4c/source/tools/makeconv/misc/ucmmerge.c index c0cffd3ca5b..a8035bbaaca 100644 --- a/icu4c/source/tools/makeconv/misc/ucmmerge.c +++ b/icu4c/source/tools/makeconv/misc/ucmmerge.c @@ -21,87 +21,6 @@ * On Windows: cl ucmmerge.c */ -#include -#include -#include +#error File moved to charset/source/ucmtools/ on 2002-nov-06 -extern int -main(int argc, const char *argv[]) { - FILE *old, *update; - char line[200]; - char *s, *end; - unsigned long b, i, mappingsTop=0; - - /* open the two input files */ - if(argc<3) { - fprintf(stderr, "usage: %s old-ucm-filename new-ucm-filename\n", argv[0]); - return 2; - } - old=fopen(argv[1], "r"); - if(old==NULL) { - fprintf(stderr, "error: unable to open %s\n", argv[1]); - return 2; - } - update=fopen(argv[2], "r"); - if(update==NULL) { - fprintf(stderr, "error: unable to open %s\n", argv[2]); - return 2; - } - - /* copy old until before the "created on" line */ - for(;;) { - if(fgets(line, sizeof(line), old)==NULL) { - return 1; - } - if(0==strncmp(line, "# File created on ", 18)) { - break; - } - fputs(line, stdout); - } - - /* skip update until before the "created on" line */ - for(;;) { - if(fgets(line, sizeof(line), update)==NULL) { - return 1; - } - if(0==strncmp(line, "# File created on ", 18)) { - break; - } - } - - /* copy the "created on" line from update */ - fputs(line, stdout); - - /* copy the rest of the old header including the "CHARMAP" line */ - for(;;) { - if(fgets(line, sizeof(line), old)==NULL) { - return 1; - } - fputs(line, stdout); - if(0==strncmp(line, "CHARMAP", 7)) { - break; - } - } - - /* skip the rest of the update header */ - for(;;) { - if(fgets(line, sizeof(line), update)==NULL) { - return 1; - } - if(0==strncmp(line, "CHARMAP", 7)) { - break; - } - } - - /* copy the rest of the update file */ - for(;;) { - if(fgets(line, sizeof(line), update)==NULL) { - break; - } - fputs(line, stdout); - } - - fclose(old); - fclose(update); - return 0; -} +/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */ diff --git a/icu4c/source/tools/makeconv/misc/ucmstrip.c b/icu4c/source/tools/makeconv/misc/ucmstrip.c index fccade2165d..751af780035 100644 --- a/icu4c/source/tools/makeconv/misc/ucmstrip.c +++ b/icu4c/source/tools/makeconv/misc/ucmstrip.c @@ -23,34 +23,6 @@ * On Windows: cl ucmstrip.c */ -#include -#include -#include +#error File moved to charset/source/ucmtools/ on 2002-nov-06 -extern int -main(int argc, const char *argv[]) { - char line[200]; - char *s, *end; - unsigned long b, i, mappingsTop=0; - - /* parse the input file from stdin */ - /* skip lines until and including the one with "created on" */ - for(;;) { - if(gets(line)==NULL) { - return 0; - } - if(0==strncmp(line, "# File created on ", 18)) { - break; - } - } - - /* write all lines except with and and */ - for(;;) { - if(gets(line)==NULL) { - return 0; - } - if(0!=strncmp(line, "", 13) && 0!=strncmp(line, "", 11) && 0!=strncmp(line, "", 14)) { - puts(line); - } - } -} +/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */