From baffa188a76c682748eb8f39d48de15990d72214 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Mon, 12 Jun 2000 20:29:04 +0000 Subject: [PATCH] ICU-434 new sample [convsamp] X-SVN-Rev: 1606 --- .gitattributes | 1 + icu4c/source/samples/ucnv/convsamp.cpp | 408 +++++++++++++++++++++++++ icu4c/source/samples/ucnv/data01.ut8 | 15 + 3 files changed, 424 insertions(+) create mode 100644 icu4c/source/samples/ucnv/convsamp.cpp create mode 100644 icu4c/source/samples/ucnv/data01.ut8 diff --git a/.gitattributes b/.gitattributes index 27d1d0b4a33..7184b471e4c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -75,6 +75,7 @@ icu4c/source/data/brkitr/wordBE.brk -text icu4c/source/data/brkitr/wordLE.brk -text icu4c/source/data/brkitr/word_thBE.brk -text icu4c/source/data/brkitr/word_thLE.brk -text +icu4c/source/samples/ucnv/data01.ut8 -text icu4c/source/test/testdata/en_US.uni -text icu4c/source/test/testdata/uni-text.txt -text icu4j/src/com/ibm/demo/rbbi/english.dict -text diff --git a/icu4c/source/samples/ucnv/convsamp.cpp b/icu4c/source/samples/ucnv/convsamp.cpp new file mode 100644 index 00000000000..7199c807319 --- /dev/null +++ b/icu4c/source/samples/ucnv/convsamp.cpp @@ -0,0 +1,408 @@ +/************************************************************************** +* +* Copyright (C) 2000, International Business Machines +* Corporation and others. All Rights Reserved. +* +*************************************************************************** +* file name: convsamp.c +* encoding: ASCII (7-bit) +* +* created on: 2000may30 +* created by: Steven R. Loomis +* +* Sample code for the ICU conversion routines. +* +* Note: Nothing special is needed to build this sample. Link with +* the icu UC and icu I18N libraries. +* +* I use 'assert' for error checking, you probably will want +* something more flexible. '***BEGIN SAMPLE***' and +* '***END SAMPLE***' mark pieces suitable for stand alone +* code snippets. +* +*/ + +#include +#include /* for isspace, etc. */ +#include + +#include "unicode/utypes.h" /* Basic ICU data types */ +#include "unicode/ucnv.h" /* C Converter API */ +#include "unicode/convert.h" /* C++ Converter API */ +#include "unicode/ustring.h" /* some more string fcns*/ + +/* Some utility functions */ + +static const UChar kNone[] = { 0x0000 }; + +/* Print a UChar if possible, in seven characters. */ +void prettyPrintUChar(UChar c) +{ + if( (c <= 0x007F) && + (isgraph(c)) ) { + printf(" '%c' ", (char)(0x00FF&c)); + } else if ( c > 0x007F ) { + char buf[100]; + UErrorCode status = U_ZERO_ERROR; + UTextOffset o; + + o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 100, &status); + if(U_SUCCESS(status)) { + buf[6] = 0; + printf("%- 7s", buf); + } else { + printf("??????? "); + } + } else { + switch((char)(c & 0x007F)) { + case ' ': + printf(" ' ' "); + break; + case '\t': + printf(" \t "); + break; + case '\n': + printf(" \n "); + break; + default: + printf(" "); + break; + } + } +} + + +void printUChars(const char *name = "?", + const UChar *uch = kNone, + int32_t len = -1 ) +{ + int32_t i; + + if( (len == -1) && (uch) ) { + len = u_strlen(uch); + } + + printf("% 5s:", name); + for( i = 0; i koi8-r conversion\n"); + + + // **************************** START SAMPLE ******************* + // "Moscva!" in cyrillic letters, to be converted to the KOI8-R + // Russian code page. + UChar source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432, + 0x0430, 0x0021, 0x0000 }; + char target[100]; + int32_t targetSize = sizeof(target); + UnicodeString myString(source); + UErrorCode status = U_ZERO_ERROR; + + // set up the converter + UnicodeConverterCPP conv("koi8-r", status); + assert(U_SUCCESS(status)); + + // convert to KOI8-R + conv.fromUnicodeString(target, targetSize, myString, status); + assert(U_SUCCESS(status)); + + // ***************************** END SAMPLE ******************** + + // Print it out + printUChars("src", source); + printf("\n"); + printBytes("targ", target, targetSize); + + return U_ZERO_ERROR; +} + + +/****************************************************** + Similar sample to the preceding one. + You must call ucnv_close to clean up the memory used by the + converter. + + 'len' returns the number of OUTPUT bytes resulting from the + conversion. In this case, it will be 9 even though there are + only 6 unicode characters going in. This is because the + letters 'cat' each only take up one byte, but the remaining + three UChars each take up 2 bytes in the output codepage. + + src: 0 1 2 3 4 5 + uni: \u0063 \u0061 \u0074 \u732B \uFF2F \uFF2B + ch: 'c' 'a' 't' CJK UN + + targ: 0 1 2 3 4 5 6 7 8 + uni: \x63 \x61 \x74 \x94 \x4C \x82 \x6E \x82 \x6A + ch: 'c' 'a' 't' [not readable here........] + + */ + +UErrorCode convsample_02() +{ + printf("\n\n==============================================\n" + "Sample 02: C: simple Unicode -> Shift_Jis conversion\n"); + + + // **************************** START SAMPLE ******************* + // "catOK" + UChar source[] = { 0x0063, 0x0061, 0x0074, 0x732B, 0xFF2F, 0xFF2B, + 0x0000 }; + char target[100]; + UErrorCode status = U_ZERO_ERROR; + UConverter *conv; + int32_t len; + + // set up the converter + conv = ucnv_open("shift_jis", &status); + assert(U_SUCCESS(status)); + + // convert to shift-jis + len = ucnv_fromUChars(conv, target, 100, source, -1, &status); + assert(U_SUCCESS(status)); + + // close the converter + ucnv_close(conv); + + // ***************************** END SAMPLE ******************** + + // Print it out + printUChars("src", source); + printf("\n"); + printBytes("targ", target, len); + + return U_ZERO_ERROR; +} + + +UErrorCode convsample_03() +{ + printf("\n\n==============================================\n" + "Sample 03: C: print out all converters\n"); + + int32_t count; + int32_t i; + + // **************************** START SAMPLE ******************* + count = ucnv_countAvailable(); + printf("Available converters: %d\n", count); + + for(i=0;i 0) ) + { + // Convert bytes to unicode + source = inBuf; + sourceLimit = inBuf + count; + + do + { + target = uBuf; + targetLimit = uBuf + uBufSize; + + ucnv_toUnicode(conv, &target, targetLimit, + &source, sourceLimit, NULL, + feof(f)?TRUE:FALSE, /* pass 'flush' when eof */ + /* is true (when no more data will come) */ + &status); + + if(status != U_INDEX_OUTOFBOUNDS_ERROR) + { + // simply ran out of space - we'll reset the target ptr the next + // time through the loop. + status = U_ZERO_ERROR; + } + else + { + // Check other errors here. + assert(U_SUCCESS(status)); + // Break out of the loop (by force) + } + + // Process the Unicode + // Todo: handle UTF-16/surrogates + + for(p = uBuf; p