mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-12 all public include files are now in unicode dir, all private icu_ functions renamed to uprv_
X-SVN-Rev: 472
This commit is contained in:
parent
4414ab71fa
commit
8e21f86812
132 changed files with 12021 additions and 11991 deletions
|
@ -18,4 +18,4 @@
|
|||
#endif
|
||||
|
||||
// provide an object for the implementations of the member functions of BiDi
|
||||
#include "bidi.h"
|
||||
#include "unicode/bidi.h"
|
||||
|
|
|
@ -1,255 +1,2 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: ubidi.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep15
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
#error Please include unicode/bidi.h instead
|
||||
|
||||
#ifndef BIDI_H
|
||||
#define BIDI_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "ubidi.h"
|
||||
|
||||
#ifndef XP_CPLUSPLUS
|
||||
# error This is a C++ header file.
|
||||
#endif
|
||||
|
||||
/**
|
||||
* BiDi is a C++ wrapper class for UBiDi.
|
||||
* You need one BiDi object in place of one UBiDi object.
|
||||
* For details on the API and implementation of the
|
||||
* Unicode BiDi algorithm, see ubidi.h.
|
||||
*
|
||||
* @see UBiDi
|
||||
*/
|
||||
class U_COMMON_API BiDi {
|
||||
public:
|
||||
/** @memo Default constructor, calls ubidi_open(). */
|
||||
BiDi();
|
||||
|
||||
/** @memo Constructor, calls ubidi_open(). */
|
||||
BiDi(UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Preallocating constructor, calls ubidi_openSized(). */
|
||||
BiDi(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Destructor, calls ubidi_close(). */
|
||||
~BiDi();
|
||||
|
||||
/** @memo Set this object for one paragraph's text. */
|
||||
BiDi &
|
||||
setPara(const UChar *text, UTextOffset length,
|
||||
UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
|
||||
UErrorCode &rErrorCode);
|
||||
|
||||
|
||||
/** @memo Set this object for one line of the paragraph object's text. */
|
||||
BiDi &
|
||||
setLine(const BiDi &rParaBiDi,
|
||||
UTextOffset start, UTextOffset limit,
|
||||
UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Get the directionality of the text. */
|
||||
UBiDiDirection
|
||||
getDirection() const;
|
||||
|
||||
/** @memo Get the length of the text. */
|
||||
UTextOffset
|
||||
getLength() const;
|
||||
|
||||
/** @memo Get the paragraph level of the text. */
|
||||
UBiDiLevel
|
||||
getParaLevel() const;
|
||||
|
||||
/** @memo Get the level for one character. */
|
||||
UBiDiLevel
|
||||
getLevelAt(UTextOffset charIndex) const;
|
||||
|
||||
/** @memo Get an array of levels for each character. */
|
||||
const UBiDiLevel *
|
||||
getLevels(UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Get a logical run. */
|
||||
void
|
||||
getLogicalRun(UTextOffset logicalStart,
|
||||
UTextOffset &rLogicalLimit, UBiDiLevel &rLevel) const;
|
||||
|
||||
/** @memo Get the number of runs. */
|
||||
UTextOffset
|
||||
countRuns(UErrorCode &rErrorCode);
|
||||
|
||||
/**
|
||||
* @memo Get one run's logical start, length, and directionality,
|
||||
* which can be 0 for LTR or 1 for RTL.
|
||||
*/
|
||||
UBiDiDirection
|
||||
getVisualRun(UTextOffset runIndex, UTextOffset &rLogicalStart, UTextOffset &rLength);
|
||||
|
||||
/** @memo Get the visual position from a logical text position. */
|
||||
UTextOffset
|
||||
getVisualIndex(UTextOffset logicalIndex, UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Get the logical text position from a visual position. */
|
||||
UTextOffset
|
||||
getLogicalIndex(UTextOffset visualIndex, UErrorCode &rErrorCode);
|
||||
|
||||
/**
|
||||
* @memo Get a logical-to-visual index map (array) for the characters in the UBiDi
|
||||
* (paragraph or line) object.
|
||||
*/
|
||||
void
|
||||
getLogicalMap(UTextOffset *indexMap, UErrorCode &rErrorCode);
|
||||
|
||||
/**
|
||||
* @memo Get a visual-to-logical index map (array) for the characters in the UBiDi
|
||||
* (paragraph or line) object.
|
||||
*/
|
||||
void
|
||||
getVisualMap(UTextOffset *indexMap, UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Same as ubidi_reorderLogical(). */
|
||||
static void
|
||||
reorderLogical(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
|
||||
|
||||
/** @memo Same as ubidi_reorderVisual(). */
|
||||
static void
|
||||
reorderVisual(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
|
||||
|
||||
/** @memo Same as ubidi_invertMap(). */
|
||||
static void
|
||||
invertMap(const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length);
|
||||
|
||||
protected:
|
||||
UBiDi *pBiDi;
|
||||
};
|
||||
|
||||
/* Inline implementations. -------------------------------------------------- */
|
||||
|
||||
/** Default constructor: stores the result of ubidi_open() in pBiDi. */
inline BiDi::BiDi()
    : pBiDi(ubidi_open()) {
}
|
||||
|
||||
/**
 * Error-reporting constructor.
 * When rErrorCode already indicates a failure, ubidi_open() is not called
 * and pBiDi stays 0. Otherwise pBiDi receives the result of ubidi_open(),
 * and rErrorCode is set to U_MEMORY_ALLOCATION_ERROR if that result is 0.
 */
inline BiDi::BiDi(UErrorCode &rErrorCode)
    : pBiDi(0) {
    if(!U_SUCCESS(rErrorCode)) {
        /* incoming failure: leave the object without a UBiDi */
        return;
    }

    pBiDi=ubidi_open();
    if(pBiDi==0) {
        rErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}
|
||||
|
||||
/**
 * Preallocating constructor: stores the result of
 * ubidi_openSized(maxLength, maxRunCount, &rErrorCode) in pBiDi.
 */
inline BiDi::BiDi(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode &rErrorCode)
    : pBiDi(ubidi_openSized(maxLength, maxRunCount, &rErrorCode)) {
}
|
||||
|
||||
/** Destructor: passes the wrapped pointer to ubidi_close() and resets pBiDi to 0. */
inline BiDi::~BiDi() {
    UBiDi *const handle=pBiDi;
    pBiDi=0;
    ubidi_close(handle);
}
|
||||
|
||||
/**
 * Forwards all arguments, together with pBiDi, to ubidi_setPara().
 * Errors are reported through rErrorCode.
 * @return A reference to this object.
 */
inline BiDi &
BiDi::setPara(const UChar *text, UTextOffset length,
              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
              UErrorCode &rErrorCode) {
    UErrorCode *const pErrorCode=&rErrorCode;
    ubidi_setPara(pBiDi, text, length, paraLevel, embeddingLevels, pErrorCode);
    return *this;
}
|
||||
|
||||
|
||||
/**
 * Calls ubidi_setLine() with the UBiDi of rParaBiDi as its first argument
 * and this object's own UBiDi as the fourth one.
 * Errors are reported through rErrorCode.
 * @return A reference to this object.
 */
inline BiDi &
BiDi::setLine(const BiDi &rParaBiDi,
              UTextOffset start, UTextOffset limit,
              UErrorCode &rErrorCode) {
    UErrorCode *const pErrorCode=&rErrorCode;
    ubidi_setLine(rParaBiDi.pBiDi, start, limit, pBiDi, pErrorCode);
    return *this;
}
|
||||
|
||||
/** @return The value of ubidi_getDirection() for the wrapped UBiDi. */
inline UBiDiDirection
BiDi::getDirection() const {
    const UBiDiDirection direction=ubidi_getDirection(pBiDi);
    return direction;
}
|
||||
|
||||
/** @return The value of ubidi_getLength() for the wrapped UBiDi. */
inline UTextOffset
BiDi::getLength() const {
    const UTextOffset textLength=ubidi_getLength(pBiDi);
    return textLength;
}
|
||||
|
||||
/** @return The value of ubidi_getParaLevel() for the wrapped UBiDi. */
inline UBiDiLevel
BiDi::getParaLevel() const {
    const UBiDiLevel level=ubidi_getParaLevel(pBiDi);
    return level;
}
|
||||
|
||||
/** @return The value of ubidi_getLevelAt() for the wrapped UBiDi and charIndex. */
inline UBiDiLevel
BiDi::getLevelAt(UTextOffset charIndex) const {
    const UBiDiLevel level=ubidi_getLevelAt(pBiDi, charIndex);
    return level;
}
|
||||
|
||||
/**
 * @return The pointer returned by ubidi_getLevels() for the wrapped UBiDi.
 *         Errors are reported through rErrorCode.
 */
inline const UBiDiLevel *
BiDi::getLevels(UErrorCode &rErrorCode) {
    const UBiDiLevel *const levels=ubidi_getLevels(pBiDi, &rErrorCode);
    return levels;
}
|
||||
|
||||
inline void
|
||||
BiDi::getLogicalRun(UTextOffset logicalStart,
|
||||
UTextOffset &rLogicalLimit, UBiDiLevel &rLevel) const {
|
||||
ubidi_getLogicalRun(pBiDi, logicalStart, &rLogicalLimit, &rLevel);
|
||||
}
|
||||
|
||||
/**
 * @return The value of ubidi_countRuns() for the wrapped UBiDi.
 *         Errors are reported through rErrorCode.
 */
inline UTextOffset
BiDi::countRuns(UErrorCode &rErrorCode) {
    const UTextOffset runCount=ubidi_countRuns(pBiDi, &rErrorCode);
    return runCount;
}
|
||||
|
||||
/**
 * Forwards to ubidi_getVisualRun(); the addresses of rLogicalStart and
 * rLength are passed as its output arguments.
 * @return The direction value returned by ubidi_getVisualRun().
 */
inline UBiDiDirection
BiDi::getVisualRun(UTextOffset runIndex, UTextOffset &rLogicalStart, UTextOffset &rLength) {
    const UBiDiDirection direction=
        ubidi_getVisualRun(pBiDi, runIndex, &rLogicalStart, &rLength);
    return direction;
}
|
||||
|
||||
/**
 * @return The value of ubidi_getVisualIndex() for the wrapped UBiDi and
 *         logicalIndex. Errors are reported through rErrorCode.
 */
inline UTextOffset
BiDi::getVisualIndex(UTextOffset logicalIndex, UErrorCode &rErrorCode) {
    const UTextOffset visualIndex=ubidi_getVisualIndex(pBiDi, logicalIndex, &rErrorCode);
    return visualIndex;
}
|
||||
|
||||
/**
 * @return The value of ubidi_getLogicalIndex() for the wrapped UBiDi and
 *         visualIndex. Errors are reported through rErrorCode.
 */
inline UTextOffset
BiDi::getLogicalIndex(UTextOffset visualIndex, UErrorCode &rErrorCode) {
    const UTextOffset logicalIndex=ubidi_getLogicalIndex(pBiDi, visualIndex, &rErrorCode);
    return logicalIndex;
}
|
||||
|
||||
inline void
|
||||
BiDi::getLogicalMap(UTextOffset *indexMap, UErrorCode &rErrorCode) {
|
||||
ubidi_getLogicalMap(pBiDi, indexMap, &rErrorCode);
|
||||
}
|
||||
|
||||
inline void
|
||||
BiDi::getVisualMap(UTextOffset *indexMap, UErrorCode &rErrorCode) {
|
||||
ubidi_getVisualMap(pBiDi, indexMap, &rErrorCode);
|
||||
}
|
||||
|
||||
inline void
|
||||
BiDi::reorderLogical(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap) {
|
||||
ubidi_reorderLogical(levels, length, indexMap);
|
||||
}
|
||||
|
||||
inline void
|
||||
BiDi::reorderVisual(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap) {
|
||||
ubidi_reorderVisual(levels, length, indexMap);
|
||||
}
|
||||
|
||||
inline void
|
||||
BiDi::invertMap(const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length) {
|
||||
ubidi_invertMap(srcMap, destMap, length);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
**********************************************************************
|
||||
*/
|
||||
|
||||
#include "chariter.h"
|
||||
#include "unicode/chariter.h"
|
||||
|
||||
// Out-of-class definition of the static constant CharacterIterator::DONE, with value 0xffff.
const UChar CharacterIterator::DONE = 0xffff;
|
||||
|
||||
|
|
|
@ -1,194 +1 @@
|
|||
|
||||
/*
|
||||
********************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CHARITER_H
|
||||
#define CHARITER_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unistr.h"
|
||||
|
||||
|
||||
/**
|
||||
* Abstract class defining a protocol for accessing characters in a text-storage object.
|
||||
<P>Examples:<P>
|
||||
|
||||
Function processing characters, in this example simple output
|
||||
<pre>
|
||||
. void processChar( UChar c )
|
||||
. {
|
||||
. cout << " " << c;
|
||||
. }
|
||||
</pre>
|
||||
Traverse the text from start to finish
|
||||
<pre>
|
||||
. void traverseForward(CharacterIterator& iter)
|
||||
. {
|
||||
. for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
|
||||
. processChar(c);
|
||||
. }
|
||||
. }
|
||||
</pre>
|
||||
Traverse the text backwards, from end to start
|
||||
<pre>
|
||||
. void traverseBackward(CharacterIterator& iter)
|
||||
. {
|
||||
. for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
|
||||
. processChar(c);
|
||||
. }
|
||||
. }
|
||||
</pre>
|
||||
Traverse both forward and backward from a given position in the text.
|
||||
Calls to notBoundary() in this example represent some additional stopping criteria.
|
||||
<pre>
|
||||
. void traverseOut(CharacterIterator& iter, UTextOffset pos)
|
||||
. {
|
||||
. UChar c;
|
||||
. for (c = iter.setIndex(pos);
|
||||
. c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
||||
. c = iter.next()) {}
|
||||
. UTextOffset end = iter.getIndex();
|
||||
. for (c = iter.setIndex(pos);
|
||||
. c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
||||
. c = iter.previous()) {}
|
||||
. UTextOffset start = iter.getIndex() + 1;
|
||||
.
|
||||
. cout << "start: " << start << " end: " << end << endl;
|
||||
. for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
|
||||
. processChar(c);
|
||||
. }
|
||||
. }
|
||||
</pre>
|
||||
Creating a StringCharacterIterator and calling the test functions
|
||||
<pre>
|
||||
. void CharacterIterator_Example( void )
|
||||
. {
|
||||
. cout << endl << "===== CharacterIterator_Example: =====" << endl;
|
||||
. UnicodeString text("Ein kleiner Satz.");
|
||||
. StringCharacterIterator iterator(text);
|
||||
. cout << "----- traverseForward: -----------" << endl;
|
||||
. traverseForward( iterator );
|
||||
. cout << endl << endl << "----- traverseBackward: ----------" << endl;
|
||||
. traverseBackward( iterator );
|
||||
. cout << endl << endl << "----- traverseOut: ---------------" << endl;
|
||||
. traverseOut( iterator, 7 );
|
||||
. cout << endl << endl << "-----" << endl;
|
||||
. }
|
||||
</pre>
|
||||
*/
|
||||
class U_COMMON_API CharacterIterator
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Value returned by most of CharacterIterator's functions
|
||||
* when the iterator has reached the limits of its iteration. */
|
||||
static const UChar DONE;
|
||||
|
||||
/**
|
||||
* Destructor. */
|
||||
virtual ~CharacterIterator();
|
||||
|
||||
/**
|
||||
* Returns true when both iterators refer to the same
|
||||
* character in the same character-storage object. */
|
||||
virtual bool_t operator==(const CharacterIterator& that) const = 0;
|
||||
|
||||
/**
|
||||
* Returns true when the iterators refer to different
|
||||
* text-storage objects, or to different characters in the
|
||||
* same text-storage object. */
|
||||
bool_t operator!=(const CharacterIterator& that) const { return !operator==(that); }
|
||||
|
||||
/**
|
||||
* Returns a pointer to a new CharacterIterator of the same
|
||||
* concrete class as this one, and referring to the same
|
||||
* character in the same text-storage object as this one. The
|
||||
* caller is responsible for deleting the new clone. */
|
||||
virtual CharacterIterator*
|
||||
clone(void) const = 0;
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator. */
|
||||
virtual int32_t hashCode(void) const = 0;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first character in its
|
||||
* iteration range, and returns that character, */
|
||||
virtual UChar first(void) = 0;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last character in its
|
||||
* iteration range, and returns that character. */
|
||||
virtual UChar last(void) = 0;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th character
|
||||
* in the text-storage object the iterator refers to, and
|
||||
* returns that character. */
|
||||
virtual UChar setIndex(UTextOffset position) = 0;
|
||||
|
||||
/**
|
||||
* Returns the character the iterator currently refers to. */
|
||||
virtual UChar current(void) const = 0;
|
||||
|
||||
/**
|
||||
* Advances to the next character in the iteration range
|
||||
* (toward last()), and returns that character. If there are
|
||||
* no more characters to return, returns DONE. */
|
||||
virtual UChar next(void) = 0;
|
||||
|
||||
/**
|
||||
* Advances to the previous character in the iteration rance
|
||||
* (toward first()), and returns that character. If there are
|
||||
* no more characters to return, returns DONE. */
|
||||
virtual UChar previous(void) = 0;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying text-storage
|
||||
* object of the character returned by first(). Since it's
|
||||
* possible to create an iterator that iterates across only
|
||||
* part of a text-storage object, this number isn't
|
||||
* necessarily 0. */
|
||||
virtual UTextOffset startIndex(void) const = 0;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying text-storage
|
||||
* object of the position immediately BEYOND the character
|
||||
* returned by last(). */
|
||||
virtual UTextOffset endIndex(void) const = 0;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying text-storage
|
||||
* object of the character the iterator currently refers to
|
||||
* (i.e., the character returned by current()). */
|
||||
virtual UTextOffset getIndex(void) const = 0;
|
||||
|
||||
/**
|
||||
* Copies the text under iteration into the UnicodeString
|
||||
* referred to by "result". @param result Receives a copy of
|
||||
* the text under iteration. */
|
||||
virtual void getText(UnicodeString& result) = 0;
|
||||
|
||||
/**
|
||||
* Returns a UClassID for this CharacterIterator ("poor man's
|
||||
* RTTI").<P> Despite the fact that this function is public,
|
||||
* DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! */
|
||||
virtual UClassID getDynamicClassID(void) const = 0;
|
||||
|
||||
protected:
|
||||
CharacterIterator() {}
|
||||
CharacterIterator(const CharacterIterator&) {}
|
||||
CharacterIterator& operator=(const CharacterIterator&) { return *this; }
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#error Please include unicode/chariter.h instead
|
||||
|
|
|
@ -27,12 +27,12 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Memory macros with the icu_ prefix.
 * Each one expands directly to the C library function of the same base name
 * (malloc, realloc, free, memcpy, memmove, memset, memcmp) and passes its
 * arguments through unchanged, in the same order.
 */
#define icu_malloc(size) malloc(size)
|
||||
#define icu_realloc(buffer, size) realloc(buffer, size)
|
||||
#define icu_free(buffer) free(buffer)
|
||||
#define icu_memcpy(dst, src, size) memcpy(dst, src, size)
|
||||
#define icu_memmove(dst, src, size) memmove(dst, src, size)
|
||||
#define icu_memset(buffer, mark, size) memset(buffer, mark, size)
|
||||
#define icu_memcmp(buffer1, buffer2, size) memcmp(buffer1, buffer2,size)
|
||||
/*
 * The same seven macros spelled with the uprv_ prefix; their expansions are
 * identical to those of the icu_ macros in this span.
 */
#define uprv_malloc(size) malloc(size)
|
||||
#define uprv_realloc(buffer, size) realloc(buffer, size)
|
||||
#define uprv_free(buffer) free(buffer)
|
||||
#define uprv_memcpy(dst, src, size) memcpy(dst, src, size)
|
||||
#define uprv_memmove(dst, src, size) memmove(dst, src, size)
|
||||
#define uprv_memset(buffer, mark, size) memset(buffer, mark, size)
|
||||
#define uprv_memcmp(buffer1, buffer2, size) memcmp(buffer1, buffer2,size)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -277,25 +277,25 @@ SOURCE=.\uvector.cpp
|
|||
# PROP Default_Filter "h;hpp;hxx;hm;inl"
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\bidi.h
|
||||
SOURCE=.\unicode\bidi.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\bidi.h
|
||||
InputPath=.\unicode\bidi.h
|
||||
|
||||
"..\..\include\bidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy bidi.h ..\..\include
|
||||
"..\..\include\unicode\bidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\bidi.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\bidi.h
|
||||
InputPath=.\unicode\bidi.h
|
||||
|
||||
"..\..\include\bidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy bidi.h ..\..\include
|
||||
"..\..\include\unicode\bidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\bidi.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -304,25 +304,25 @@ InputPath=.\bidi.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\chariter.h
|
||||
SOURCE=.\unicode\chariter.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\chariter.h
|
||||
InputPath=.\unicode\chariter.h
|
||||
|
||||
"..\..\include\chariter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy chariter.h ..\..\include
|
||||
"..\..\include\unicode\chariter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\chariter.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\chariter.h
|
||||
InputPath=.\unicode\chariter.h
|
||||
|
||||
"..\..\include\chariter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy chariter.h ..\..\include
|
||||
"..\..\include\unicode\chariter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\chariter.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -343,25 +343,25 @@ SOURCE=.\compitr.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\convert.h
|
||||
SOURCE=.\unicode\convert.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\convert.h
|
||||
InputPath=.\unicode\convert.h
|
||||
|
||||
"..\..\include\convert.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy convert.h ..\..\include
|
||||
"..\..\include\unicode\convert.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\convert.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\convert.h
|
||||
InputPath=.\unicode\convert.h
|
||||
|
||||
"..\..\include\convert.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy convert.h ..\..\include
|
||||
"..\..\include\unicode\convert.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\convert.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -390,25 +390,25 @@ SOURCE=.\filestrm.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\locid.h
|
||||
SOURCE=.\unicode\locid.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\locid.h
|
||||
InputPath=.\unicode\locid.h
|
||||
|
||||
"..\..\include\locid.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy locid.h ..\..\include
|
||||
"..\..\include\unicode\locid.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\locid.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\locid.h
|
||||
InputPath=.\unicode\locid.h
|
||||
|
||||
"..\..\include\locid.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy locid.h ..\..\include
|
||||
"..\..\include\unicode\locid.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\locid.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -425,25 +425,25 @@ SOURCE=.\mutex.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\normlzr.h
|
||||
SOURCE=.\unicode\normlzr.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\normlzr.h
|
||||
InputPath=.\unicode\normlzr.h
|
||||
|
||||
"..\..\include\normlzr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy normlzr.h ..\..\include
|
||||
"..\..\include\unicode\normlzr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\normlzr.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\normlzr.h
|
||||
InputPath=.\unicode\normlzr.h
|
||||
|
||||
"..\..\include\normlzr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy normlzr.h ..\..\include
|
||||
"..\..\include\unicode\normlzr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\normlzr.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -452,25 +452,25 @@ InputPath=.\normlzr.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\putil.h
|
||||
SOURCE=.\unicode\putil.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\putil.h
|
||||
InputPath=.\unicode\putil.h
|
||||
|
||||
"..\..\include\putil.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy putil.h ..\..\include
|
||||
"..\..\include\unicode\putil.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\putil.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\putil.h
|
||||
InputPath=.\unicode\putil.h
|
||||
|
||||
"..\..\include\putil.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy putil.h ..\..\include
|
||||
"..\..\include\unicode\putil.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\putil.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -479,25 +479,25 @@ InputPath=.\putil.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\pwin32.h
|
||||
SOURCE=.\unicode\pwin32.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\pwin32.h
|
||||
InputPath=.\unicode\pwin32.h
|
||||
|
||||
"..\..\include\pwin32.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy pwin32.h ..\..\include
|
||||
"..\..\include\unicode\pwin32.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\pwin32.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\pwin32.h
|
||||
InputPath=.\unicode\pwin32.h
|
||||
|
||||
"..\..\include\pwin32.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy pwin32.h ..\..\include
|
||||
"..\..\include\unicode\pwin32.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\pwin32.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -518,25 +518,25 @@ SOURCE=.\rbread.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\rep.h
|
||||
SOURCE=.\unicode\rep.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\rep.h
|
||||
InputPath=.\unicode\rep.h
|
||||
|
||||
"..\..\include\rep.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy rep.h ..\..\include
|
||||
"..\..\include\unicode\rep.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\rep.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\rep.h
|
||||
InputPath=.\unicode\rep.h
|
||||
|
||||
"..\..\include\rep.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy rep.h ..\..\include
|
||||
"..\..\include\unicode\rep.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\rep.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -545,25 +545,25 @@ InputPath=.\rep.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\resbund.h
|
||||
SOURCE=.\unicode\resbund.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\resbund.h
|
||||
InputPath=.\unicode\resbund.h
|
||||
|
||||
"..\..\include\resbund.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy resbund.h ..\..\include
|
||||
"..\..\include\unicode\resbund.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\resbund.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\resbund.h
|
||||
InputPath=.\unicode\resbund.h
|
||||
|
||||
"..\..\include\resbund.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy resbund.h ..\..\include
|
||||
"..\..\include\unicode\resbund.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\resbund.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -572,25 +572,25 @@ InputPath=.\resbund.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\schriter.h
|
||||
SOURCE=.\unicode\schriter.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\schriter.h
|
||||
InputPath=.\unicode\schriter.h
|
||||
|
||||
"..\..\include\schriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy schriter.h ..\..\include
|
||||
"..\..\include\unicode\schriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\schriter.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\schriter.h
|
||||
InputPath=.\unicode\schriter.h
|
||||
|
||||
"..\..\include\schriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy schriter.h ..\..\include
|
||||
"..\..\include\unicode\schriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\schriter.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -599,25 +599,25 @@ InputPath=.\schriter.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\scsu.h
|
||||
SOURCE=.\unicode\scsu.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\scsu.h
|
||||
InputPath=.\unicode\scsu.h
|
||||
|
||||
"..\..\include\scsu.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy scsu.h ..\..\include
|
||||
"..\..\include\unicode\scsu.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\scsu.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\scsu.h
|
||||
InputPath=.\unicode\scsu.h
|
||||
|
||||
"..\..\include\scsu.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy scsu.h ..\..\include
|
||||
"..\..\include\unicode\scsu.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\scsu.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -626,25 +626,25 @@ InputPath=.\scsu.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ubidi.h
|
||||
SOURCE=.\unicode\ubidi.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ubidi.h
|
||||
InputPath=.\unicode\ubidi.h
|
||||
|
||||
"..\..\include\ubidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ubidi.h ..\..\include
|
||||
"..\..\include\unicode\ubidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ubidi.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ubidi.h
|
||||
InputPath=.\unicode\ubidi.h
|
||||
|
||||
"..\..\include\ubidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ubidi.h ..\..\include
|
||||
"..\..\include\unicode\ubidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ubidi.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -657,25 +657,25 @@ SOURCE=.\ubidiimp.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\uchar.h
|
||||
SOURCE=.\unicode\uchar.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\uchar.h
|
||||
InputPath=.\unicode\uchar.h
|
||||
|
||||
"..\..\include\uchar.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy uchar.h ..\..\include
|
||||
"..\..\include\unicode\uchar.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\uchar.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\uchar.h
|
||||
InputPath=.\unicode\uchar.h
|
||||
|
||||
"..\..\include\uchar.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy uchar.h ..\..\include
|
||||
"..\..\include\unicode\uchar.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\uchar.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -684,25 +684,25 @@ InputPath=.\uchar.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\uchriter.h
|
||||
SOURCE=.\unicode\uchriter.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\uchriter.h
|
||||
InputPath=.\unicode\uchriter.h
|
||||
|
||||
"..\..\include\uchriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy uchriter.h ..\..\include
|
||||
"..\..\include\unicode\uchriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\uchriter.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\uchriter.h
|
||||
InputPath=.\unicode\uchriter.h
|
||||
|
||||
"..\..\include\uchriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy uchriter.h ..\..\include
|
||||
"..\..\include\unicode\uchriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\uchriter.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -723,25 +723,25 @@ SOURCE=.\ucmp8.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv.h
|
||||
SOURCE=.\unicode\ucnv.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ucnv.h
|
||||
InputPath=.\unicode\ucnv.h
|
||||
|
||||
"..\..\include\ucnv.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ucnv.h ..\..\include
|
||||
"..\..\include\unicode\ucnv.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ucnv.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ucnv.h
|
||||
InputPath=.\unicode\ucnv.h
|
||||
|
||||
"..\..\include\ucnv.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ucnv.h ..\..\include
|
||||
"..\..\include\unicode\ucnv.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ucnv.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -750,25 +750,25 @@ InputPath=.\ucnv.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv_bld.h
|
||||
SOURCE=.\unicode\ucnv_bld.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ucnv_bld.h
|
||||
InputPath=.\unicode\ucnv_bld.h
|
||||
|
||||
"..\..\include\ucnv_bld.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ucnv_bld.h ..\..\include
|
||||
"..\..\include\unicode\ucnv_bld.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ucnv_bld.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ucnv_bld.h
|
||||
InputPath=.\unicode\ucnv_bld.h
|
||||
|
||||
"..\..\include\ucnv_bld.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ucnv_bld.h ..\..\include
|
||||
"..\..\include\unicode\ucnv_bld.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ucnv_bld.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -781,25 +781,25 @@ SOURCE=.\ucnv_cnv.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ucnv_err.h
|
||||
SOURCE=.\unicode\ucnv_err.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ucnv_err.h
|
||||
InputPath=.\unicode\ucnv_err.h
|
||||
|
||||
"..\..\include\ucnv_err.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ucnv_err.h ..\..\include
|
||||
"..\..\include\unicode\ucnv_err.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ucnv_err.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ucnv_err.h
|
||||
InputPath=.\unicode\ucnv_err.h
|
||||
|
||||
"..\..\include\ucnv_err.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ucnv_err.h ..\..\include
|
||||
"..\..\include\unicode\ucnv_err.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ucnv_err.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -816,25 +816,25 @@ SOURCE=.\ucnv_io.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\udata.h
|
||||
SOURCE=.\unicode\udata.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\udata.h
|
||||
InputPath=.\unicode\udata.h
|
||||
|
||||
"..\..\include\udata.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy udata.h ..\..\include
|
||||
"..\..\include\unicode\udata.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\udata.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\udata.h
|
||||
InputPath=.\unicode\udata.h
|
||||
|
||||
"..\..\include\udata.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy udata.h ..\..\include
|
||||
"..\..\include\unicode\udata.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\udata.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -847,25 +847,25 @@ SOURCE=.\uhash.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\uloc.h
|
||||
SOURCE=.\unicode\uloc.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\uloc.h
|
||||
InputPath=.\unicode\uloc.h
|
||||
|
||||
"..\..\include\uloc.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy uloc.h ..\..\include
|
||||
"..\..\include\unicode\uloc.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\uloc.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\uloc.h
|
||||
InputPath=.\unicode\uloc.h
|
||||
|
||||
"..\..\include\uloc.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy uloc.h ..\..\include
|
||||
"..\..\include\unicode\uloc.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\uloc.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -874,16 +874,16 @@ InputPath=.\uloc.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\umisc.h
|
||||
SOURCE=.\unicode\umisc.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\umisc.h
|
||||
InputPath=.\unicode\umisc.h
|
||||
InputName=umisc
|
||||
|
||||
"..\..\include\$(InputName).h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(InputName).h ..\..\include
|
||||
"..\..\include\unicode\$(InputName).h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\$(InputName).h ..\..\include\unicode
|
||||
echo $(InputName)
|
||||
|
||||
# End Custom Build
|
||||
|
@ -891,11 +891,11 @@ InputName=umisc
|
|||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\umisc.h
|
||||
InputPath=.\unicode\umisc.h
|
||||
InputName=umisc
|
||||
|
||||
"..\..\include\$(InputName).h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy $(InputName).h ..\..\include
|
||||
"..\..\include\unicode\$(InputName).h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\$(InputName).h ..\..\include\unicode
|
||||
echo $(InputName)
|
||||
|
||||
# End Custom Build
|
||||
|
@ -909,25 +909,25 @@ SOURCE=.\umutex.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unicode.h
|
||||
SOURCE=.\unicode\unicode.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unicode.h
|
||||
InputPath=.\unicode\unicode.h
|
||||
|
||||
"..\..\include\unicode.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode.h ..\..\include
|
||||
"..\..\include\unicode\unicode.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\unicode.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unicode.h
|
||||
InputPath=.\unicode\unicode.h
|
||||
|
||||
"..\..\include\unicode.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode.h ..\..\include
|
||||
"..\..\include\unicode\unicode.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\unicode.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -936,25 +936,25 @@ InputPath=.\unicode.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\unistr.h
|
||||
SOURCE=.\unicode\unistr.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unistr.h
|
||||
InputPath=.\unicode\unistr.h
|
||||
|
||||
"..\..\include\unistr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unistr.h ..\..\include
|
||||
"..\..\include\unicode\unistr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\unistr.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\unistr.h
|
||||
InputPath=.\unicode\unistr.h
|
||||
|
||||
"..\..\include\unistr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unistr.h ..\..\include
|
||||
"..\..\include\unicode\unistr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\unistr.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -967,25 +967,25 @@ SOURCE=.\unistrm.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ures.h
|
||||
SOURCE=.\unicode\ures.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ures.h
|
||||
InputPath=.\unicode\ures.h
|
||||
|
||||
"..\..\include\ures.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ures.h ..\..\include
|
||||
"..\..\include\unicode\ures.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ures.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ures.h
|
||||
InputPath=.\unicode\ures.h
|
||||
|
||||
"..\..\include\ures.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ures.h ..\..\include
|
||||
"..\..\include\unicode\ures.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ures.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -998,25 +998,25 @@ SOURCE=.\uresdata.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\ustring.h
|
||||
SOURCE=.\unicode\ustring.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ustring.h
|
||||
InputPath=.\unicode\ustring.h
|
||||
|
||||
"..\..\include\ustring.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ustring.h ..\..\include
|
||||
"..\..\include\unicode\ustring.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ustring.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\ustring.h
|
||||
InputPath=.\unicode\ustring.h
|
||||
|
||||
"..\..\include\ustring.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy ustring.h ..\..\include
|
||||
"..\..\include\unicode\ustring.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\ustring.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
@ -1025,25 +1025,25 @@ InputPath=.\ustring.h
|
|||
# End Source File
|
||||
# Begin Source File
|
||||
|
||||
SOURCE=.\utypes.h
|
||||
SOURCE=.\unicode\utypes.h
|
||||
|
||||
!IF "$(CFG)" == "common - Win32 Release"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\utypes.h
|
||||
InputPath=.\unicode\utypes.h
|
||||
|
||||
"..\..\include\utypes.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy utypes.h ..\..\include
|
||||
"..\..\include\unicode\utypes.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\utypes.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
!ELSEIF "$(CFG)" == "common - Win32 Debug"
|
||||
|
||||
# Begin Custom Build
|
||||
InputPath=.\utypes.h
|
||||
InputPath=.\unicode\utypes.h
|
||||
|
||||
"..\..\include\utypes.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy utypes.h ..\..\include
|
||||
"..\..\include\unicode\utypes.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
|
||||
copy unicode\utypes.h ..\..\include\unicode
|
||||
|
||||
# End Custom Build
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "ucmp8.h"
|
||||
#include "ucmp16.h"
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
#include "compitr.h"
|
||||
|
||||
#include "normlzr.h"
|
||||
#include "unicode/normlzr.h"
|
||||
|
||||
/**
|
||||
* Constant that indicates the iteration has completed.
|
||||
|
|
|
@ -11,8 +11,8 @@
|
|||
#define COMPITR_H
|
||||
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unistr.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
|
||||
/**
|
||||
|
|
|
@ -10,16 +10,16 @@ class Locale;
|
|||
class UnicodeString;
|
||||
class Mutex;
|
||||
|
||||
#include "utypes.h"
|
||||
#include "resbund.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/resbund.h"
|
||||
#include "cmemory.h"
|
||||
#include "mutex.h"
|
||||
extern "C" {
|
||||
#include "ucnv_io.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
#include "unicode/ucnv.h"
|
||||
}
|
||||
#include "convert.h"
|
||||
#include "unicode/convert.h"
|
||||
|
||||
/* list of converter and alias names */
|
||||
const char **UnicodeConverterCPP::availableConverterNames=NULL;
|
||||
|
@ -75,11 +75,11 @@ bool_t UnicodeConverterCPP::operator==(const UnicodeConverterCPP& that) const
|
|||
(myUnicodeConverter->fromCharErrorBehaviour == that.myUnicodeConverter->fromCharErrorBehaviour) &&
|
||||
(myUnicodeConverter->toUnicodeStatus == that.myUnicodeConverter->toUnicodeStatus) &&
|
||||
(myUnicodeConverter->subCharLen == that.myUnicodeConverter->subCharLen) &&
|
||||
(icu_memcmp(myUnicodeConverter->subChar, that.myUnicodeConverter->subChar, myUnicodeConverter->subCharLen) == 0) &&
|
||||
(uprv_memcmp(myUnicodeConverter->subChar, that.myUnicodeConverter->subChar, myUnicodeConverter->subCharLen) == 0) &&
|
||||
(myUnicodeConverter->UCharErrorBufferLength == that.myUnicodeConverter->UCharErrorBufferLength) &&
|
||||
(myUnicodeConverter->charErrorBufferLength == that.myUnicodeConverter->charErrorBufferLength) &&
|
||||
(icu_memcmp(myUnicodeConverter->UCharErrorBuffer, that.myUnicodeConverter->UCharErrorBuffer, myUnicodeConverter->UCharErrorBufferLength) == 0) &&
|
||||
(icu_memcmp(myUnicodeConverter->charErrorBuffer, that.myUnicodeConverter->charErrorBuffer, myUnicodeConverter->charErrorBufferLength) == 0) &&
|
||||
(uprv_memcmp(myUnicodeConverter->UCharErrorBuffer, that.myUnicodeConverter->UCharErrorBuffer, myUnicodeConverter->UCharErrorBufferLength) == 0) &&
|
||||
(uprv_memcmp(myUnicodeConverter->charErrorBuffer, that.myUnicodeConverter->charErrorBuffer, myUnicodeConverter->charErrorBufferLength) == 0) &&
|
||||
(myUnicodeConverter->fromUCharErrorBehaviour == that.myUnicodeConverter->fromUCharErrorBehaviour))
|
||||
return TRUE;
|
||||
else return FALSE;
|
||||
|
@ -180,7 +180,7 @@ UnicodeConverterCPP::toUnicodeString(UnicodeString& target,
|
|||
*on a "normal" call, only one iteration will be necessary.
|
||||
*/
|
||||
myTargetUChars =
|
||||
(UChar*)icu_malloc(sizeof(UChar)*(myTargetUCharsLength = (sourceSize/(int32_t)getMinBytesPerChar())));
|
||||
(UChar*)uprv_malloc(sizeof(UChar)*(myTargetUCharsLength = (sourceSize/(int32_t)getMinBytesPerChar())));
|
||||
|
||||
if (myTargetUChars == NULL)
|
||||
{
|
||||
|
@ -216,7 +216,7 @@ UnicodeConverterCPP::toUnicodeString(UnicodeString& target,
|
|||
} while (err == U_INDEX_OUTOFBOUNDS_ERROR);
|
||||
|
||||
|
||||
icu_free(myTargetUChars);
|
||||
uprv_free(myTargetUChars);
|
||||
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1,325 +1 @@
|
|||
/*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef CONVERT_H
|
||||
#define CONVERT_H
|
||||
|
||||
|
||||
#include "unistr.h"
|
||||
#include "ucnv.h"
|
||||
|
||||
class U_COMMON_API UnicodeConverterCPP
|
||||
{
|
||||
private:
|
||||
/*Internal Data representation of the Converter*/
|
||||
UConverter* myUnicodeConverter;
|
||||
/*Debug method*/
|
||||
void printRef(void) const;
|
||||
|
||||
/* list of converter and alias names */
|
||||
static const char **availableConverterNames;
|
||||
static int32_t availableConverterNamesCount;
|
||||
|
||||
public:
|
||||
|
||||
//Constructors and a destructor
|
||||
|
||||
/**
|
||||
* Creates Unicode Conversion Object will default to LATIN1 <-> encoding
|
||||
* @return An object Handle if successful or a NULL if the creation failed
|
||||
*/
|
||||
UnicodeConverterCPP();
|
||||
|
||||
/**
|
||||
* Creates Unicode Conversion Object by specifying the codepage name. The name
|
||||
* string is in ASCII format.
|
||||
* @param code_set the pointer to a char[] object containing a codepage name. (I)
|
||||
* @param UErrorCode Error status (I/O) U_ILLEGAL_ARGUMENT_ERROR will be returned if the string is empty.
|
||||
* If the internal program does not work correctly, for example, if there's no such codepage,
|
||||
* U_INTERNAL_PROGRAM_ERROR will be returned.
|
||||
* @return An object Handle if successful or a NULL if the creation failed
|
||||
*/
|
||||
UnicodeConverterCPP(const char* name,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
*Creates a UnicodeConverter object with the names specified as unicode strings. The name should be limited to
|
||||
*the ASCII-7 alphanumerics. Dash and underscore characters are allowed for readability, but are ignored in the
|
||||
*search.
|
||||
*@param code_set name of the uconv table in Unicode string (I)
|
||||
*@param err error status (I/O) U_ILLEGAL_ARGUMENT_ERROR will be returned if the string is empty. If the internal
|
||||
*program does not work correctly, for example, if there's no such codepage, U_INTERNAL_PROGRAM_ERROR will be
|
||||
*returned.
|
||||
*@return the created Unicode converter object
|
||||
*/
|
||||
UnicodeConverterCPP(const UnicodeString& name,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
* Creates Unicode Conversion Object using the codepage ID number.
|
||||
* @param code_set a codepage # (I)
|
||||
* @param err Error status (I/O) U_ILLEGAL_ARGUMENT_ERROR will be returned if the string is empty.
|
||||
* If the internal program does not work correctly, for example, if there's no such codepage,
|
||||
* U_INTERNAL_PROGRAM_ERROR will be returned.
|
||||
* @return An object Handle if successful or a NULL if failed
|
||||
*
|
||||
*/
|
||||
UnicodeConverterCPP(int32_t codepageNumber,
|
||||
UConverterPlatform platform,
|
||||
UErrorCode& err);
|
||||
|
||||
~UnicodeConverterCPP();
|
||||
|
||||
|
||||
/**
|
||||
* Transcodes the source UnicodeString to the target string in a codepage encoding
|
||||
* with the specified Unicode converter. For example, if a Unicode to/from JIS
|
||||
* converter is specified, the source string in Unicode will be transcoded to JIS
|
||||
* encoding. The result will be stored in JIS encoding.
|
||||
*
|
||||
* @param source the source Unicode string
|
||||
* @param target the target string in codepage encoding
|
||||
* @param targetSize Input the number of bytes available in the "target" buffer, Output the number of bytes copied to it
|
||||
* @param err the error status code. U_MEMORY_ALLOCATION_ERROR will be returned if the
|
||||
* internal process buffer cannot be allocated for transcoding. U_ILLEGAL_ARGUMENT_ERROR
|
||||
* is returned if the converter is null or the source or target string is empty.
|
||||
*/
|
||||
void fromUnicodeString(char* target,
|
||||
int32_t& targetSize,
|
||||
const UnicodeString& source,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Transcode the source string in codepage encoding to the target string in
|
||||
* Unicode encoding. For example, if a Unicode to/from JIS
|
||||
* converter is specified, the source string in JIS encoding will be transcoded
|
||||
* to Unicode encoding. The result will be stored in Unicode encoding.
|
||||
* @param source the source string in codepage encoding
|
||||
* @param target the target string in Unicode encoding
|
||||
* @param targetSize : I/O parameter, Input size buffer, Output # of bytes copied to it
|
||||
* @param err the error status code U_MEMORY_ALLOCATION_ERROR will be returned if the
|
||||
* internal process buffer cannot be allocated for transcoding. U_ILLEGAL_ARGUMENT_ERROR
|
||||
* is returned if the converter is null or the source or target string is empty.
|
||||
*/
|
||||
void toUnicodeString(UnicodeString& target,
|
||||
const char* source,
|
||||
int32_t sourceSize,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Transcodes an array of unicode characters to an array of codepage characters.
|
||||
* The source pointer is an I/O parameter, it starts out pointing at the place
|
||||
* to begin translating, and ends up pointing after the first sequence of the bytes
|
||||
* that it encounters that are semantically invalid.
|
||||
* if T_UnicodeConverter_setMissingCharAction is called with an action other than STOP
|
||||
* before a call is made to this API, consumed and source should point to the same place
|
||||
* (unless target ends with an incomplete sequence of bytes and flush is FALSE).
|
||||
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
|
||||
* codepage characters to. Output : points to after the last codepage character copied
|
||||
* to target.
|
||||
* @param targetLimit the pointer to the end of the target array
|
||||
* @param source the source Unicode character array
|
||||
* @param sourceLimit the pointer to the end of the source array
|
||||
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
|
||||
* in this call, FALSE otherwise. (future feature pending)
|
||||
* @param UErrorCode the error status. U_ILLEGAL_ARGUMENT_ERROR will be returned if the
|
||||
* converter is null.
|
||||
*/
|
||||
void fromUnicode(char*& target,
|
||||
const char* targetLimit,
|
||||
const UChar*& source,
|
||||
const UChar* sourceLimit,
|
||||
int32_t * offsets,
|
||||
bool_t flush,
|
||||
UErrorCode& err);
|
||||
|
||||
|
||||
/**
|
||||
* Converts an array of codepage characters into an array of unicode characters.
|
||||
* The source pointer is an I/O parameter, it starts out pointing at the place
|
||||
* to begin translating, and ends up pointing after the first sequence of the bytes
|
||||
* that it encounters that are semantically invalid.
|
||||
* if T_UnicodeConverter_setMissingUnicodeAction is called with an action other than STOP
|
||||
* before a call is made to this API, consumed and source should point to the same place
|
||||
* (unless target ends with an incomplete sequence of bytes and flush is FALSE).
|
||||
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
|
||||
* Unicode characters to. Output : points to after the last UChar copied to target.
|
||||
* @param targetLimit the pointer to the end of the target array
|
||||
* @param source the source codepage character array
|
||||
* @param sourceLimit the pointer to the end of the source array
|
||||
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
|
||||
* in this call, FALSE otherwise. (future feature pending)
|
||||
* @param err the error code status U_ILLEGAL_ARGUMENT_ERROR will be returned if the
|
||||
* converter is null, targetLimit < target, sourceLimit < source
|
||||
*/
|
||||
void toUnicode(UChar*& target,
|
||||
const UChar* targetLimit,
|
||||
const char*& source,
|
||||
const char* sourceLimit,
|
||||
int32_t * offsets,
|
||||
bool_t flush,
|
||||
UErrorCode& err);
|
||||
|
||||
|
||||
/*
|
||||
* Returns the maximum length of bytes used by a character. This varies between 1 and 4
|
||||
* @return the max number of bytes per codepage character
|
||||
*/
|
||||
int8_t getMaxBytesPerChar(void) const;
|
||||
|
||||
/**
|
||||
* Returns the minimum byte length for characters in this codepage. This is either
|
||||
* 1 or 2 for all supported codepages.
|
||||
* @return the minimum number of byte per codepage character
|
||||
*/
|
||||
int8_t getMinBytesPerChar(void) const;
|
||||
|
||||
/**
|
||||
*Gets the type of conversion associated with the converter
|
||||
* e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, EBCDIC_STATEFUL, LATIN_1
|
||||
* @return the type of the converter
|
||||
*/
|
||||
UConverterType getType(void) const;
|
||||
|
||||
/**
|
||||
*Gets the "starter" bytes for the converters of type MBCS
|
||||
*will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
|
||||
*is not MBCS.
|
||||
*fills in an array of boolean, with the value of the byte as offset to the array.
|
||||
*At return, if TRUE is found in at offset 0x20, it means that the byte 0x20 is a starter byte
|
||||
*in this converter.
|
||||
* @param starters: an array of size 256 to be filled in
|
||||
* @param err: the error status code; set to U_ILLEGAL_ARGUMENT_ERROR if the converter is not MBCS
|
||||
* @see ucnv_getType
|
||||
*/
|
||||
void getStarters(bool_t starters[256],
|
||||
UErrorCode& err) const;
|
||||
/**
|
||||
* Fills in the output parameter, subChars, with the substitution characters
|
||||
* as multiple bytes.
|
||||
* @param subChars the substitution characters
|
||||
* @param len the number of bytes of the substitution character array
|
||||
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR will be returned if
|
||||
* the converter is null. If the substitution character array is too small, an
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR will be returned.
|
||||
*/
|
||||
void getSubstitutionChars(char* subChars,
|
||||
int8_t& len,
|
||||
UErrorCode& err) const;
|
||||
/**
|
||||
* Sets the substitution chars when converting from unicode to a codepage. The
|
||||
* substitution is specified as a string of 1-4 bytes, and may contain null byte.
|
||||
* The fill-in parameter err will get the error status on return.
|
||||
* @param cstr the substitution character array to be set with
|
||||
* @param len the number of bytes of the substitution character array and upon return will contain the
|
||||
* number of bytes copied to that buffer
|
||||
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR if the converter is
|
||||
* null. or if the number of bytes provided are not in the codepage's range (e.g length 1 for ucs-2)
|
||||
*/
|
||||
void setSubstitutionChars(const char* subChars,
|
||||
int8_t len,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
* Resets the state of stateful conversion to the default state. This is used
|
||||
* in the case of error to restart a conversion from a known default state.
|
||||
*/
|
||||
void resetState(void);
|
||||
|
||||
/**
|
||||
* Gets the name of the converter (zero-terminated).
|
||||
* the name will be the internal name of the converter
|
||||
* @param converter the Unicode converter
|
||||
* @param err the error status code. U_INDEX_OUTOFBOUNDS_ERROR in the converterNameLen is too
|
||||
* small to contain the name.
|
||||
*/
|
||||
const char* getName( UErrorCode& err) const;
|
||||
|
||||
|
||||
/**
|
||||
* Gets a codepage number associated with the converter. This is not guaranteed
|
||||
* to be the one used to create the converter. Some converters do not represent
|
||||
* IBM registered codepages and return zero for the codepage number.
|
||||
* The error code fill-in parameter indicates if the codepage number is available.
|
||||
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR will returned if
|
||||
* the converter is null or if converter's data table is null.
|
||||
* @return If any error occurs, null will be returned.
|
||||
*/
|
||||
int32_t getCodepage(UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns the current setting action taken when a character from a codepage
|
||||
* is missing. (Currently STOP or SUBSTITUTE).
|
||||
* @return the action constant when a Unicode character cannot be converted to a
|
||||
* codepage equivalent
|
||||
*/
|
||||
UConverterToUCallback getMissingCharAction(void) const;
|
||||
|
||||
/**
|
||||
* Return the current setting action taken when a unicode character is missing.
|
||||
* (Currently STOP or SUBSTITUTE).
|
||||
* @return the action constant when a codepage character cannot be converted to a
|
||||
* Unicode equivalent
|
||||
*/
|
||||
UConverterFromUCallback getMissingUnicodeAction(void) const;
|
||||
|
||||
/**
|
||||
* Sets the current setting action taken when a character from a codepage is
|
||||
* missing. (Currently STOP or SUBSTITUTE).
|
||||
* @param action the action constant if an equivalent codepage character is missing
|
||||
*/
|
||||
void setMissingCharAction(UConverterToUCallback action,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
* Sets the current setting action taken when a unicode character is missing.
|
||||
* (currently T_UnicodeConverter_MissingUnicodeAction is either STOP or SUBSTITUTE,
|
||||
* SKIP, CLOSEST_MATCH, ESCAPE_SEQ may be added in the future).
|
||||
* @param action the action constant if an equivalent Unicode character is missing
|
||||
* @param err the error status code
|
||||
*/
|
||||
void setMissingUnicodeAction(UConverterFromUCallback action,
|
||||
UErrorCode& err);
|
||||
/**
|
||||
* Returns the localized name of the UnicodeConverter, if for any reason it is
|
||||
* unavailable, the internal name will be returned instead.
|
||||
* @param displayLocale the valid Locale, from which we want to localize
|
||||
* @param displayString a UnicodeString that is going to be filled in.
|
||||
*/
|
||||
void getDisplayName(const Locale& displayLocale,
|
||||
UnicodeString& displayName) const;
|
||||
|
||||
/**
|
||||
* Returns the T_UnicodeConverter_platform (ICU defined enum) of a UnicodeConverter
|
||||
* available, the internal name will be returned instead.
|
||||
* @param err the error code status
|
||||
* @return the codepages platform
|
||||
*/
|
||||
UConverterPlatform getCodepagePlatform(UErrorCode& err) const;
|
||||
|
||||
|
||||
UnicodeConverterCPP& operator=(const UnicodeConverterCPP& that);
|
||||
bool_t operator==(const UnicodeConverterCPP& that) const;
|
||||
bool_t operator!=(const UnicodeConverterCPP& that) const;
|
||||
UnicodeConverterCPP(const UnicodeConverterCPP& that);
|
||||
|
||||
/**
|
||||
* Returns the available names. Lazy evaluated, Library owns the storage
|
||||
* @param num the number of available converters
|
||||
* @param err the error code status
|
||||
* @return the name array
|
||||
*/
|
||||
static const char* const* getAvailableNames(int32_t& num,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
* Iterates through every cached converter and frees all the unused ones
|
||||
* @return the number of cached converters successfully deleted
|
||||
*/
|
||||
static int32_t flushCache(void);
|
||||
};
|
||||
#endif
|
||||
#error Please include unicode/convert.h instead
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#define EXTENDED_FUNCTIONALITY
|
||||
#include "cpputils.h"
|
||||
#include "cstring.h"
|
||||
#include "ustring.h"
|
||||
#include "unicode/ustring.h"
|
||||
|
||||
/**********************************************************************
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
|
@ -21,7 +21,7 @@ void T_fillOutputParams(const UnicodeString* temp,
|
|||
|
||||
const int32_t actual = temp->length();
|
||||
const bool_t overflowed = actual >= resultLength;
|
||||
const int32_t returnedSize = icu_min(actual, resultLength-1);
|
||||
const int32_t returnedSize = uprv_min(actual, resultLength-1);
|
||||
if ((temp->length() < resultLength) && (result != temp->getUChars()) && (returnedSize > 0)) {
|
||||
u_strcpy(result, temp->getUChars());
|
||||
}
|
||||
|
|
|
@ -14,49 +14,49 @@
|
|||
#ifndef CPPUTILS_H
|
||||
#define CPPUTILS_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#ifdef XP_CPLUSPLUS
|
||||
|
||||
#include "cmemory.h"
|
||||
#include "unistr.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Array copy utility functions */
|
||||
/*===========================================================================*/
|
||||
|
||||
inline void icu_arrayCopy(const double* src, double* dst, int32_t count)
|
||||
{ icu_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
inline void uprv_arrayCopy(const double* src, double* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
inline void icu_arrayCopy(const double* src, int32_t srcStart,
|
||||
inline void uprv_arrayCopy(const double* src, int32_t srcStart,
|
||||
double* dst, int32_t dstStart, int32_t count)
|
||||
{ icu_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
inline void icu_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
|
||||
{ icu_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
inline void icu_arrayCopy(const int8_t* src, int32_t srcStart,
|
||||
inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart,
|
||||
int8_t* dst, int32_t dstStart, int32_t count)
|
||||
{ icu_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
inline void icu_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
|
||||
{ icu_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
inline void icu_arrayCopy(const int16_t* src, int32_t srcStart,
|
||||
inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart,
|
||||
int16_t* dst, int32_t dstStart, int32_t count)
|
||||
{ icu_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
inline void icu_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
|
||||
{ icu_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
|
||||
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
inline void icu_arrayCopy(const int32_t* src, int32_t srcStart,
|
||||
inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,
|
||||
int32_t* dst, int32_t dstStart, int32_t count)
|
||||
{ icu_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
inline void
|
||||
icu_arrayCopy(const UChar *src, int32_t srcStart,
|
||||
uprv_arrayCopy(const UChar *src, int32_t srcStart,
|
||||
UChar *dst, int32_t dstStart, int32_t count)
|
||||
{ icu_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
|
||||
|
||||
/******************************************************
|
||||
* Simple utility to set output buffer parameters
|
||||
|
|
|
@ -28,8 +28,8 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include "utypes.h"
|
||||
#include "putil.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "cstring.h"
|
||||
|
||||
char*
|
||||
|
@ -121,7 +121,7 @@ T_CString_stricmp(const char *str1, const char *str2) {
|
|||
return 1;
|
||||
} else {
|
||||
/* compare non-zero characters with lowercase */
|
||||
rc=(int)(unsigned char)icu_tolower(c1)-(int)(unsigned char)icu_tolower(c2);
|
||||
rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
|
|
|
@ -28,38 +28,38 @@
|
|||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#define icu_strcpy(dst, src) strcpy(dst, src)
|
||||
#define icu_strcpyWithSize(dst, src, size) strncpy(dst, src, size)
|
||||
#define icu_strncpy(dst, src, size) strncpy(dst, src, size)
|
||||
#define icu_strlen(str) strlen(str)
|
||||
#define icu_strcmp(s1, s2) strcmp(s1, s2)
|
||||
#define icu_strncmp(s1, s2, n) strncmp(s1, s2, n)
|
||||
#define icu_strcat(dst, src) strcat(dst, src)
|
||||
#define icu_strncat(dst, src, n) strncat(dst, src, n)
|
||||
#define icu_strchr(s, c) strchr(s, c)
|
||||
#define icu_strstr(s, c) strstr(s, c)
|
||||
#define icu_strrchr(s, c) strrchr(s, c)
|
||||
#define icu_toupper(c) toupper(c)
|
||||
#define icu_tolower(c) tolower(c)
|
||||
#define icu_strtoul(str, end, base) strtoul(str, end, base)
|
||||
#define uprv_strcpy(dst, src) strcpy(dst, src)
|
||||
#define uprv_strcpyWithSize(dst, src, size) strncpy(dst, src, size)
|
||||
#define uprv_strncpy(dst, src, size) strncpy(dst, src, size)
|
||||
#define uprv_strlen(str) strlen(str)
|
||||
#define uprv_strcmp(s1, s2) strcmp(s1, s2)
|
||||
#define uprv_strncmp(s1, s2, n) strncmp(s1, s2, n)
|
||||
#define uprv_strcat(dst, src) strcat(dst, src)
|
||||
#define uprv_strncat(dst, src, n) strncat(dst, src, n)
|
||||
#define uprv_strchr(s, c) strchr(s, c)
|
||||
#define uprv_strstr(s, c) strstr(s, c)
|
||||
#define uprv_strrchr(s, c) strrchr(s, c)
|
||||
#define uprv_toupper(c) toupper(c)
|
||||
#define uprv_tolower(c) tolower(c)
|
||||
#define uprv_strtoul(str, end, base) strtoul(str, end, base)
|
||||
#ifdef WIN32
|
||||
# define icu_stricmp(str1, str2) _stricmp(str1, str2)
|
||||
# define uprv_stricmp(str1, str2) _stricmp(str1, str2)
|
||||
#elif defined(POSIX)
|
||||
# define icu_stricmp(str1, str2) strcasecmp(str1, str2)
|
||||
# define uprv_stricmp(str1, str2) strcasecmp(str1, str2)
|
||||
#else
|
||||
# define icu_stricmp(str1, str2) T_CString_stricmp(str1, str2)
|
||||
# define uprv_stricmp(str1, str2) T_CString_stricmp(str1, str2)
|
||||
#endif
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Wide-character functions */
|
||||
/*===========================================================================*/
|
||||
#define icu_wcscat(dst, src) wcscat(dst, src)
|
||||
#define icu_wcscpy(dst, src) wcscpy(dst, src)
|
||||
#define icu_wcslen(src) wcslen(src)
|
||||
#define icu_wcstombs(mbstr, wcstr, count) wcstombs(mbstr, wcstr, count)
|
||||
#define icu_mbstowcs(wcstr, mbstr, count) mbstowcs(wcstr, mbstr, count)
|
||||
#define uprv_wcscat(dst, src) wcscat(dst, src)
|
||||
#define uprv_wcscpy(dst, src) wcscpy(dst, src)
|
||||
#define uprv_wcslen(src) wcslen(src)
|
||||
#define uprv_wcstombs(mbstr, wcstr, count) wcstombs(mbstr, wcstr, count)
|
||||
#define uprv_mbstowcs(wcstr, mbstr, count) mbstowcs(wcstr, mbstr, count)
|
||||
|
||||
U_CAPI char* U_EXPORT2
|
||||
T_CString_toLowerCase(char* str);
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "ucmp8.h"
|
||||
#include "ucmp16.h"
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#ifndef DIGITLST_H
|
||||
#define DIGITLST_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include <float.h>
|
||||
|
||||
// Decimal digits in a 32-bit int
|
||||
|
|
|
@ -57,18 +57,18 @@ T_FileStream_wopen(const wchar_t* filename, const wchar_t* mode)
|
|||
|
||||
/* convert from wchar_t to char */
|
||||
fnMbsSize = wcstombs(NULL, filename, ((size_t)-1) >> 1);
|
||||
fn = (char*)icu_malloc(fnMbsSize+2);
|
||||
fn = (char*)uprv_malloc(fnMbsSize+2);
|
||||
wcstombs(fn, filename, fnMbsSize);
|
||||
fn[fnMbsSize] = 0;
|
||||
|
||||
mdMbsSize = wcstombs(NULL, mode, ((size_t)-1) >> 1);
|
||||
md = (char*)icu_malloc(mdMbsSize+2);
|
||||
md = (char*)uprv_malloc(mdMbsSize+2);
|
||||
wcstombs(md, mode, mdMbsSize);
|
||||
md[mdMbsSize] = 0;
|
||||
|
||||
result = fopen(fn, md);
|
||||
icu_free(fn);
|
||||
icu_free(md);
|
||||
uprv_free(fn);
|
||||
uprv_free(md);
|
||||
return (FileStream*)result;
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
#define FILESTRM_H
|
||||
|
||||
#ifndef _UTYPES
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#endif
|
||||
|
||||
#include <wchar.h>
|
||||
|
|
|
@ -30,11 +30,11 @@
|
|||
|
||||
|
||||
#include "uhash.h"
|
||||
#include "locid.h"
|
||||
#include "uloc.h"
|
||||
#include "resbund.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/uloc.h"
|
||||
#include "unicode/resbund.h"
|
||||
#include "mutex.h"
|
||||
#include "unicode.h"
|
||||
#include "unicode/unicode.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
||||
|
@ -212,17 +212,17 @@ Locale::Locale(const Locale& other)
|
|||
{
|
||||
int j;
|
||||
/*Copy the language and country fields*/
|
||||
icu_strcpy(language, other.language);
|
||||
icu_strcpy(country, other.country);
|
||||
uprv_strcpy(language, other.language);
|
||||
uprv_strcpy(country, other.country);
|
||||
|
||||
/*make fullName point to the heap if necessary*/
|
||||
if ((j=icu_strlen(other.fullName)) > ULOC_FULLNAME_CAPACITY)
|
||||
if ((j=uprv_strlen(other.fullName)) > ULOC_FULLNAME_CAPACITY)
|
||||
{
|
||||
fullName = new char[j+1];
|
||||
}
|
||||
else fullName = fullNameBuffer;
|
||||
|
||||
icu_strcpy(fullName, other.fullName);
|
||||
uprv_strcpy(fullName, other.fullName);
|
||||
|
||||
/*Make the variant point to the same offset as the copied*/
|
||||
variant = fullName + (other.variant - other.fullName) ;
|
||||
|
@ -232,11 +232,11 @@ Locale::Locale(const Locale& other)
|
|||
bool_t
|
||||
Locale::operator==( const Locale& other) const
|
||||
{
|
||||
if (icu_strcmp(other.language, language) == 0)
|
||||
if (uprv_strcmp(other.language, language) == 0)
|
||||
{
|
||||
if (icu_strcmp(other.country, country) == 0)
|
||||
if (uprv_strcmp(other.country, country) == 0)
|
||||
{
|
||||
if (icu_strcmp(other.variant, variant) == 0) return TRUE;
|
||||
if (uprv_strcmp(other.variant, variant) == 0) return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -265,13 +265,13 @@ Locale& Locale::init(const char* localeID)
|
|||
|
||||
/*Go to heap for the fullName if necessary*/
|
||||
int j;
|
||||
if ((j=icu_strlen(localeID)) > ULOC_FULLNAME_CAPACITY)
|
||||
if ((j=uprv_strlen(localeID)) > ULOC_FULLNAME_CAPACITY)
|
||||
{
|
||||
this->fullName = new char[j+1];
|
||||
}
|
||||
else this->fullName = this->fullNameBuffer;
|
||||
|
||||
icu_strcpy(this->fullName, localeID);
|
||||
uprv_strcpy(this->fullName, localeID);
|
||||
|
||||
/*Setting up the variant:
|
||||
-point to the zero terminator of fullName if there is none
|
||||
|
@ -293,17 +293,17 @@ Locale& Locale::init(const char* localeID)
|
|||
|
||||
Locale& Locale::operator=(const Locale& other)
|
||||
{
|
||||
icu_strcpy(language, other.language);
|
||||
icu_strcpy(country, other.country);
|
||||
uprv_strcpy(language, other.language);
|
||||
uprv_strcpy(country, other.country);
|
||||
if (other.fullName == other.fullNameBuffer) fullName = fullNameBuffer;
|
||||
else
|
||||
{
|
||||
/*In case the assigner has some of its data on the heap
|
||||
* we need to do the same*/
|
||||
if (fullName != fullNameBuffer) delete []fullName;
|
||||
fullName = new char[(icu_strlen(other.fullName)+1)];
|
||||
fullName = new char[(uprv_strlen(other.fullName)+1)];
|
||||
}
|
||||
icu_strcpy(fullName, other.fullName);
|
||||
uprv_strcpy(fullName, other.fullName);
|
||||
/*Make the variant point to the same offset as the assigner*/
|
||||
variant = fullName + (other.variant - other.fullName) ;
|
||||
|
||||
|
|
|
@ -1,570 +1 @@
|
|||
/*
|
||||
*****************************************************************************************
|
||||
*
|
||||
* Copyright (C) 1996-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*****************************************************************************************
|
||||
*
|
||||
* File locid.h
|
||||
*
|
||||
* Created by: Helena Shih
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 02/11/97 aliu Changed gLocPath to fgLocPath and added methods to
|
||||
* get and set it.
|
||||
* 04/02/97 aliu Made operator!= inline; fixed return value of getName().
|
||||
* 04/15/97 aliu Cleanup for AIX/Win32.
|
||||
* 04/24/97 aliu Numerous changes per code review.
|
||||
* 08/18/98 stephen Added tokenizeString(),changed getDisplayName()
|
||||
* 09/08/98 stephen Moved definition of kEmptyString for Mac Port
|
||||
* 11/09/99 weiv Added const char * getName() const;
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef LOCID_H
|
||||
#define LOCID_H
|
||||
|
||||
|
||||
#include "unistr.h"
|
||||
|
||||
typedef struct ULocale ULocale;
|
||||
typedef struct UHashtable UHashtable;
|
||||
|
||||
#define ULOC_LANG_CAPACITY 3
|
||||
#define ULOC_COUNTRY_CAPACITY 3
|
||||
#define ULOC_FULLNAME_CAPACITY 50
|
||||
|
||||
/**
|
||||
*
|
||||
* A <code>Locale</code> object represents a specific geographical, political,
|
||||
* or cultural region. An operation that requires a <code>Locale</code> to perform
|
||||
* its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
|
||||
* to tailor information for the user. For example, displaying a number
|
||||
* is a locale-sensitive operation--the number should be formatted
|
||||
* according to the customs/conventions of the user's native country,
|
||||
* region, or culture.
|
||||
*
|
||||
* <P>
|
||||
* You create a <code>Locale</code> object using one of the three constructors in
|
||||
* this class:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . Locale( const UnicodeString& newLanguage);
|
||||
* .
|
||||
* . Locale( const UnicodeString& language,
|
||||
* . const UnicodeString& country);
|
||||
* .
|
||||
* . Locale( const UnicodeString& language,
|
||||
* . const UnicodeString& country,
|
||||
* . const UnicodeString& variant);
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* The first argument to the constructors is a valid <STRONG>ISO
|
||||
* Language Code.</STRONG> These codes are the lower-case two-letter
|
||||
* codes as defined by ISO-639.
|
||||
* You can find a full list of these codes at a number of sites, such as:
|
||||
* <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
|
||||
* <code>http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</code></a>
|
||||
*
|
||||
* <P>
|
||||
* The second argument to the constructors is a valid <STRONG>ISO Country
|
||||
* Code.</STRONG> These codes are the upper-case two-letter codes
|
||||
* as defined by ISO-3166.
|
||||
* You can find a full list of these codes at a number of sites, such as:
|
||||
* <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
|
||||
* <code>http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</code></a>
|
||||
*
|
||||
* <P>
|
||||
* The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
|
||||
* The Variant codes are vendor and browser-specific.
|
||||
* For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
|
||||
* Where there are two variants, separate them with an underscore, and
|
||||
* put the most important one first. For
|
||||
* example, a Traditional Spanish collation might be referenced, with
|
||||
* "ES", "ES", "Traditional_WIN".
|
||||
*
|
||||
* <P>
|
||||
* Because a <code>Locale</code> object is just an identifier for a region,
|
||||
* no validity check is performed when you construct a <code>Locale</code>.
|
||||
* If you want to see whether particular resources are available for the
|
||||
* <code>Locale</code> you construct, you must query those resources. For
|
||||
* example, ask the <code>NumberFormat</code> for the locales it supports
|
||||
* using its <code>getAvailableLocales</code> method.
|
||||
* <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
|
||||
* locale, you get back the best available match, not necessarily
|
||||
* precisely what you asked for. For more information, look at
|
||||
* <a href="java.util.ResourceBundle.html"><code>ResourceBundle</code></a>.
|
||||
*
|
||||
* <P>
|
||||
* The <code>Locale</code> class provides a number of convenient constants
|
||||
* that you can use to create <code>Locale</code> objects for commonly used
|
||||
* locales. For example, the following refers to a <code>Locale</code> object
|
||||
* for the United States:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . Locale::US
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*
|
||||
* <P>
|
||||
* Once you've created a <code>Locale</code> you can query it for information about
|
||||
* itself. Use <code>getCountry</code> to get the ISO Country Code and
|
||||
* <code>getLanguage</code> to get the ISO Language Code. You can
|
||||
* use <code>getDisplayCountry</code> to get the
|
||||
* name of the country suitable for displaying to the user. Similarly,
|
||||
* you can use <code>getDisplayLanguage</code> to get the name of
|
||||
* the language suitable for displaying to the user. Interestingly,
|
||||
* the <code>getDisplayXXX</code> methods are themselves locale-sensitive
|
||||
* and have two versions: one that uses the default locale and one
|
||||
* that takes a locale as an argument and displays the name or country in
|
||||
* a language appropriate to that locale.
|
||||
*
|
||||
* <P>
|
||||
* The TIFC provides a number of classes that perform locale-sensitive
|
||||
* operations. For example, the <code>NumberFormat</code> class formats
|
||||
* numbers, currency, or percentages in a locale-sensitive manner. Classes
|
||||
* such as <code>NumberFormat</code> have a number of convenience methods
|
||||
* for creating a default object of that type. For example, the
|
||||
* <code>NumberFormat</code> class provides these three convenience methods
|
||||
* for creating a default <code>NumberFormat</code> object:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . UErrorCode success = U_ZERO_ERROR;
|
||||
* . Locale myLocale;
|
||||
* . NumberFormat *nf;
|
||||
* .
|
||||
* . nf = NumberFormat::createInstance( success ); delete nf;
|
||||
* . nf = NumberFormat::createCurrencyInstance( success ); delete nf;
|
||||
* . nf = NumberFormat::createPercentInstance( success ); delete nf;
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* Each of these methods has two variants; one with an explicit locale
|
||||
* and one without; the latter using the default locale.
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . nf = NumberFormat::createInstance( myLocale, success ); delete nf;
|
||||
* . nf = NumberFormat::createCurrencyInstance( myLocale, success ); delete nf;
|
||||
* . nf = NumberFormat::createPercentInstance( myLocale, success ); delete nf;
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* A <code>Locale</code> is the mechanism for identifying the kind of object
|
||||
* (<code>NumberFormat</code>) that you would like to get. The locale is
|
||||
* <STRONG>just</STRONG> a mechanism for identifying objects,
|
||||
* <STRONG>not</STRONG> a container for the objects themselves.
|
||||
*
|
||||
* <P>
|
||||
* Each class that performs locale-sensitive operations allows you
|
||||
* to get all the available objects of that type. You can sift
|
||||
* through these objects by language, country, or variant,
|
||||
* and use the display names to present a menu to the user.
|
||||
* For example, you can create a menu of all the collation objects
|
||||
* suitable for a given language. Such classes implement these
|
||||
* three class methods:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . static Locale* getAvailableLocales(int32_t& numLocales)
|
||||
* . static UnicodeString& getDisplayName(const Locale& objectLocale,
|
||||
* . const Locale& displayLocale,
|
||||
* . UnicodeString& displayName)
|
||||
* . static UnicodeString& getDisplayName(const Locale& objectLocale,
|
||||
* . UnicodeString& displayName)
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*/
|
||||
class U_COMMON_API Locale
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Useful constants for language.
|
||||
*/
|
||||
static const Locale ENGLISH;
|
||||
static const Locale FRENCH;
|
||||
static const Locale GERMAN;
|
||||
static const Locale ITALIAN;
|
||||
static const Locale JAPANESE;
|
||||
static const Locale KOREAN;
|
||||
static const Locale CHINESE;
|
||||
static const Locale SIMPLIFIED_CHINESE;
|
||||
static const Locale TRADITIONAL_CHINESE;
|
||||
|
||||
/**
|
||||
* Useful constants for country.
|
||||
*/
|
||||
static const Locale FRANCE;
|
||||
static const Locale GERMANY;
|
||||
static const Locale ITALY;
|
||||
static const Locale JAPAN;
|
||||
static const Locale KOREA;
|
||||
static const Locale CHINA; // Alias for PRC
|
||||
static const Locale PRC; // Peoples Republic of China
|
||||
static const Locale TAIWAN; // Republic of China
|
||||
static const Locale UK;
|
||||
static const Locale US;
|
||||
static const Locale CANADA;
|
||||
static const Locale CANADA_FRENCH;
|
||||
|
||||
/**
|
||||
* Construct an empty locale. It's only used when a fill-in parameter is
|
||||
* needed.
|
||||
*/
|
||||
Locale();
|
||||
|
||||
/**
|
||||
* Construct a locale from language, country, variant.
|
||||
*
|
||||
* @param language Lowercase two-letter ISO-639 code.
|
||||
* @param country Uppercase two-letter ISO-3166 code. (optional)
|
||||
* @param variant Uppercase vendor and browser specific code. See class
|
||||
* description. (optional)
|
||||
*/
|
||||
Locale( const UnicodeString& language,
|
||||
const UnicodeString& country ,
|
||||
const UnicodeString& variant );
|
||||
|
||||
Locale( const UnicodeString& language,
|
||||
const UnicodeString& country );
|
||||
|
||||
Locale( const UnicodeString& language);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Initializes a Locale object from another Locale object.
|
||||
*
|
||||
* @param other The Locale object being copied in.
|
||||
*/
|
||||
Locale(const Locale& other);
|
||||
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
*/
|
||||
~Locale() ;
|
||||
|
||||
/**
|
||||
* Replaces the entire contents of *this with the specified value.
|
||||
*
|
||||
* @param other The Locale object being copied in.
|
||||
* @return *this
|
||||
*/
|
||||
Locale& operator=(const Locale& other);
|
||||
|
||||
/**
|
||||
* Checks if two locale keys are the same.
|
||||
*
|
||||
* @param other The locale key object to be compared with this.
|
||||
* @return True if the two locale keys are the same, false otherwise.
|
||||
*/
|
||||
bool_t operator==(const Locale& other) const;
|
||||
|
||||
/**
|
||||
* Checks if two locale keys are not the same.
|
||||
*
|
||||
* @param other The locale key object to be compared with this.
|
||||
* @return True if the two locale keys are not the same, false
|
||||
* otherwise.
|
||||
*/
|
||||
bool_t operator!=(const Locale& other) const;
|
||||
|
||||
/**
|
||||
* Common methods of getting the current default Locale. Used for the
|
||||
* presentation: menus, dialogs, etc. Generally set once when your applet or
|
||||
* application is initialized, then never reset. (If you do reset the
|
||||
* default locale, you probably want to reload your GUI, so that the change
|
||||
* is reflected in your interface.)
|
||||
*
|
||||
* More advanced programs will allow users to use different locales for
|
||||
* different fields, e.g. in a spreadsheet.
|
||||
*
|
||||
* Note that the initial setting will match the host system.
|
||||
*/
|
||||
static Locale& getDefault(void);
|
||||
|
||||
/**
|
||||
* Sets the default. Normally set once at the beginning of applet or
|
||||
* application, then never reset. setDefault does NOT reset the host locale.
|
||||
*
|
||||
* @param newLocale Locale to set to.
|
||||
*/
|
||||
static void setDefault(const Locale& newLocale,
|
||||
UErrorCode& success);
|
||||
|
||||
/**
|
||||
* Fills in "lang" with the locale's two-letter ISO-639 language code.
|
||||
* @param lang Receives the language code.
|
||||
* @return A reference to "lang".
|
||||
*/
|
||||
UnicodeString& getLanguage( UnicodeString& lang) const;
|
||||
/**
|
||||
* Fills in "cntry" with the locale's two-letter ISO-3166 country code.
|
||||
* @param cntry Receives the country code.
|
||||
* @return A reference to "cntry".
|
||||
*/
|
||||
UnicodeString& getCountry( UnicodeString& cntry) const;
|
||||
/**
|
||||
* Fills in "var" with the locale's variant code.
|
||||
* @param var Receives the variant code.
|
||||
* @return A reference to "var".
|
||||
*/
|
||||
UnicodeString& getVariant( UnicodeString& var) const;
|
||||
|
||||
/**
|
||||
* Fills in "name" with the programmatic name of the entire locale, with the language,
|
||||
* country and variant separated by underbars. If a field is missing, at
|
||||
* most one underbar will occur. Example: "en", "de_DE", "en_US_WIN",
|
||||
* "de_POSIX", "fr_MAC"
|
||||
* @param name Receives the programmatic locale name.
|
||||
* @return A reference to "name".
|
||||
*/
|
||||
UnicodeString& getName( UnicodeString& name) const;
|
||||
|
||||
/**
|
||||
* Returns the programmatic name of the entire locale, with the language,
|
||||
* country and variant separated by underbars. If a field is missing, at
|
||||
* most one underbar will occur. Example: "en", "de_DE", "en_US_WIN",
|
||||
* "de_POSIX", "fr_MAC"
|
||||
* @return A pointer to the locale's programmatic name.
|
||||
*/
|
||||
const char * getName() const;
|
||||
|
||||
/**
|
||||
* Fills in "name" with the locale's three-letter language code, as specified
|
||||
* in ISO draft standard ISO-639-2.
|
||||
* @param name Receives the three-letter language code.
|
||||
* @param status An UErrorCode to receive any MISSING_RESOURCE_ERRORs
|
||||
* @return A reference to "name".
|
||||
*/
|
||||
UnicodeString& getISO3Language(UnicodeString& name, UErrorCode& status) const;
|
||||
|
||||
// this version is deprecated, use getISO3Language(UnicodeString&, UErrorCode&)
|
||||
UnicodeString& getISO3Language(UnicodeString& name) const;
|
||||
|
||||
/**
|
||||
* Fills in "name" with the locale's three-letter ISO-3166 country code.
|
||||
* @param name Receives the three-letter country code.
|
||||
* @param status An UErrorCode to receive any MISSING_RESOURCE_ERRORs
|
||||
* @return A reference to "name".
|
||||
*/
|
||||
UnicodeString& getISO3Country( UnicodeString& name, UErrorCode& status) const;
|
||||
|
||||
// this version is deprecated, use getISO3Country(UnicodeString&, UErrorCode&);
|
||||
UnicodeString& getISO3Country( UnicodeString& name) const;
|
||||
|
||||
/**
|
||||
* Returns the Windows LCID value corresponding to this locale.
|
||||
* This value is stored in the resource data for the locale as a one-to-four-digit
|
||||
* hexadecimal number. If the resource is missing, in the wrong format, or
|
||||
* there is no Windows LCID value that corresponds to this locale, returns 0.
|
||||
*/
|
||||
uint32_t getLCID(void) const;
|
||||
|
||||
/**
|
||||
* Fills in "dispLang" with the name of this locale's language in a format suitable for
|
||||
* user display in the default locale. For example, if the locale's language code is
|
||||
* "fr" and the default locale's language code is "en", this function would set
|
||||
* dispLang to "French".
|
||||
* @param dispLang Receives the language's display name.
|
||||
* @return A reference to "dispLang".
|
||||
*/
|
||||
UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const;
|
||||
|
||||
/**
|
||||
* Fills in "dispLang" with the name of this locale's language in a format suitable for
|
||||
* user display in the locale specified by "inLocale". For example, if the locale's
|
||||
* language code is "en" and inLocale's language code is "fr", this function would set
|
||||
* dispLang to "Anglais".
|
||||
* @param inLocale Specifies the locale to be used to display the name. In other words,
|
||||
* if the locale's language code is "en", passing Locale::FRENCH for
|
||||
* inLocale would result in "Anglais", while passing Locale::GERMAN
|
||||
* for inLocale would result in "Englisch".
|
||||
* @param dispLang Receives the language's display name.
|
||||
* @return A reference to "dispLang".
|
||||
*/
|
||||
UnicodeString& getDisplayLanguage( const Locale& inLocale,
|
||||
UnicodeString& dispLang) const;
|
||||
/**
|
||||
* Fills in "dispCountry" with the name of this locale's country in a format suitable
|
||||
* for user display in the default locale. For example, if the locale's country code
|
||||
* is "FR" and the default locale's language code is "en", this function would set
|
||||
* dispCountry to "France".
|
||||
* @param dispCountry Receives the country's display name.
|
||||
* @return A reference to "dispCountry".
|
||||
*/
|
||||
UnicodeString& getDisplayCountry( UnicodeString& dispCountry) const;
|
||||
/**
|
||||
* Fills in "dispCountry" with the name of this locale's country in a format suitable
|
||||
* for user display in the locale specified by "inLocale". For example, if the locale's
|
||||
* country code is "US" and inLocale's language code is "fr", this function would set
|
||||
* dispCountry to "Etats-Unis".
|
||||
* @param inLocale Specifies the locale to be used to display the name. In other
|
||||
* words, if the locale's country code is "US", passing
|
||||
* Locale::FRENCH for inLocale would result in "États-Unis", while
|
||||
* passing Locale::GERMAN for inLocale would result in
|
||||
* "Vereinigte Staaten".
|
||||
* @param dispCountry Receives the country's display name.
|
||||
* @return A reference to "dispCountry".
|
||||
*/
|
||||
UnicodeString& getDisplayCountry( const Locale& inLocale,
|
||||
UnicodeString& dispCountry) const;
|
||||
|
||||
/**
|
||||
* Fills in "dispVar" with the name of this locale's variant code in a format suitable
|
||||
* for user display in the default locale.
|
||||
* @param dispVar Receives the variant's name.
|
||||
* @return A reference to "dispVar".
|
||||
*/
|
||||
UnicodeString& getDisplayVariant( UnicodeString& dispVar) const;
|
||||
/**
|
||||
* Fills in "dispVar" with the name of this locale's variant code in a format
|
||||
* suitable for user display in the locale specified by "inLocale".
|
||||
* @param inLocale Specifies the locale to be used to display the name.
|
||||
* @param dispVar Receives the variant's display name.
|
||||
* @return A reference to "dispVar".
|
||||
*/
|
||||
UnicodeString& getDisplayVariant( const Locale& inLocale,
|
||||
UnicodeString& dispVar) const;
|
||||
/**
|
||||
* Fills in "name" with the name of this locale in a format suitable for user display
|
||||
* in the default locale. This function uses getDisplayLanguage(), getDisplayCountry(),
|
||||
* and getDisplayVariant() to do its work, and outputs the display name in the format
|
||||
* "language (country[,variant])". For example, if the default locale is en_US, then
|
||||
* fr_FR's display name would be "French (France)", and es_MX_Traditional's display name
|
||||
* would be "Spanish (Mexico,Traditional)".
|
||||
* @param name Receives the locale's display name.
|
||||
* @return A reference to "name".
|
||||
*/
|
||||
UnicodeString& getDisplayName( UnicodeString& name) const;
|
||||
/**
|
||||
* Fills in "name" with the name of this locale in a format suitable for user display
|
||||
* in the locale specified by "inLocale". This function uses getDisplayLanguage(),
|
||||
* getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
|
||||
* name in the format "language (country[,variant])". For example, if inLocale is
|
||||
* fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's
|
||||
* display name would be "norvégien (Norvège,NY)".
|
||||
* @param inLocale Specifies the locale to be used to display the name.
|
||||
* @param name Receives the locale's display name.
|
||||
* @return A reference to "name".
|
||||
*/
|
||||
UnicodeString& getDisplayName( const Locale& inLocale,
|
||||
UnicodeString& name) const;
|
||||
/**
|
||||
* Generates a hash code for the locale. Since Locales are often used in hashtables,
|
||||
* caches the value for speed.
|
||||
*/
|
||||
int32_t hashCode(void) const;
|
||||
|
||||
/**
|
||||
* Returns a list of all installed locales.
|
||||
* @param count Receives the number of locales in the list.
|
||||
* @return A pointer to an array of Locale objects. This array is the list
|
||||
* of all locales with installed resource files. The caller does NOT
|
||||
* get ownership of this list, and must NOT delete it.
|
||||
*/
|
||||
static const Locale* getAvailableLocales(int32_t& count);
|
||||
|
||||
/**
|
||||
* Returns a list of all 2-letter country codes defined in ISO 3166.
|
||||
* Can be used to create Locales.
|
||||
* @param count Receives the number of countries in the list.
|
||||
* @return A pointer to an array of UnicodeString objects. The caller does NOT
|
||||
* get ownership of this list, and must NOT delete it.
|
||||
*/
|
||||
static const UnicodeString* getISOCountries(int32_t& count);
|
||||
|
||||
/**
|
||||
* Returns a list of all 2-letter language codes defined in ISO 639.
|
||||
* Can be used to create Locales.
|
||||
* [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed.
|
||||
* The list this function returns includes both the new and the old codes for the
|
||||
* languages whose codes have changed.]
|
||||
* @param count Receives the number of languages in the list.
|
||||
* @return A pointer to an array of UnicodeString objects. The caller does NOT
|
||||
* get ownership of this list, and must NOT delete it.
|
||||
*/
|
||||
static const UnicodeString* getISOLanguages(int32_t& count);
|
||||
|
||||
/**
|
||||
* Deprecated 1999dec14 - Get the path to the ResourceBundle locale files. This path will be a
|
||||
* platform-specific path name ending in a directory separator, so that file
|
||||
* names may be concatenated to it. This path may be changed by calling
|
||||
* setDataDirectory(). If setDataDirectory() has not been called yet,
|
||||
* getDataDirectory() will return a platform-dependent default path as
|
||||
* specified by TPlatformUtilities::getDefaultDataDirectory().
|
||||
*
|
||||
* @return Current data path.
|
||||
*/
|
||||
static const char* getDataDirectory(void);
|
||||
|
||||
/**
|
||||
* Deprecated 1999dec14 - Set the path to the ResourceBundle locale files. After making this call,
|
||||
* all objects in the Unicode Analytics package will read ResourceBundle
|
||||
* data files in the specified directory in order to obtain locale data.
|
||||
*
|
||||
* @param path The new data path to be set to.
|
||||
*/
|
||||
static void setDataDirectory(const char* path);
|
||||
|
||||
Locale& init(const char* cLocaleID);
|
||||
|
||||
protected: // only protected for testing purposes. DO NOT USE.
|
||||
void setFromPOSIXID(const UnicodeString& posixID); // set it from a single string.
|
||||
void setFromPOSIXID(const char *posixID); // set it from a single string.
|
||||
|
||||
/**
|
||||
* Given an ISO country code, returns an array of Strings containing the ISO
|
||||
* codes of the languages spoken in that country. Official languages are listed
|
||||
* in the returned table before unofficial languages, but other than that, the
|
||||
* order of the returned list is indeterminate. If the value the user passes in
|
||||
* for "country" is not a valid ISO 3166 country code, or if we don't have language
|
||||
* information for the specified country, this function returns an empty array.
|
||||
*
|
||||
* [This function is not currently part of Locale's API, but is needed in the
|
||||
* implementation. We hope to add it to the API in a future release.]
|
||||
* @param country The ISO 2-letter country code of the desired country
|
||||
* @param count Receives the number of languages in the list.
|
||||
* @return A pointer to an array of UnicodeString objects. The caller does NOT
|
||||
* get ownership of this list, and must NOT delete it.
|
||||
*/
|
||||
static const UnicodeString* getLanguagesForCountry( const UnicodeString& country,
|
||||
int32_t& count);
|
||||
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* Initializes a Locale object from a ULocale struct, which is the C locale object,
|
||||
* and where the actual implementation is.
|
||||
*/
|
||||
|
||||
void setHashCode(void);
|
||||
char language[ULOC_LANG_CAPACITY];
|
||||
char country[ULOC_COUNTRY_CAPACITY];
|
||||
char* variant;
|
||||
char* fullName;
|
||||
char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
int32_t khashCode;
|
||||
|
||||
static Locale *localeList;
|
||||
static int32_t localeListCount;
|
||||
static UnicodeString *isoLanguages;
|
||||
static int32_t isoLanguagesCount;
|
||||
static UnicodeString *isoCountries;
|
||||
static int32_t isoCountriesCount;
|
||||
static UHashtable *ctry2LangMapping;
|
||||
static const UnicodeString compressedCtry2LangMapping;
|
||||
|
||||
static Locale fgDefaultLocale;
|
||||
};
|
||||
|
||||
inline bool_t // Inequality operator for Locale, defined inline in the header.
|
||||
Locale::operator!=(const Locale& other) const
|
||||
{
|
||||
return !operator==(other); // simply the negation of operator== on the same operand
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#error Please include unicode/locid.h instead
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
// $Revision: 1.4 $
|
||||
// $Revision: 1.5 $
|
||||
//
|
||||
// Provides functionality for mapping between
|
||||
// LCID and Posix IDs.
|
||||
|
@ -29,7 +29,7 @@
|
|||
#include <math.h>
|
||||
|
||||
#include "locmap.h"
|
||||
#include "locid.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "mutex.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
@ -244,7 +244,7 @@ IGlobalLocales::convertToLCID(const char* posixID)
|
|||
|
||||
mid = (low + high) / 2;
|
||||
|
||||
int32_t compVal = icu_strcmp(langID, fgPosixIDmap[mid].posixLangID());
|
||||
int32_t compVal = uprv_strcmp(langID, fgPosixIDmap[mid].posixLangID());
|
||||
|
||||
if (mid == 0) // not found
|
||||
break;
|
||||
|
@ -446,7 +446,7 @@ ILcidPosixMap::hostID(const char* posixID) const
|
|||
|
||||
mid = (low + high) / 2;
|
||||
|
||||
int32_t compVal = icu_strcmp(posixID, fRegionMaps[mid].fPosixID);
|
||||
int32_t compVal = uprv_strcmp(posixID, fRegionMaps[mid].fPosixID);
|
||||
|
||||
if (compVal < 0)
|
||||
high = mid - 1;
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
*
|
||||
*****************************************************************************************
|
||||
*/
|
||||
// $Revision: 1.4 $
|
||||
// $Revision: 1.5 $
|
||||
//===============================================================================
|
||||
//
|
||||
// File locmap.hpp : Locale Mapping Classes
|
||||
|
@ -24,7 +24,7 @@
|
|||
#ifndef LOCMAP_H
|
||||
#define LOCMAP_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#ifdef XP_CPLUSPLUS
|
||||
class Locale;
|
||||
/////////////////////////////////////////////////
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
//------------------------------------------------------------------------------
|
||||
#ifndef MUTEX_H
|
||||
#define MUTEX_H
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#include "umutex.h"
|
||||
|
||||
|
|
|
@ -10,12 +10,12 @@
|
|||
#include "dcmpdata.h"
|
||||
#include "compdata.h"
|
||||
|
||||
#include "normlzr.h"
|
||||
#include "utypes.h"
|
||||
#include "unistr.h"
|
||||
#include "chariter.h"
|
||||
#include "schriter.h"
|
||||
#include "unicode.h"
|
||||
#include "unicode/normlzr.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/schriter.h"
|
||||
#include "unicode/unicode.h"
|
||||
#include "mutex.h"
|
||||
|
||||
|
||||
|
|
|
@ -1,717 +1 @@
|
|||
/*
|
||||
********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1996-1999, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef NORMLZR_H
|
||||
#define NORMLZR_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unistr.h"
|
||||
#include "chariter.h"
|
||||
|
||||
/**
|
||||
* <tt>Normalizer</tt> transforms Unicode text into an equivalent composed or
|
||||
* decomposed form, allowing for easier sorting and searching of text.
|
||||
* <tt>Normalizer</tt> supports the standard normalization forms described in
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
|
||||
* Unicode Technical Report #15</a>.
|
||||
* <p>
|
||||
* Characters with accents or other adornments can be encoded in
|
||||
* several different ways in Unicode. For example, take the character "Á"
|
||||
* (A-acute). In Unicode, this can be encoded as a single character (the
|
||||
* "composed" form):
|
||||
* <pre>
|
||||
* 00C1 LATIN CAPITAL LETTER A WITH ACUTE</pre>
|
||||
* or as two separate characters (the "decomposed" form):
|
||||
* <pre>
|
||||
* 0041 LATIN CAPITAL LETTER A
|
||||
* 0301 COMBINING ACUTE ACCENT</pre>
|
||||
* <p>
|
||||
* To a user of your program, however, both of these sequences should be
|
||||
* treated as the same "user-level" character "Á". When you are searching or
|
||||
* comparing text, you must ensure that these two sequences are treated
|
||||
* equivalently. In addition, you must handle characters with more than one
|
||||
* accent. Sometimes the order of a character's combining accents is
|
||||
* significant, while in other cases accent sequences in different orders are
|
||||
* really equivalent.
|
||||
* <p>
|
||||
* Similarly, the string "ffi" can be encoded as three separate letters:
|
||||
* <pre>
|
||||
* 0066 LATIN SMALL LETTER F
|
||||
* 0066 LATIN SMALL LETTER F
|
||||
* 0069 LATIN SMALL LETTER I</pre>
|
||||
* or as the single character
|
||||
* <pre>
|
||||
* FB03 LATIN SMALL LIGATURE FFI</pre>
|
||||
* <p>
|
||||
* The ffi ligature is not a distinct semantic character, and strictly speaking
|
||||
* it shouldn't be in Unicode at all, but it was included for compatibility
|
||||
* with existing character sets that already provided it. The Unicode standard
|
||||
* identifies such characters by giving them "compatibility" decompositions
|
||||
* into the corresponding semantic characters. When sorting and searching, you
|
||||
* will often want to use these mappings.
|
||||
* <p>
|
||||
* <tt>Normalizer</tt> helps solve these problems by transforming text into the
|
||||
* canonical composed and decomposed forms as shown in the first example above.
|
||||
* In addition, you can have it perform compatibility decompositions so that
|
||||
* you can treat compatibility characters the same as their equivalents.
|
||||
* Finally, <tt>Normalizer</tt> rearranges accents into the proper canonical
|
||||
* order, so that you do not have to worry about accent rearrangement on your
|
||||
* own.
|
||||
* <p>
|
||||
* <tt>Normalizer</tt> adds one optional behavior, {@link #IGNORE_HANGUL},
|
||||
* that differs from
|
||||
* the standard Unicode Normalization Forms. This option can be passed
|
||||
* to the {@link #Normalizer constructors} and to the static
|
||||
* {@link #compose compose} and {@link #decompose decompose} methods. This
|
||||
* option, and any that are added in the future, will be turned off by default.
|
||||
* <p>
|
||||
* There are three common usage models for <tt>Normalizer</tt>. In the first,
|
||||
* the static {@link #normalize normalize()} method is used to process an
|
||||
* entire input string at once. Second, you can create a <tt>Normalizer</tt>
|
||||
* object and use it to iterate through the normalized form of a string by
|
||||
* calling {@link #first} and {@link #next}. Finally, you can use the
|
||||
* {@link #setIndex setIndex()} and {@link #getIndex} methods to perform
|
||||
* random-access iteration, which is very useful for searching.
|
||||
* <p>
|
||||
* <b>Note:</b> <tt>Normalizer</tt> objects behave like iterators and have
|
||||
* methods such as <tt>setIndex</tt>, <tt>next</tt>, <tt>previous</tt>, etc.
|
||||
* You should note that while the <tt>setIndex</tt> and <tt>getIndex</tt> refer
|
||||
* to indices in the underlying <em>input</em> text being processed, the
|
||||
* <tt>next</tt> and <tt>previous</tt> methods iterate through characters
|
||||
* in the normalized <em>output</em>. This means that there is not
|
||||
* necessarily a one-to-one correspondence between characters returned
|
||||
* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
|
||||
* returned from <tt>setIndex</tt> and <tt>getIndex</tt>. It is for this
|
||||
* reason that <tt>Normalizer</tt> does not implement the
|
||||
* {@link CharacterIterator} interface.
|
||||
* <p>
|
||||
* <b>Note:</b> <tt>Normalizer</tt> is currently based on version 2.1.8
|
||||
* of the <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.
|
||||
* It will be updated as later versions of Unicode are released. If you are
|
||||
* using this class on a JDK that supports an earlier version of Unicode, it
|
||||
* is possible that <tt>Normalizer</tt> may generate composed or decomposed
|
||||
* characters for which your JDK's {@link java.lang.Character} class does not
|
||||
* have any data.
|
||||
* <p>
|
||||
* @author Laura Werner, Mark Davis
|
||||
*/
|
||||
class U_COMMON_API Normalizer
|
||||
{
|
||||
|
||||
public:
|
||||
// This tells us what the bits in the "mode" mean.
|
||||
enum {
|
||||
COMPAT_BIT = 1,
|
||||
DECOMP_BIT = 2,
|
||||
COMPOSE_BIT = 4
|
||||
};
|
||||
|
||||
|
||||
|
||||
/** */
|
||||
static const UChar DONE;
|
||||
|
||||
/** The mode of a Normalizer object */
|
||||
enum EMode {
|
||||
|
||||
/**
|
||||
* Null operation for use with the {@link #Normalizer constructors}
|
||||
* and the static {@link #normalize normalize} method. This value tells
|
||||
* the <tt>Normalizer</tt> to do nothing but return unprocessed characters
|
||||
* from the underlying String or CharacterIterator. If you have code which
|
||||
* requires raw text at some times and normalized text at others, you can
|
||||
* use <tt>NO_OP</tt> for the cases where you want raw text, rather
|
||||
* than having a separate code path that bypasses <tt>Normalizer</tt>
|
||||
* altogether.
|
||||
* <p>
|
||||
* @see #setMode
|
||||
*/
|
||||
NO_OP = 0,
|
||||
|
||||
/**
|
||||
* Canonical decomposition followed by canonical composition. Used with
|
||||
* the {@link #Normalizer constructors} and the static
|
||||
* {@link #normalize normalize}
|
||||
* method to determine the operation to be performed.
|
||||
* <p>
|
||||
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
|
||||
* off, this operation produces output that is in
|
||||
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
|
||||
* Form</a>
|
||||
* <b>C</b>.
|
||||
* <p>
|
||||
* @see #setMode
|
||||
*/
|
||||
COMPOSE = COMPOSE_BIT,
|
||||
|
||||
/**
|
||||
* Compatibility decomposition followed by canonical composition.
|
||||
* Used with the {@link #Normalizer constructors} and the static
|
||||
* {@link #normalize normalize} method to determine the operation to be
|
||||
* performed.
|
||||
* <p>
|
||||
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
|
||||
* off, this operation produces output that is in
|
||||
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
|
||||
* Form</a>
|
||||
* <b>KC</b>.
|
||||
* <p>
|
||||
* @see #setMode
|
||||
*/
|
||||
COMPOSE_COMPAT = COMPOSE_BIT | COMPAT_BIT,
|
||||
|
||||
/**
|
||||
* Canonical decomposition. This value is passed to the
|
||||
* {@link #Normalizer constructors} and the static
|
||||
* {@link #normalize normalize}
|
||||
* method to determine the operation to be performed.
|
||||
* <p>
|
||||
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
|
||||
* off, this operation produces output that is in
|
||||
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
|
||||
* Form</a>
|
||||
* <b>D</b>.
|
||||
* <p>
|
||||
* @see #setMode
|
||||
*/
|
||||
DECOMP = DECOMP_BIT,
|
||||
|
||||
/**
|
||||
* Compatibility decomposition. This value is passed to the
|
||||
* {@link #Normalizer constructors} and the static
|
||||
* {@link #normalize normalize}
|
||||
* method to determine the operation to be performed.
|
||||
* <p>
|
||||
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
|
||||
* off, this operation produces output that is in
|
||||
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
|
||||
* Form</a>
|
||||
* <b>KD</b>.
|
||||
* <p>
|
||||
* @see #setMode
|
||||
*/
|
||||
DECOMP_COMPAT = DECOMP_BIT | COMPAT_BIT
|
||||
};
|
||||
|
||||
/** The options for a Normalizer object */
|
||||
enum {
|
||||
|
||||
/**
|
||||
* Option to disable Hangul/Jamo composition and decomposition.
|
||||
* This option applies to Korean text,
|
||||
* which can be represented either in the Jamo alphabet or in Hangul
|
||||
* characters, which are really just two or three Jamo combined
|
||||
* into one visual glyph. Since Jamo takes up more storage space than
|
||||
* Hangul, applications that process only Hangul text may wish to turn
|
||||
* this option on when decomposing text.
|
||||
* <p>
|
||||
* The Unicode standard treats Hangul to Jamo conversion as a
|
||||
* canonical decomposition, so this option must be turned <b>off</b> if you
|
||||
* wish to transform strings into one of the standard
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
|
||||
* Unicode Normalization Forms</a>.
|
||||
* <p>
|
||||
* @see #setOption
|
||||
*/
|
||||
IGNORE_HANGUL = 0x001
|
||||
};
|
||||
|
||||
// Constructors
|
||||
|
||||
/**
|
||||
* Creates a new <tt>Normalizer</tt> object for iterating over the
|
||||
* normalized form of a given string.
|
||||
* <p>
|
||||
* @param str The string to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param mode The normalization mode.
|
||||
*/
|
||||
Normalizer(const UnicodeString& str,
|
||||
EMode mode);
|
||||
|
||||
/**
|
||||
* Creates a new <tt>Normalizer</tt> object for iterating over the
|
||||
* normalized form of a given string.
|
||||
* <p>
|
||||
* The <tt>options</tt> parameter specifies which optional
|
||||
* <tt>Normalizer</tt> features are to be enabled for this object.
|
||||
* <p>
|
||||
* @param str The string to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param mode The normalization mode.
|
||||
*
|
||||
* @param opt Any optional features to be enabled.
|
||||
* Currently the only available option is {@link #IGNORE_HANGUL}
|
||||
* If you want the default behavior corresponding to one of the
|
||||
* standard Unicode Normalization Forms, use 0 for this argument
|
||||
*/
|
||||
Normalizer(const UnicodeString& str,
|
||||
EMode mode,
|
||||
int32_t opt);
|
||||
|
||||
/**
|
||||
* Creates a new <tt>Normalizer</tt> object for iterating over the
|
||||
* normalized form of a given UChar string.
|
||||
* <p>
|
||||
* @param str The string to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param length Length of the string
|
||||
*
|
||||
*/
|
||||
Normalizer(const UChar* str,
|
||||
int32_t length,
|
||||
EMode mode);
|
||||
|
||||
/**
|
||||
* Creates a new <tt>Normalizer</tt> object for iterating over the
|
||||
* normalized form of the given text.
|
||||
* <p>
|
||||
* @param iter The input text to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param mode The normalization mode.
|
||||
*
|
||||
*/
|
||||
Normalizer(const CharacterIterator& iter,
|
||||
EMode mode);
|
||||
|
||||
/**
|
||||
* Creates a new <tt>Normalizer</tt> object for iterating over the
|
||||
* normalized form of the given text.
|
||||
* <p>
|
||||
* @param iter The input text to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param mode The normalization mode.
|
||||
*
|
||||
* @param opt Any optional features to be enabled.
|
||||
* Currently the only available option is {@link #IGNORE_HANGUL}
|
||||
* If you want the default behavior corresponding to one of the
|
||||
* standard Unicode Normalization Forms, use 0 for this argument
|
||||
*/
|
||||
Normalizer(const CharacterIterator& iter,
|
||||
EMode mode,
|
||||
int32_t opt);
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
Normalizer(const Normalizer& copy);
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
*/
|
||||
~Normalizer();
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Static utility methods
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Normalizes a <tt>String</tt> using the given normalization operation.
|
||||
* <p>
|
||||
* The <tt>options</tt> parameter specifies which optional
|
||||
* <tt>Normalizer</tt> features are to be enabled for this operation.
|
||||
* Currently the only available option is {@link #IGNORE_HANGUL}.
|
||||
* If you want the default behavior corresponding to one of the standard
|
||||
* Unicode Normalization Forms, use 0 for this argument.
|
||||
* <p>
|
||||
* @param source the input string to be normalized.
|
||||
*
|
||||
* @param mode the normalization mode
|
||||
*
|
||||
* @param options the optional features to be enabled.
|
||||
*
|
||||
* @param result The normalized string (on output).
|
||||
*
|
||||
* @param status The error code.
|
||||
*/
|
||||
static void normalize(const UnicodeString& source,
|
||||
EMode mode,
|
||||
int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Compose a <tt>String</tt>.
|
||||
* <p>
|
||||
* The <tt>options</tt> parameter specifies which optional
|
||||
* <tt>Normalizer</tt> features are to be enabled for this operation.
|
||||
* Currently the only available option is {@link #IGNORE_HANGUL}.
|
||||
* If you want the default behavior corresponding
|
||||
* to Unicode Normalization Form <b>C</b> or <b>KC</b>,
|
||||
* use 0 for this argument.
|
||||
* <p>
|
||||
* @param source the string to be composed.
|
||||
*
|
||||
* @param compat Perform compatibility decomposition before composition.
|
||||
* If this argument is <tt>false</tt>, only canonical
|
||||
* decomposition will be performed.
|
||||
*
|
||||
* @param options the optional features to be enabled.
|
||||
*
|
||||
* @param result The composed string (on output).
|
||||
*
|
||||
* @param status The error code.
|
||||
*/
|
||||
static void compose(const UnicodeString& source,
|
||||
bool_t compat,
|
||||
int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Static method to decompose a <tt>String</tt>.
|
||||
* <p>
|
||||
* The <tt>options</tt> parameter specifies which optional
|
||||
* <tt>Normalizer</tt> features are to be enabled for this operation.
|
||||
* Currently the only available option is {@link #IGNORE_HANGUL}.
|
||||
* The desired options should be OR'ed together to determine the value
|
||||
* of this argument. If you want the default behavior corresponding
|
||||
* to Unicode Normalization Form <b>D</b> or <b>KD</b>,
|
||||
* use 0 for this argument.
|
||||
* <p>
|
||||
* @param source the string to be decomposed.
|
||||
*
|
||||
* @param compat Perform compatibility decomposition.
|
||||
* If this argument is <tt>false</tt>, only canonical
|
||||
* decomposition will be performed.
|
||||
*
|
||||
* @param options the optional features to be enabled.
|
||||
*
|
||||
* @param result The decomposed string (on output).
|
||||
*
|
||||
* @param status The error code.
|
||||
*
|
||||
* @return the decomposed string.
|
||||
*/
|
||||
static void decompose(const UnicodeString& source,
|
||||
bool_t compat,
|
||||
int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status);
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// CharacterIterator overrides
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Return the current character in the normalized text.
|
||||
*/
|
||||
UChar current(void) const;
|
||||
|
||||
/**
|
||||
* Return the first character in the normalized text. This resets
|
||||
* the <tt>Normalizer's</tt> position to the beginning of the text.
|
||||
*/
|
||||
UChar first(void);
|
||||
|
||||
/**
|
||||
* Return the last character in the normalized text. This resets
|
||||
* the <tt>Normalizer's</tt> position to be just before the
|
||||
* input text corresponding to that normalized character.
|
||||
*/
|
||||
UChar last(void);
|
||||
|
||||
/**
|
||||
* Return the next character in the normalized text and advance
|
||||
* the iteration position by one. If the end
|
||||
* of the text has already been reached, {@link #DONE} is returned.
|
||||
*/
|
||||
UChar next(void);
|
||||
|
||||
/**
|
||||
* Return the previous character in the normalized text and decrement
|
||||
* the iteration position by one. If the beginning
|
||||
* of the text has already been reached, {@link #DONE} is returned.
|
||||
*/
|
||||
UChar previous(void);
|
||||
|
||||
/**
|
||||
* Set the iteration position in the input text that is being normalized
|
||||
* and return the first normalized character at that position.
|
||||
* <p>
|
||||
* <b>Note:</b> This method sets the position in the <em>input</em> text,
|
||||
* while {@link #next} and {@link #previous} iterate through characters
|
||||
* in the normalized <em>output</em>. This means that there is not
|
||||
* necessarily a one-to-one correspondence between characters returned
|
||||
* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
|
||||
* returned from <tt>setIndex</tt> and {@link #getIndex}.
|
||||
* <p>
|
||||
* @param index the desired index in the input text.
|
||||
*
|
||||
* @return the first normalized character that is the result of iterating
|
||||
* forward starting at the given index.
|
||||
*
|
||||
* @throws IllegalArgumentException if the given index is less than
|
||||
* {@link #getBeginIndex} or greater than {@link #getEndIndex}.
|
||||
*/
|
||||
UChar setIndex(UTextOffset index);
|
||||
|
||||
/**
|
||||
* Reset the iterator so that it is in the same state that it was just after
|
||||
* it was constructed. A subsequent call to <tt>next</tt> will return the first
|
||||
* character in the normalized text. In contrast, calling <tt>setIndex(0)</tt> followed
|
||||
* by <tt>next</tt> will return the <em>second</em> character in the normalized text,
|
||||
* because <tt>setIndex</tt> itself returns the first character
|
||||
*/
|
||||
void reset(void);
|
||||
|
||||
/**
|
||||
* Retrieve the current iteration position in the input text that is
|
||||
* being normalized. This method is useful in applications such as
|
||||
* searching, where you need to be able to determine the position in
|
||||
* the input text that corresponds to a given normalized output character.
|
||||
* <p>
|
||||
* <b>Note:</b> This method returns the position in the <em>input</em>, while
|
||||
* {@link #next} and {@link #previous} iterate through characters in the
|
||||
* <em>output</em>. This means that there is not necessarily a one-to-one
|
||||
* correspondence between characters returned by <tt>next</tt> and
|
||||
* <tt>previous</tt> and the indices passed to and returned from
|
||||
* <tt>setIndex</tt> and {@link #getIndex}.
|
||||
*
|
||||
*/
|
||||
UTextOffset getIndex(void) const;
|
||||
|
||||
/**
|
||||
* Retrieve the index of the start of the input text. This is the begin index
|
||||
* of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
|
||||
* over which this <tt>Normalizer</tt> is iterating
|
||||
*/
|
||||
UTextOffset startIndex(void) const;
|
||||
|
||||
/**
|
||||
* Retrieve the index of the end of the input text. This is the end index
|
||||
* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
|
||||
* over which this <tt>Normalizer</tt> is iterating
|
||||
*/
|
||||
UTextOffset endIndex(void) const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns true when both iterators refer to the same character in the same
|
||||
* character-storage object.
|
||||
*/
|
||||
// virtual bool_t operator==(const CharacterIterator& that) const;
|
||||
bool_t operator==(const Normalizer& that) const;
|
||||
inline bool_t operator!=(const Normalizer& that) const;
|
||||
|
||||
/**
|
||||
* Returns a pointer to a new Normalizer that is a clone of this one.
|
||||
* The caller is responsible for deleting the new clone.
|
||||
*/
|
||||
Normalizer* clone(void) const;
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator.
|
||||
*/
|
||||
int32_t hashCode(void) const;
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Property access methods
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Set the normalization mode for this object.
|
||||
* <p>
|
||||
* <b>Note:</b>If the normalization mode is changed while iterating
|
||||
* over a string, calls to {@link #next} and {@link #previous} may
|
||||
* return previously buffered characters in the old normalization mode
|
||||
* until the iteration is able to re-sync at the next base character.
|
||||
* It is safest to call {@link #setText setText()}, {@link #first},
|
||||
* {@link #last}, etc. after calling <tt>setMode</tt>.
|
||||
* <p>
|
||||
* @param newMode the new mode for this <tt>Normalizer</tt>.
|
||||
* The supported modes are:
|
||||
* <ul>
|
||||
* <li>{@link #COMPOSE} - Unicode canonical decomposition
|
||||
* followed by canonical composition.
|
||||
* <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decomposition
|
||||
* followed by canonical composition.
|
||||
* <li>{@link #DECOMP} - Unicode canonical decomposition
|
||||
* <li>{@link #DECOMP_COMPAT} - Unicode compatibility decomposition.
|
||||
* <li>{@link #NO_OP} - Do nothing but return characters
|
||||
* from the underlying input text.
|
||||
* </ul>
|
||||
*
|
||||
* @see #getMode
|
||||
*/
|
||||
void setMode(EMode newMode);
|
||||
|
||||
/**
|
||||
* Return the basic operation performed by this <tt>Normalizer</tt>
|
||||
*
|
||||
* @see #setMode
|
||||
*/
|
||||
EMode getMode(void) const;
|
||||
|
||||
/**
|
||||
* Set options that affect this <tt>Normalizer</tt>'s operation.
|
||||
* Options do not change the basic composition or decomposition operation
|
||||
* that is being performed, but they control whether
|
||||
* certain optional portions of the operation are done.
|
||||
* Currently the only available option is:
|
||||
* <p>
|
||||
* <ul>
|
||||
* <li>{@link #IGNORE_HANGUL} - Do not decompose Hangul syllables into the
|
||||
* Jamo alphabet and vice-versa. This option is off by default
|
||||
* (<i>i.e.</i> Hangul processing is enabled) since the Unicode
|
||||
* standard specifies that Hangul to Jamo is a canonical decomposition.
|
||||
* For any of the standard Unicode Normalization
|
||||
* Forms, you should leave this option off.
|
||||
* </ul>
|
||||
* <p>
|
||||
* @param option the option whose value is to be set.
|
||||
* @param value the new setting for the option. Use <tt>true</tt> to
|
||||
* turn the option on and <tt>false</tt> to turn it off.
|
||||
*
|
||||
* @see #getOption
|
||||
*/
|
||||
void setOption(int32_t option,
|
||||
bool_t value);
|
||||
|
||||
/**
|
||||
* Determine whether an option is turned on or off.
|
||||
* <p>
|
||||
* @see #setOption
|
||||
*/
|
||||
bool_t getOption(int32_t option) const;
|
||||
|
||||
/**
|
||||
* Set the input text over which this <tt>Normalizer</tt> will iterate.
|
||||
* The iteration position is set to the beginning.
|
||||
*/
|
||||
void setText(const UnicodeString& newText,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Set the input text over which this <tt>Normalizer</tt> will iterate.
|
||||
* The iteration position is set to the beginning.
|
||||
*/
|
||||
void setText(const CharacterIterator& newText,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Set the input text over which this <tt>Normalizer</tt> will iterate.
|
||||
* The iteration position is set to the beginning.
|
||||
*/
|
||||
void setText(const UChar* newText,
|
||||
int32_t length,
|
||||
UErrorCode &status);
|
||||
/**
|
||||
* Copies the text under iteration into the UnicodeString referred to by
|
||||
* "result".
|
||||
* @param result Receives a copy of the text under iteration.
|
||||
*/
|
||||
void getText(UnicodeString& result);
|
||||
|
||||
private:
|
||||
// Private utility methods for iteration
|
||||
// For documentation, see the source code
|
||||
UChar nextCompose(void);
|
||||
UChar prevCompose(void);
|
||||
UChar nextDecomp(void);
|
||||
UChar prevDecomp(void);
|
||||
|
||||
UChar curForward(void);
|
||||
UChar curBackward(void);
|
||||
|
||||
void init(CharacterIterator* iter,
|
||||
EMode mode,
|
||||
int32_t option);
|
||||
void initBuffer(void);
|
||||
void clearBuffer(void);
|
||||
|
||||
// Utilities used by Compose
|
||||
static void bubbleAppend(UnicodeString& target,
|
||||
UChar ch,
|
||||
uint32_t cclass);
|
||||
static uint32_t getComposeClass(UChar ch);
|
||||
static uint16_t composeLookup(UChar ch);
|
||||
static uint16_t composeAction(uint16_t baseIndex,
|
||||
uint16_t comIndex);
|
||||
static void explode(UnicodeString& target,
|
||||
uint16_t index);
|
||||
static UChar pairExplode(UnicodeString& target,
|
||||
uint16_t action);
|
||||
|
||||
// Utilities used by Decompose
|
||||
static void fixCanonical(UnicodeString& result); // Reorders combining marks
|
||||
static uint8_t getClass(UChar ch); // Gets char's combining class
|
||||
|
||||
// Other static utility methods
|
||||
static void doAppend(const UChar source[],
|
||||
uint16_t offset,
|
||||
UnicodeString& dest);
|
||||
static void doInsert(const UChar source[],
|
||||
uint16_t offset,
|
||||
UnicodeString& dest,
|
||||
UTextOffset pos);
|
||||
|
||||
static void hangulToJamo(UChar ch,
|
||||
UnicodeString& result,
|
||||
uint16_t decompLimit);
|
||||
static void jamoAppend(UChar ch,
|
||||
uint16_t decompLimit,
|
||||
UnicodeString& dest);
|
||||
static void jamoToHangul(UnicodeString& buffer,
|
||||
UTextOffset start);
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Private data
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
EMode fMode;
|
||||
int32_t fOptions;
|
||||
int16_t minDecomp;
|
||||
|
||||
// The input text and our position in it
|
||||
CharacterIterator* text;
|
||||
|
||||
// A buffer for holding intermediate results
|
||||
UnicodeString buffer;
|
||||
UTextOffset bufferPos;
|
||||
UTextOffset bufferLimit;
|
||||
UChar currentChar;
|
||||
|
||||
// Another buffer for use during iterative composition
|
||||
UnicodeString explodeBuf;
|
||||
|
||||
enum {
|
||||
EMPTY = -1,
|
||||
STR_INDEX_SHIFT = 2, //Must agree with the constants used in NormalizerBuilder
|
||||
STR_LENGTH_MASK = 0x0003
|
||||
};
|
||||
|
||||
static const UChar HANGUL_BASE;
|
||||
static const UChar HANGUL_LIMIT;
|
||||
static const UChar JAMO_LBASE;
|
||||
static const UChar JAMO_VBASE;
|
||||
static const UChar JAMO_TBASE;
|
||||
static const int16_t JAMO_LCOUNT;
|
||||
static const int16_t JAMO_VCOUNT;
|
||||
static const int16_t JAMO_TCOUNT;
|
||||
static const int16_t JAMO_NCOUNT;
|
||||
|
||||
friend class ComposedCharIter;
|
||||
};
|
||||
|
||||
inline bool_t
|
||||
Normalizer::operator!= (const Normalizer& other) const
|
||||
{ return ! operator==(other); }
|
||||
|
||||
#endif // _NORMLZR
|
||||
|
||||
|
||||
|
||||
|
||||
#error Please include unicode/normlzr.h instead
|
||||
|
|
|
@ -1,90 +1 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* FILE NAME : platform.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/* Define the platform we're on. */
|
||||
#ifndef OS2
|
||||
#define OS2 1
|
||||
#endif
|
||||
|
||||
/* Define whether inttypes.h is available */
|
||||
#define HAVE_INTTYPES_H 0
|
||||
|
||||
/* Determines whether specific types are available */
|
||||
#define HAVE_INT8_T 0
|
||||
#define HAVE_UINT8_T 0
|
||||
#define HAVE_INT16_T 0
|
||||
#define HAVE_UINT16_T 0
|
||||
#define HAVE_INT32_T 0
|
||||
#define HAVE_UINT32_T 0
|
||||
#define HAVE_BOOL_T 0
|
||||
|
||||
/* Determines the endianness of the platform */
|
||||
#define U_IS_BIG_ENDIAN 0
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Generic data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* If your platform does not have the <inttypes.h> header, you may
|
||||
need to edit the typedefs below. */
|
||||
#if HAVE_INTTYPES_H
|
||||
#include <inttypes.h>
|
||||
#else
|
||||
|
||||
#if ! HAVE_INT8_T
|
||||
typedef signed char int8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT8_T
|
||||
typedef unsigned char uint8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT16_T
|
||||
typedef signed short int16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT16_T
|
||||
typedef unsigned short uint16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT32_T
|
||||
typedef signed long int32_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT32_T
|
||||
typedef unsigned long uint32_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#define T_INT32_MAX (LONG_MAX)
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Character data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_SIZEOF_WCHAR_T 2
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Symbol import-export control */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_EXPORT
|
||||
#define U_EXPORT2
|
||||
#define U_IMPORT
|
||||
#error Please include unicode/pos2.h instead
|
||||
|
|
|
@ -1,92 +1 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* FILE NAME : platform.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
* 09/21/99 barry Created new for OS/400 platform.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/* Define the platform we're on. */
|
||||
#ifndef OS400
|
||||
#define OS400
|
||||
#endif
|
||||
|
||||
/* Define whether inttypes.h is available */
|
||||
#define HAVE_INTTYPES_H 0
|
||||
|
||||
/* Determines whether specific types are available */
|
||||
#define HAVE_INT8_T 0
|
||||
#define HAVE_UINT8_T 0
|
||||
#define HAVE_INT16_T 0
|
||||
#define HAVE_UINT16_T 0
|
||||
#define HAVE_INT32_T 0
|
||||
#define HAVE_UINT32_T 0
|
||||
#define HAVE_BOOL_T 0
|
||||
|
||||
/* Determines the endianness of the platform */
|
||||
#define U_IS_BIG_ENDIAN 1
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Generic data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* If your platform does not have the <inttypes.h> header, you may
|
||||
need to edit the typedefs below. */
|
||||
#if HAVE_INTTYPES_H
|
||||
#include <inttypes.h>
|
||||
#else
|
||||
|
||||
#if ! HAVE_INT8_T
|
||||
typedef signed char int8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT8_T
|
||||
typedef unsigned char uint8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT16_T
|
||||
typedef signed short int16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT16_T
|
||||
typedef unsigned short uint16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT32_T
|
||||
typedef signed long int32_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT32_T
|
||||
typedef unsigned long uint32_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#define T_INT32_MAX (LONG_MAX)
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Character data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_CHARSET_FAMILY 1
|
||||
#define U_SIZEOF_WCHAR_T 2
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Symbol import-export control */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_EXPORT
|
||||
#define U_EXPORT2
|
||||
#define U_IMPORT
|
||||
#error Please include unicode/pos400.h instead
|
||||
|
|
|
@ -48,7 +48,7 @@
|
|||
#include <locale.h>
|
||||
|
||||
/* include ICU headers */
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "umutex.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
|
@ -132,7 +132,7 @@ static char* u_bottomNBytesOfDouble(double* d, int n);
|
|||
|
||||
/* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70.*/
|
||||
int32_t
|
||||
icu_getUTCtime()
|
||||
uprv_getUTCtime()
|
||||
{
|
||||
#ifdef XP_MAC
|
||||
time_t t, t1, t2;
|
||||
|
@ -166,7 +166,7 @@ icu_getUTCtime()
|
|||
---------------------------------------------------------------------------*/
|
||||
|
||||
bool_t
|
||||
icu_isNaN(double number)
|
||||
uprv_isNaN(double number)
|
||||
{
|
||||
#ifdef IEEE_754
|
||||
/* This should work in theory, but it doesn't, so we resort to the more*/
|
||||
|
@ -209,7 +209,7 @@ icu_isNaN(double number)
|
|||
}
|
||||
|
||||
bool_t
|
||||
icu_isInfinite(double number)
|
||||
uprv_isInfinite(double number)
|
||||
{
|
||||
#ifdef IEEE_754
|
||||
/* We know the top bit is the sign bit, so we mask that off in a copy of */
|
||||
|
@ -218,7 +218,7 @@ icu_isInfinite(double number)
|
|||
/* scrutinize the pattern itself. */
|
||||
/* double a = number; */
|
||||
/* *(int8_t*)u_topNBytesOfDouble(&a, 1) &= 0x7F;*/
|
||||
/* return a == icu_getInfinity();*/
|
||||
/* return a == uprv_getInfinity();*/
|
||||
/* Instead, We want to see either:*/
|
||||
|
||||
/* 7FF0 0000 0000 0000*/
|
||||
|
@ -239,32 +239,32 @@ icu_isInfinite(double number)
|
|||
}
|
||||
|
||||
bool_t
|
||||
icu_isPositiveInfinity(double number)
|
||||
uprv_isPositiveInfinity(double number)
|
||||
{
|
||||
#ifdef IEEE_754
|
||||
return (number > 0 && icu_isInfinite(number));
|
||||
return (number > 0 && uprv_isInfinite(number));
|
||||
#else
|
||||
return icu_isInfinite(number);
|
||||
return uprv_isInfinite(number);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool_t
|
||||
icu_isNegativeInfinity(double number)
|
||||
uprv_isNegativeInfinity(double number)
|
||||
{
|
||||
#ifdef IEEE_754
|
||||
return (number < 0 && icu_isInfinite(number));
|
||||
return (number < 0 && uprv_isInfinite(number));
|
||||
#else
|
||||
#ifdef OS390
|
||||
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
|
||||
sizeof(uint32_t));
|
||||
return((highBits & SIGN) && icu_isInfinite(number));
|
||||
return((highBits & SIGN) && uprv_isInfinite(number));
|
||||
#endif
|
||||
return icu_isInfinite(number);
|
||||
return uprv_isInfinite(number);
|
||||
#endif
|
||||
}
|
||||
|
||||
double
|
||||
icu_getNaN()
|
||||
uprv_getNaN()
|
||||
{
|
||||
#if defined(IEEE_754) || defined(OS390)
|
||||
if( ! fgNaNInitialized) {
|
||||
|
@ -289,7 +289,7 @@ icu_getNaN()
|
|||
}
|
||||
|
||||
double
|
||||
icu_getInfinity()
|
||||
uprv_getInfinity()
|
||||
{
|
||||
#ifdef IEEE_754
|
||||
if (!fgInfInitialized)
|
||||
|
@ -311,37 +311,37 @@ icu_getInfinity()
|
|||
}
|
||||
|
||||
double
|
||||
icu_floor(double x)
|
||||
uprv_floor(double x)
|
||||
{
|
||||
return floor(x);
|
||||
}
|
||||
|
||||
double
|
||||
icu_ceil(double x)
|
||||
uprv_ceil(double x)
|
||||
{
|
||||
return ceil(x);
|
||||
}
|
||||
|
||||
double
|
||||
icu_fabs(double x)
|
||||
uprv_fabs(double x)
|
||||
{
|
||||
return fabs(x);
|
||||
}
|
||||
|
||||
double
|
||||
icu_modf(double x, double* y)
|
||||
uprv_modf(double x, double* y)
|
||||
{
|
||||
return modf(x, y);
|
||||
}
|
||||
|
||||
double
|
||||
icu_fmod(double x, double y)
|
||||
uprv_fmod(double x, double y)
|
||||
{
|
||||
return fmod(x, y);
|
||||
}
|
||||
|
||||
double
|
||||
icu_pow10(int32_t x)
|
||||
uprv_pow10(int32_t x)
|
||||
{
|
||||
#ifdef XP_MAC
|
||||
return pow(10.0, (double)x);
|
||||
|
@ -351,7 +351,7 @@ icu_pow10(int32_t x)
|
|||
}
|
||||
|
||||
double
|
||||
icu_IEEEremainder(double x, double p)
|
||||
uprv_IEEEremainder(double x, double p)
|
||||
{
|
||||
#ifdef IEEE_754
|
||||
int32_t hx, hp;
|
||||
|
@ -379,11 +379,11 @@ icu_IEEEremainder(double x, double p)
|
|||
|
||||
|
||||
if(hp <= 0x7fdfffff)
|
||||
x = icu_fmod(x, p + p); /* now x < 2p */
|
||||
x = uprv_fmod(x, p + p); /* now x < 2p */
|
||||
if(((hx-hp)|(lx-lp)) == 0)
|
||||
return 0.0 * x;
|
||||
x = icu_fabs(x);
|
||||
p = icu_fabs(p);
|
||||
x = uprv_fabs(x);
|
||||
p = uprv_fabs(p);
|
||||
if (hp < 0x00200000) {
|
||||
if(x + x > p) {
|
||||
x -= p;
|
||||
|
@ -405,19 +405,19 @@ icu_IEEEremainder(double x, double p)
|
|||
return x;
|
||||
#else
|
||||
/* {sfb} need to fix this*/
|
||||
return icu_fmod(x, p);
|
||||
return uprv_fmod(x, p);
|
||||
#endif
|
||||
}
|
||||
|
||||
double
|
||||
icu_fmax(double x, double y)
|
||||
uprv_fmax(double x, double y)
|
||||
{
|
||||
#ifdef IEEE_754
|
||||
int32_t lowBits;
|
||||
|
||||
/* first handle NaN*/
|
||||
if(icu_isNaN(x) || icu_isNaN(y))
|
||||
return icu_getNaN();
|
||||
if(uprv_isNaN(x) || uprv_isNaN(y))
|
||||
return uprv_getNaN();
|
||||
|
||||
/* check for -0 and 0*/
|
||||
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
|
||||
|
@ -437,20 +437,20 @@ icu_fmax(double x, double y)
|
|||
}
|
||||
|
||||
int32_t
|
||||
icu_max(int32_t x, int32_t y)
|
||||
uprv_max(int32_t x, int32_t y)
|
||||
{
|
||||
return (x > y ? x : y);
|
||||
}
|
||||
|
||||
double
|
||||
icu_fmin(double x, double y)
|
||||
uprv_fmin(double x, double y)
|
||||
{
|
||||
#ifdef IEEE_754
|
||||
int32_t lowBits;
|
||||
|
||||
/* first handle NaN*/
|
||||
if(icu_isNaN(x) || icu_isNaN(y))
|
||||
return icu_getNaN();
|
||||
if(uprv_isNaN(x) || uprv_isNaN(y))
|
||||
return uprv_getNaN();
|
||||
|
||||
/* check for -0 and 0*/
|
||||
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
|
||||
|
@ -471,7 +471,7 @@ icu_fmin(double x, double y)
|
|||
}
|
||||
|
||||
int32_t
|
||||
icu_min(int32_t x, int32_t y)
|
||||
uprv_min(int32_t x, int32_t y)
|
||||
{
|
||||
return (x > y ? y : x);
|
||||
}
|
||||
|
@ -484,15 +484,15 @@ icu_min(int32_t x, int32_t y)
|
|||
* ceil(3.3) = 4, ceil(-3.3) = -3
|
||||
*/
|
||||
double
|
||||
icu_trunc(double d)
|
||||
uprv_trunc(double d)
|
||||
{
|
||||
#ifdef IEEE_754
|
||||
|
||||
int32_t lowBits;
|
||||
|
||||
/* handle error cases*/
|
||||
if(icu_isNaN(d)) return icu_getNaN();
|
||||
if(icu_isInfinite(d)) return icu_getInfinity();
|
||||
if(uprv_isNaN(d)) return uprv_getNaN();
|
||||
if(uprv_isInfinite(d)) return uprv_getInfinity();
|
||||
|
||||
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
|
||||
if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
|
||||
|
@ -505,7 +505,7 @@ icu_trunc(double d)
|
|||
}
|
||||
|
||||
void
|
||||
icu_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo)
|
||||
uprv_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo)
|
||||
{
|
||||
*hi = *(int32_t*)u_topNBytesOfDouble(&d, sizeof(int32_t));
|
||||
*lo = *(uint32_t*)u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
|
||||
|
@ -520,7 +520,7 @@ icu_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo)
|
|||
* (Thanks to Alan Liu for supplying this function.)
|
||||
*/
|
||||
int16_t
|
||||
icu_log10(double d)
|
||||
uprv_log10(double d)
|
||||
{
|
||||
/* The reason this routine is needed is that simply taking the*/
|
||||
/* log and dividing by log10 yields a result which may be off*/
|
||||
|
@ -541,7 +541,7 @@ icu_log10(double d)
|
|||
}
|
||||
|
||||
int32_t
|
||||
icu_digitsAfterDecimal(double x)
|
||||
uprv_digitsAfterDecimal(double x)
|
||||
{
|
||||
char buffer[20];
|
||||
int16_t numDigits;
|
||||
|
@ -555,7 +555,7 @@ icu_digitsAfterDecimal(double x)
|
|||
/* (it handles mathematical inaccuracy better than we can), then find out */
|
||||
/* how many characters are to the right of the decimal point */
|
||||
sprintf(buffer, "%.9g", x);
|
||||
p = icu_strchr(buffer, '.');
|
||||
p = uprv_strchr(buffer, '.');
|
||||
if (p == 0)
|
||||
return 0;
|
||||
|
||||
|
@ -565,7 +565,7 @@ icu_digitsAfterDecimal(double x)
|
|||
/* if the number's string representation is in scientific notation, find */
|
||||
/* the exponent and take it into account*/
|
||||
exponent = 0;
|
||||
p = icu_strchr(buffer, 'e');
|
||||
p = uprv_strchr(buffer, 'e');
|
||||
if (p != 0) {
|
||||
int16_t expPos = p - buffer;
|
||||
numDigits -= strlen(buffer) - expPos;
|
||||
|
@ -592,7 +592,7 @@ icu_digitsAfterDecimal(double x)
|
|||
|
||||
/* Time zone utilities */
|
||||
void
|
||||
icu_tzset()
|
||||
uprv_tzset()
|
||||
{
|
||||
#ifdef POSIX
|
||||
tzset();
|
||||
|
@ -608,7 +608,7 @@ icu_tzset()
|
|||
}
|
||||
|
||||
int32_t
|
||||
icu_timezone()
|
||||
uprv_timezone()
|
||||
{
|
||||
#ifdef POSIX
|
||||
#ifdef OS390
|
||||
|
@ -643,7 +643,7 @@ icu_timezone()
|
|||
}
|
||||
|
||||
char*
|
||||
icu_tzname(int index)
|
||||
uprv_tzname(int index)
|
||||
{
|
||||
#ifdef POSIX
|
||||
return tzname[index];
|
||||
|
@ -675,14 +675,14 @@ gDataDirectory[1024];
|
|||
U_CAPI void U_EXPORT2
|
||||
u_setDataDirectory(const char *directory) {
|
||||
if(directory!=NULL) {
|
||||
int length=icu_strlen(directory);
|
||||
int length=uprv_strlen(directory);
|
||||
|
||||
if(length<sizeof(gDataDirectory)-1) {
|
||||
umtx_lock(NULL);
|
||||
if(length==0) {
|
||||
*gDataDirectory=0;
|
||||
} else {
|
||||
icu_memcpy(gDataDirectory, directory, length);
|
||||
uprv_memcpy(gDataDirectory, directory, length);
|
||||
|
||||
/* terminate the directory with a separator (/ or \) */
|
||||
if(gDataDirectory[length-1]!=U_FILE_SEP_CHAR) {
|
||||
|
@ -715,7 +715,7 @@ getSystemPath(char *path, int size) {
|
|||
int length=(uint8_t)volName[0];
|
||||
if(length>0) {
|
||||
/* convert the Pascal string to a C string */
|
||||
icu_memmove(path, path+1, length);
|
||||
uprv_memmove(path, path+1, length);
|
||||
path[length]=0;
|
||||
}
|
||||
return length;
|
||||
|
@ -754,7 +754,7 @@ getLibraryPath(char *path, int size) {
|
|||
if(mod!=NULL) {
|
||||
if(GetModuleFileName(mod, path, size)>0) {
|
||||
/* remove the basename and the last file separator */
|
||||
char *lastSep=icu_strrchr(path, U_FILE_SEP_CHAR);
|
||||
char *lastSep=uprv_strrchr(path, U_FILE_SEP_CHAR);
|
||||
if(lastSep!=NULL) {
|
||||
*lastSep=0;
|
||||
return lastSep-path;
|
||||
|
@ -768,7 +768,7 @@ getLibraryPath(char *path, int size) {
|
|||
rc=DosQueryModuleName(mod, (LONG)size, path);
|
||||
if(rc==NO_ERROR) {
|
||||
/* remove the basename and the last file separator */
|
||||
char *lastSep=icu_strrchr(path, U_FILE_SEP_CHAR);
|
||||
char *lastSep=uprv_strrchr(path, U_FILE_SEP_CHAR);
|
||||
if(lastSep!=NULL) {
|
||||
*lastSep=0;
|
||||
return lastSep-path;
|
||||
|
@ -790,13 +790,13 @@ getLibraryPath(char *path, int size) {
|
|||
if(rc>=0) {
|
||||
/* search for the list item for the library itself */
|
||||
while(p!=NULL) {
|
||||
s=icu_strstr(p->l_name, U_COMMON_LIBNAME); /* "libicu-uc.so" */
|
||||
s=uprv_strstr(p->l_name, U_COMMON_LIBNAME); /* "libicu-uc.so" */
|
||||
if(s!=NULL) {
|
||||
if(s>p->l_name) {
|
||||
/* copy the path, without the basename and the last separator */
|
||||
length=(s-p->l_name)-1;
|
||||
if(0<length && length<size) {
|
||||
icu_memcpy(path, p->l_name, length);
|
||||
uprv_memcpy(path, p->l_name, length);
|
||||
path[length]=0;
|
||||
} else {
|
||||
length=0;
|
||||
|
@ -831,13 +831,13 @@ getLibraryPath(char *path, int size) {
|
|||
}
|
||||
p=(struct ld_info *)((uint8_t *)p+p->ldinfo_next);
|
||||
|
||||
s=icu_strstr(p->ldinfo_filename, U_COMMON_LIBNAME); /* "libicuuc.a" */
|
||||
s=uprv_strstr(p->ldinfo_filename, U_COMMON_LIBNAME); /* "libicuuc.a" */
|
||||
if(s!=NULL) {
|
||||
if(s>p->ldinfo_filename) {
|
||||
/* copy the path, without the basename and the last separator */
|
||||
length=(s-p->ldinfo_filename)-1;
|
||||
if(0<length && length<size) {
|
||||
icu_memcpy(path, p->ldinfo_filename, length);
|
||||
uprv_memcpy(path, p->ldinfo_filename, length);
|
||||
path[length]=0;
|
||||
} else {
|
||||
length=0;
|
||||
|
@ -865,13 +865,13 @@ getLibraryPath(char *path, int size) {
|
|||
break;
|
||||
}
|
||||
|
||||
s=icu_strstr(p->filename, U_COMMON_LIBNAME);
|
||||
s=uprv_strstr(p->filename, U_COMMON_LIBNAME);
|
||||
if(s!=NULL) {
|
||||
if(s>p->filename) {
|
||||
/* copy the path, without the basename and the last separator */
|
||||
length=(s-p->filename)-1;
|
||||
if(0<length && length<size) {
|
||||
icu_memcpy(path, p->filename, length);
|
||||
uprv_memcpy(path, p->filename, length);
|
||||
path[length]=0;
|
||||
} else {
|
||||
length=0;
|
||||
|
@ -948,8 +948,8 @@ findLibraryPath(char *path, int size) {
|
|||
}
|
||||
|
||||
/* copy the path and add the library filename */
|
||||
icu_memcpy(path, libPath, length);
|
||||
icu_strcpy(path+length, U_FILE_SEP_STRING LIB_FILENAME);
|
||||
uprv_memcpy(path, libPath, length);
|
||||
uprv_strcpy(path+length, U_FILE_SEP_STRING LIB_FILENAME);
|
||||
|
||||
/* does this file exist in this path? */
|
||||
f=T_FileStream_open(path, "rb");
|
||||
|
@ -1016,7 +1016,7 @@ u_getDataDirectory(void) {
|
|||
char temporaryPath[1024];
|
||||
|
||||
/* copy the path with variables to the temporary one */
|
||||
icu_memcpy(temporaryPath, pathBuffer, size);
|
||||
uprv_memcpy(temporaryPath, pathBuffer, size);
|
||||
|
||||
/* do the replacement and store it in the pathBuffer */
|
||||
size=ExpandEnvironmentStrings(temporaryPath, pathBuffer, sizeof(pathBuffer));
|
||||
|
@ -1036,7 +1036,7 @@ u_getDataDirectory(void) {
|
|||
if(path==NULL || *path==0) {
|
||||
length=getLibraryPath(pathBuffer, sizeof(pathBuffer));
|
||||
if(length>0) {
|
||||
icu_strcpy(pathBuffer+length, U_FILE_SEP_STRING ".." FALLBACK_PATH);
|
||||
uprv_strcpy(pathBuffer+length, U_FILE_SEP_STRING ".." FALLBACK_PATH);
|
||||
path=pathBuffer;
|
||||
}
|
||||
}
|
||||
|
@ -1045,7 +1045,7 @@ u_getDataDirectory(void) {
|
|||
if(path==NULL || *path==0) {
|
||||
length=findLibraryPath(pathBuffer, sizeof(pathBuffer));
|
||||
if(length>0) {
|
||||
icu_strcpy(pathBuffer+length, U_FILE_SEP_STRING ".." FALLBACK_PATH);
|
||||
uprv_strcpy(pathBuffer+length, U_FILE_SEP_STRING ".." FALLBACK_PATH);
|
||||
path=pathBuffer;
|
||||
}
|
||||
}
|
||||
|
@ -1058,7 +1058,7 @@ u_getDataDirectory(void) {
|
|||
# else
|
||||
length=getSystemPath(pathBuffer, sizeof(pathBuffer));
|
||||
if(length>0) {
|
||||
icu_strcpy(pathBuffer+length, FALLBACK_PATH);
|
||||
uprv_strcpy(pathBuffer+length, FALLBACK_PATH);
|
||||
path=pathBuffer;
|
||||
} else {
|
||||
path=FALLBACK_PATH;
|
||||
|
@ -1178,13 +1178,13 @@ mac_lc_rec mac_lc_recs[] = {
|
|||
#endif
|
||||
|
||||
const char*
|
||||
icu_getDefaultLocaleID()
|
||||
uprv_getDefaultLocaleID()
|
||||
{
|
||||
#ifdef POSIX
|
||||
char* posixID = getenv("LC_ALL");
|
||||
if (posixID == 0) posixID = getenv("LANG");
|
||||
if (posixID == 0) posixID = setlocale(LC_ALL, NULL);
|
||||
if (icu_strcmp("C", posixID) == 0) posixID = "en_US";
|
||||
if (uprv_strcmp("C", posixID) == 0) posixID = "en_US";
|
||||
return posixID;
|
||||
#endif
|
||||
|
||||
|
@ -1251,7 +1251,7 @@ icu_getDefaultLocaleID()
|
|||
/* end of platform-specific implementation */
|
||||
|
||||
double
|
||||
icu_nextDouble(double d, bool_t next)
|
||||
uprv_nextDouble(double d, bool_t next)
|
||||
{
|
||||
#ifdef IEEE_754
|
||||
int32_t highBits;
|
||||
|
@ -1263,7 +1263,7 @@ icu_nextDouble(double d, bool_t next)
|
|||
uint32_t signBit;
|
||||
|
||||
/* filter out NaN's */
|
||||
if (icu_isNaN(d)) {
|
||||
if (uprv_isNaN(d)) {
|
||||
return d;
|
||||
}
|
||||
|
||||
|
@ -1356,7 +1356,7 @@ static char* u_bottomNBytesOfDouble(double* d, int n)
|
|||
return U_IS_BIG_ENDIAN ? (char*)(d + 1) - n : (char*)d;
|
||||
}
|
||||
|
||||
const char* icu_getDefaultCodepage()
|
||||
const char* uprv_getDefaultCodepage()
|
||||
{
|
||||
#if defined(OS400)
|
||||
return "ibm-37";
|
||||
|
@ -1366,7 +1366,7 @@ const char* icu_getDefaultCodepage()
|
|||
/* TBD */
|
||||
#elif defined(WIN32)
|
||||
static char codepage[12]={ "cp" };
|
||||
icu_strcpy(codepage+2, _itoa(GetACP(), tempString, 10));
|
||||
uprv_strcpy(codepage+2, _itoa(GetACP(), tempString, 10));
|
||||
return codepage;
|
||||
#elif defined(POSIX)
|
||||
return "LATIN_1";
|
||||
|
|
|
@ -1,215 +1 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* FILE NAME : putil.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/14/98 nos Creation (content moved here from utypes.h).
|
||||
* 06/17/99 erm Added IEEE_754
|
||||
* 07/22/98 stephen Added IEEEremainder, max, min, trunc
|
||||
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
|
||||
* 08/24/98 stephen Added longBitsFromDouble
|
||||
* 03/02/99 stephen Removed openFile(). Added AS400 support.
|
||||
* 04/15/99 stephen Converted to C
|
||||
* 11/15/99 helena Integrated S/390 changes for IEEE support.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef PUTIL_H
|
||||
#define PUTIL_H
|
||||
|
||||
#include "utypes.h"
|
||||
|
||||
/* Define this if your platform supports IEEE 754 floating point */
|
||||
#define IEEE_754
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Platform utilities */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* Platform utilities isolates the platform dependencies of the
|
||||
* libarary. For each platform which this code is ported to, these
|
||||
* functions may have to be re-implemented. */
|
||||
|
||||
/* Floating point utilities */
|
||||
U_CAPI bool_t U_EXPORT2 icu_isNaN(double);
|
||||
U_CAPI bool_t U_EXPORT2 icu_isInfinite(double);
|
||||
U_CAPI bool_t U_EXPORT2 icu_isPositiveInfinity(double);
|
||||
U_CAPI bool_t U_EXPORT2 icu_isNegativeInfinity(double);
|
||||
U_CAPI double U_EXPORT2 icu_getNaN(void);
|
||||
U_CAPI double U_EXPORT2 icu_getInfinity(void);
|
||||
|
||||
U_CAPI double U_EXPORT2 icu_floor(double x);
|
||||
U_CAPI double U_EXPORT2 icu_ceil(double x);
|
||||
U_CAPI double U_EXPORT2 icu_fabs(double x);
|
||||
U_CAPI double U_EXPORT2 icu_modf(double x, double* y);
|
||||
U_CAPI double U_EXPORT2 icu_fmod(double x, double y);
|
||||
U_CAPI double U_EXPORT2 icu_pow10(int32_t x);
|
||||
U_CAPI double U_EXPORT2 icu_IEEEremainder(double x, double y);
|
||||
U_CAPI double U_EXPORT2 icu_fmax(double x, double y);
|
||||
U_CAPI double U_EXPORT2 icu_fmin(double x, double y);
|
||||
U_CAPI int32_t U_EXPORT2 icu_max(int32_t x, int32_t y);
|
||||
U_CAPI int32_t U_EXPORT2 icu_min(int32_t x, int32_t y);
|
||||
U_CAPI double U_EXPORT2 icu_trunc(double d);
|
||||
U_CAPI void U_EXPORT2 icu_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo);
|
||||
#if U_IS_BIG_ENDIAN
|
||||
# define icu_isNegative(number) (*((signed char *)&(number))<0)
|
||||
#else
|
||||
# define icu_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
|
||||
#endif
|
||||
|
||||
/* Conversion from a digit to the character with radix base from 2-19 */
|
||||
#ifndef OS390
|
||||
#define T_CString_itosOffset(a) a<=9?(0x30+a):(0x30+a+7)
|
||||
#else
|
||||
#define T_CString_itosOffset(a) a<=9?(0xF0+a):(0xC1+a-10) /* C1 is EBCDIC 'A' */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Return the floor of the log base 10 of a given double.
|
||||
* This method compensates for inaccuracies which arise naturally when
|
||||
* computing logs, and always gives the correct value. The parameter
|
||||
* must be positive and finite.
|
||||
* (Thanks to Alan Liu for supplying this function.)
|
||||
*/
|
||||
/**
|
||||
* Returns the common log of the double value d.
|
||||
*
|
||||
* @param d the double value to apply the common log function for.
|
||||
* @return the log of value d.
|
||||
*/
|
||||
U_CAPI int16_t U_EXPORT2 icu_log10(double d);
|
||||
|
||||
/**
|
||||
* Returns the number of digits after the decimal point in a double number x.
|
||||
*
|
||||
* @param x the double number
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2 icu_digitsAfterDecimal(double x);
|
||||
|
||||
/**
|
||||
* Time zone utilities
|
||||
*
|
||||
* Wrappers for C runtime library functions relating to timezones.
|
||||
* The t_tzset() function (similar to tzset) uses the current setting
|
||||
* of the environment variable TZ to assign values to three global
|
||||
* variables: daylight, timezone, and tzname. These variables have the
|
||||
* following meanings, and are declared in <time.h>.
|
||||
*
|
||||
* daylight Nonzero if daylight-saving-time zone (DST) is specified
|
||||
* in TZ; otherwise, 0. Default value is 1.
|
||||
* timezone Difference in seconds between coordinated universal
|
||||
* time and local time. E.g., -28,800 for PST (GMT-8hrs)
|
||||
* tzname(0) Three-letter time-zone name derived from TZ environment
|
||||
* variable. E.g., "PST".
|
||||
* tzname(1) Three-letter DST zone name derived from TZ environment
|
||||
* variable. E.g., "PDT". If DST zone is omitted from TZ,
|
||||
* tzname(1) is an empty string.
|
||||
*
|
||||
* Notes: For example, to set the TZ environment variable to correspond
|
||||
* to the current time zone in Germany, you can use one of the
|
||||
* following statements:
|
||||
*
|
||||
* set TZ=GST1GDT
|
||||
* set TZ=GST+1GDT
|
||||
*
|
||||
* If the TZ value is not set, t_tzset() attempts to use the time zone
|
||||
* information specified by the operating system. Under Windows NT
|
||||
* and Windows 95, this information is specified in the Control Panel's
|
||||
* Date/Time application.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 icu_tzset(void);
|
||||
U_CAPI int32_t U_EXPORT2 icu_timezone(void);
|
||||
U_CAPI char* U_EXPORT2 icu_tzname(int index);
|
||||
|
||||
/* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70. */
|
||||
U_CAPI int32_t U_EXPORT2 icu_getUTCtime(void);
|
||||
|
||||
/* Return the data directory for this platform. */
|
||||
U_CAPI const char* U_EXPORT2 u_getDataDirectory(void);
|
||||
|
||||
/* Set the data directory. */
|
||||
U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory);
|
||||
|
||||
/* Return the default codepage for this platform and locale */
|
||||
U_CAPI const char* U_EXPORT2 icu_getDefaultCodepage(void);
|
||||
|
||||
/* Return the default locale ID string by querying the system, or
|
||||
zero if one cannot be found. */
|
||||
U_CAPI const char* U_EXPORT2 icu_getDefaultLocaleID(void);
|
||||
|
||||
/*
|
||||
* Finds the least double greater than d (if positive == true),
|
||||
* or the greatest double less than d (if positive == false).
|
||||
*
|
||||
* This is a special purpose function defined by the ChoiceFormat API
|
||||
* documentation.
|
||||
* It is not a general purpose function and not defined for NaN or Infinity
|
||||
*/
|
||||
U_CAPI double U_EXPORT2 icu_nextDouble(double d, bool_t positive);
|
||||
|
||||
/**
|
||||
* Filesystem file and path separator characters.
|
||||
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
|
||||
*/
|
||||
#ifdef XP_MAC
|
||||
# define U_FILE_SEP_CHAR ':'
|
||||
# define U_PATH_SEP_CHAR ';'
|
||||
# define U_FILE_SEP_STRING ":"
|
||||
# define U_PATH_SEP_STRING ";"
|
||||
#elif defined(WIN32) || defined(OS2)
|
||||
# define U_FILE_SEP_CHAR '\\'
|
||||
# define U_PATH_SEP_CHAR ';'
|
||||
# define U_FILE_SEP_STRING "\\"
|
||||
# define U_PATH_SEP_STRING ";"
|
||||
#else
|
||||
# define U_FILE_SEP_CHAR '/'
|
||||
# define U_PATH_SEP_CHAR ':'
|
||||
# define U_FILE_SEP_STRING "/"
|
||||
# define U_PATH_SEP_STRING ":"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Convert char characters to UChar characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that are encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see utypes.h .
|
||||
*
|
||||
* @param cs Input string, points to <code>length</code>
|
||||
* character bytes from a subset of the platform encoding.
|
||||
* @param us Output string, points to memory for <code>length</code>
|
||||
* Unicode characters.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_charsToUChars(const char *cs, UChar *us, UTextOffset length);
|
||||
|
||||
/**
|
||||
* Convert UChar characters to char characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that can be encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see utypes.h .
|
||||
*
|
||||
* @param us Input string, points to <code>length</code>
|
||||
* Unicode characters that can be encoded with the
|
||||
* codepage-invariant subset of the platform encoding.
|
||||
* @param cs Output string, points to memory for <code>length</code>
|
||||
* character bytes.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_UCharsToChars(const UChar *us, char *cs, UTextOffset length);
|
||||
|
||||
#endif
|
||||
#error Please include unicode/putil.h instead
|
||||
|
|
|
@ -1,90 +1 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* FILE NAME : platform.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/* Define the platform we're on. */
|
||||
#ifndef WIN32
|
||||
#define WIN32
|
||||
#endif
|
||||
|
||||
/* Define whether inttypes.h is available */
|
||||
#define HAVE_INTTYPES_H 0
|
||||
|
||||
/* Determines whether specific types are available */
|
||||
#define HAVE_INT8_T 0
|
||||
#define HAVE_UINT8_T 0
|
||||
#define HAVE_INT16_T 0
|
||||
#define HAVE_UINT16_T 0
|
||||
#define HAVE_INT32_T 0
|
||||
#define HAVE_UINT32_T 0
|
||||
#define HAVE_BOOL_T 0
|
||||
|
||||
/* Determines the endianness of the platform */
|
||||
#define U_IS_BIG_ENDIAN 0
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Generic data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* If your platform does not have the <inttypes.h> header, you may
|
||||
need to edit the typedefs below. */
|
||||
#if HAVE_INTTYPES_H
|
||||
#include <inttypes.h>
|
||||
#else
|
||||
|
||||
#if ! HAVE_INT8_T
|
||||
typedef signed char int8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT8_T
|
||||
typedef unsigned char uint8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT16_T
|
||||
typedef signed short int16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT16_T
|
||||
typedef unsigned short uint16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT32_T
|
||||
typedef signed long int32_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT32_T
|
||||
typedef unsigned long uint32_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#define T_INT32_MAX (LONG_MAX)
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Character data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_SIZEOF_WCHAR_T 2
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Symbol import-export control */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_EXPORT __declspec(dllexport)
|
||||
#define U_EXPORT2
|
||||
#define U_IMPORT __declspec(dllimport)
|
||||
#error Please include unicode/pwin32.h instead
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
*/
|
||||
|
||||
#include "uhash.h"
|
||||
#include "unistr.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/**
|
||||
* A class which represents an ordinary Hashtable which deletes its contents when it
|
||||
|
|
|
@ -18,9 +18,9 @@
|
|||
#ifndef RBDATA_H
|
||||
#define RBDATA_H 1
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "uhash.h"
|
||||
#include "unistr.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/**
|
||||
* Abstract base class for data stored in resource bundles. These
|
||||
|
|
|
@ -19,10 +19,10 @@
|
|||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "filestrm.h"
|
||||
#include "ustring.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "rbdata.h"
|
||||
|
||||
#include "unistr.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "rbdata.h"
|
||||
|
||||
|
||||
|
@ -62,8 +62,8 @@ read_ustring(FileStream *rb,
|
|||
while(remain != 0) {
|
||||
|
||||
/* Read the next chunk of data */
|
||||
readLen = icu_min(BUF_SIZE, remain);
|
||||
icu_memset(buf, 0, readLen*sizeof(UChar));
|
||||
readLen = uprv_min(BUF_SIZE, remain);
|
||||
uprv_memset(buf, 0, readLen*sizeof(UChar));
|
||||
T_FileStream_read(rb, buf, sizeof(UChar) * readLen);
|
||||
|
||||
/* Append the chunk to the string */
|
||||
|
|
|
@ -18,10 +18,10 @@
|
|||
#ifndef RBREAD_H
|
||||
#define RBREAD_H 1
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "filestrm.h"
|
||||
#include "uhash.h"
|
||||
#include "unistr.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/* Byte order mark for compiled resource bundle files */
|
||||
static const int32_t sBOM = 0x021C;
|
||||
|
|
|
@ -1,123 +1 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 11/17/99 aliu Creation. Ported from java. Modified to
|
||||
* match current UnicodeString API. Forced
|
||||
* to use name "handleReplaceBetween" because
|
||||
* of existing methods in UnicodeString.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef REP_H
|
||||
#define REP_H
|
||||
|
||||
#include "utypes.h"
|
||||
|
||||
class UnicodeString;
|
||||
|
||||
/**
|
||||
* <code>Replaceable</code> is an abstract base class representing a
|
||||
* string of characters that supports the replacement of a range of
|
||||
* itself with a new string of characters. It is used by APIs that
|
||||
* change a piece of text while retaining style attributes. In other
|
||||
* words, an implicit aspect of the <code>Replaceable</code> API is
|
||||
* that during a replace operation, new characters take on the
|
||||
* attributes, if any, of the old characters. For example, if the
|
||||
* string "the <b>bold</b> font" has range (4, 8) replaced with
|
||||
* "strong", then it becomes "the <b>strong</b> font".
|
||||
*
|
||||
* <p><code>Replaceable</code> specifies ranges using an initial
|
||||
* offset and a limit offset. The range of characters thus specified
|
||||
* includes the characters at offset initial..limit-1. That is, the
|
||||
* start offset is inclusive, and the limit offset is exclusive.
|
||||
*
|
||||
* <p><code>Replaceable</code> also includes API to access characters
|
||||
* in the string: <code>length()</code>, <code>charAt()</code>, and
|
||||
* <code>extractBetween()</code>.
|
||||
*
|
||||
* @author Alan Liu
|
||||
*/
|
||||
class U_COMMON_API Replaceable {
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~Replaceable();
|
||||
|
||||
/**
|
||||
* Return the number of characters in the text.
|
||||
* @return number of characters in text
|
||||
*/
|
||||
virtual int32_t length() const = 0;
|
||||
|
||||
/**
|
||||
* Return the character at the given offset into the text.
|
||||
* @param offset an integer between 0 and <code>length()</code>-1
|
||||
* inclusive
|
||||
* @return character of text at given offset
|
||||
*/
|
||||
virtual UChar charAt(UTextOffset offset) const = 0;
|
||||
|
||||
/**
|
||||
* Copy characters from this object into the destination character
|
||||
* array. The first character to be copied is at index
|
||||
* <code>srcStart</code>; the last character to be copied is at
|
||||
* index <code>srcLimit-1</code> (thus the total number of
|
||||
* characters to be copied is <code>srcLimit-srcStart</code>). The
|
||||
* characters are copied into the subarray of <code>dst</code>
|
||||
* starting at index <code>dstStart</code> and ending at index
|
||||
* <code>dstStart + (srcLimit-srcStart) - 1</code>.
|
||||
*
|
||||
* @param srcStart the beginning index to copy, inclusive; <code>0
|
||||
* <= srcStart <= srcLimit</code>.
|
||||
* @param srcLimit the ending index to copy, exclusive;
|
||||
* <code>srcStart <= srcLimit <= length()</code>.
|
||||
* @param dst the destination array.
|
||||
* @param dstStart the start offset in the destination array. */
|
||||
virtual void extractBetween(UTextOffset srcStart,
|
||||
UTextOffset srcLimit,
|
||||
UChar* dst,
|
||||
UTextOffset dstStart = 0) const = 0;
|
||||
|
||||
/**
|
||||
* Replace a substring of this object with the given text. If the
|
||||
* characters being replaced have attributes, the new characters
|
||||
* that replace them should be given the same attributes.
|
||||
*
|
||||
* @param start the beginning index, inclusive; <code>0 <= start
|
||||
* <= limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit
|
||||
* <= length()</code>.
|
||||
* @param text the text to replace characters <code>start</code>
|
||||
* to <code>limit - 1</code> */
|
||||
virtual void handleReplaceBetween(UTextOffset start,
|
||||
UTextOffset limit,
|
||||
const UnicodeString& text) = 0;
|
||||
// Note: All other methods in this class take the names of
|
||||
// existing UnicodeString methods. This method is the exception.
|
||||
// It is named differently because all replace methods of
|
||||
// UnicodeString return a UnicodeString&. The 'between' is
|
||||
// required in order to conform to the UnicodeString naming
|
||||
// convention; API taking start/length are named <operation>, and
|
||||
// those taking start/limit are named <operationBetween>. The
|
||||
// 'handle' is added because 'replaceBetween' and
|
||||
// 'doReplaceBetween' are already taken.
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Default constructor.
|
||||
*/
|
||||
Replaceable();
|
||||
};
|
||||
|
||||
inline Replaceable::Replaceable() {}
|
||||
|
||||
inline Replaceable::~Replaceable() {}
|
||||
|
||||
#endif
|
||||
#error Please include unicode/rep.h instead
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
|
||||
#include "rbcache.h"
|
||||
|
||||
#include "resbund.h"
|
||||
#include "unicode/resbund.h"
|
||||
#include "mutex.h"
|
||||
|
||||
#include "unistrm.h"
|
||||
|
@ -342,9 +342,9 @@ ResourceBundle::ResourceBundle(const wchar_t* path,
|
|||
: fgCache(fgUserCache),
|
||||
fgVisitedFiles(fgUserVisitedFiles)
|
||||
{
|
||||
int32_t wideNameLen = icu_mbstowcs(NULL, kDefaultSuffix, kDefaultSuffixLen);
|
||||
int32_t wideNameLen = uprv_mbstowcs(NULL, kDefaultSuffix, kDefaultSuffixLen);
|
||||
wchar_t* wideName = new wchar_t[wideNameLen + 1];
|
||||
icu_mbstowcs(wideName, kDefaultSuffix, kDefaultSuffixLen);
|
||||
uprv_mbstowcs(wideName, kDefaultSuffix, kDefaultSuffixLen);
|
||||
wideName[wideNameLen] = 0;
|
||||
constructForLocale(PathInfo(path, wideName), locale, err);
|
||||
delete [] wideName;
|
||||
|
@ -914,25 +914,25 @@ ResourceBundle::getVersionNumber() const
|
|||
// the length of the major part + the length of the separator
|
||||
// (==1) + the length of the minor part (+ 1 for the zero byte at
|
||||
// the end).
|
||||
int32_t len = icu_strlen(ICU_VERSION);
|
||||
int32_t len = uprv_strlen(ICU_VERSION);
|
||||
int32_t minor_len = 0;
|
||||
if(U_SUCCESS(status) && minor_version.length() > 0)
|
||||
minor_len = minor_version.length();
|
||||
len += (minor_len > 0) ? minor_len : 1 /*==icu_strlen(kDefaultMinorVersion)*/;
|
||||
len += (minor_len > 0) ? minor_len : 1 /*==uprv_strlen(kDefaultMinorVersion)*/;
|
||||
++len; // Add length of separator
|
||||
|
||||
// Allocate the string, and build it up.
|
||||
// + 1 for zero byte
|
||||
((ResourceBundle*)this)->fVersionID = new char[1 + len];
|
||||
|
||||
icu_strcpy(fVersionID, ICU_VERSION);
|
||||
icu_strcat(fVersionID, kVersionSeparator);
|
||||
uprv_strcpy(fVersionID, ICU_VERSION);
|
||||
uprv_strcat(fVersionID, kVersionSeparator);
|
||||
if(minor_len > 0) {
|
||||
minor_version.extract(0, minor_len, fVersionID + len - minor_len);
|
||||
fVersionID[len] = 0;
|
||||
}
|
||||
else {
|
||||
icu_strcat(fVersionID, kDefaultMinorVersion);
|
||||
uprv_strcat(fVersionID, kDefaultMinorVersion);
|
||||
}
|
||||
}
|
||||
return fVersionID;
|
||||
|
@ -1148,10 +1148,10 @@ ResourceBundle::PathInfo::PathInfo(const PathInfo& source)
|
|||
fWPrefix(NULL), fWSuffix(NULL)
|
||||
{
|
||||
if(source.fWPrefix) {
|
||||
fWPrefix = new wchar_t[icu_wcslen(source.fWPrefix)+1];
|
||||
fWSuffix = new wchar_t[icu_wcslen(source.fWSuffix)+1];
|
||||
icu_wcscpy(fWPrefix, source.fWPrefix);
|
||||
icu_wcscpy(fWSuffix, source.fWSuffix);
|
||||
fWPrefix = new wchar_t[uprv_wcslen(source.fWPrefix)+1];
|
||||
fWSuffix = new wchar_t[uprv_wcslen(source.fWSuffix)+1];
|
||||
uprv_wcscpy(fWPrefix, source.fWPrefix);
|
||||
uprv_wcscpy(fWSuffix, source.fWSuffix);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1176,10 +1176,10 @@ ResourceBundle::PathInfo::PathInfo(const wchar_t* path,
|
|||
fWPrefix(NULL),
|
||||
fWSuffix(NULL)
|
||||
{
|
||||
fWPrefix = new wchar_t[icu_wcslen(path)+1];
|
||||
fWSuffix = new wchar_t[icu_wcslen(suffix)+1];
|
||||
icu_wcscpy(fWPrefix, path);
|
||||
icu_wcscpy(fWSuffix, suffix);
|
||||
fWPrefix = new wchar_t[uprv_wcslen(path)+1];
|
||||
fWSuffix = new wchar_t[uprv_wcslen(suffix)+1];
|
||||
uprv_wcscpy(fWPrefix, path);
|
||||
uprv_wcscpy(fWSuffix, suffix);
|
||||
}
|
||||
|
||||
ResourceBundle::PathInfo::~PathInfo()
|
||||
|
@ -1195,10 +1195,10 @@ ResourceBundle::PathInfo::operator=(const PathInfo& source)
|
|||
wchar_t* tempPref = NULL;
|
||||
wchar_t* tempSuff = NULL;
|
||||
if(source.fWPrefix) {
|
||||
tempPref = new wchar_t[icu_wcslen(source.fWPrefix)+1];
|
||||
tempSuff = new wchar_t[icu_wcslen(source.fWSuffix)+1];
|
||||
icu_wcscpy(tempPref, source.fWPrefix);
|
||||
icu_wcscpy(tempSuff, source.fWSuffix);
|
||||
tempPref = new wchar_t[uprv_wcslen(source.fWPrefix)+1];
|
||||
tempSuff = new wchar_t[uprv_wcslen(source.fWSuffix)+1];
|
||||
uprv_wcscpy(tempPref, source.fWPrefix);
|
||||
uprv_wcscpy(tempSuff, source.fWSuffix);
|
||||
}
|
||||
delete fWPrefix;
|
||||
fWPrefix = tempPref;
|
||||
|
@ -1229,18 +1229,18 @@ ResourceBundle::PathInfo::makeCacheKey(const UnicodeString& name) const
|
|||
if(fWPrefix) {
|
||||
UnicodeString key;
|
||||
|
||||
size_t prefSize = icu_wcstombs(NULL, fWPrefix, ((size_t)-1) >> 1);
|
||||
size_t suffSize = icu_wcstombs(NULL, fWSuffix, ((size_t)-1) >> 1);
|
||||
size_t tempSize = icu_max((int32_t)prefSize, (int32_t)suffSize);
|
||||
size_t prefSize = uprv_wcstombs(NULL, fWPrefix, ((size_t)-1) >> 1);
|
||||
size_t suffSize = uprv_wcstombs(NULL, fWSuffix, ((size_t)-1) >> 1);
|
||||
size_t tempSize = uprv_max((int32_t)prefSize, (int32_t)suffSize);
|
||||
char *temp = new char[tempSize + 1];
|
||||
|
||||
tempSize = icu_wcstombs(temp, fWPrefix, prefSize);
|
||||
tempSize = uprv_wcstombs(temp, fWPrefix, prefSize);
|
||||
temp[tempSize] = 0;
|
||||
key += UnicodeString(temp);
|
||||
|
||||
key += name;
|
||||
|
||||
tempSize = icu_wcstombs(temp, fWSuffix, suffSize);
|
||||
tempSize = uprv_wcstombs(temp, fWSuffix, suffSize);
|
||||
temp[tempSize] = 0;
|
||||
key += UnicodeString(temp);
|
||||
|
||||
|
@ -1265,18 +1265,18 @@ ResourceBundle::PathInfo::makeHashkey(const UnicodeString& localeName) const
|
|||
|
||||
key += kSeparator;
|
||||
|
||||
size_t prefSize = icu_wcstombs(NULL, fWPrefix, ((size_t)-1) >> 1);
|
||||
size_t suffSize = icu_wcstombs(NULL, fWSuffix, ((size_t)-1) >> 1);
|
||||
size_t tempSize = icu_max((int32_t)prefSize, (int32_t)suffSize);
|
||||
size_t prefSize = uprv_wcstombs(NULL, fWPrefix, ((size_t)-1) >> 1);
|
||||
size_t suffSize = uprv_wcstombs(NULL, fWSuffix, ((size_t)-1) >> 1);
|
||||
size_t tempSize = uprv_max((int32_t)prefSize, (int32_t)suffSize);
|
||||
char *temp = new char[tempSize + 1];
|
||||
|
||||
tempSize = icu_wcstombs(temp, fWSuffix, suffSize);
|
||||
tempSize = uprv_wcstombs(temp, fWSuffix, suffSize);
|
||||
temp[tempSize] = 0;
|
||||
key += UnicodeString(temp);
|
||||
|
||||
key += kSeparator;
|
||||
|
||||
tempSize = icu_wcstombs(temp, fWPrefix, prefSize);
|
||||
tempSize = uprv_wcstombs(temp, fWPrefix, prefSize);
|
||||
temp[tempSize] = 0;
|
||||
key += UnicodeString(temp);
|
||||
|
||||
|
@ -1303,29 +1303,29 @@ ResourceBundle::PathInfo::openFile(const UnicodeString& localeName) const
|
|||
char* temp = new char[nameSize + 1];
|
||||
localeName.extract(0, nameSize, temp);
|
||||
temp[nameSize] = 0;
|
||||
int32_t wideNameLen = icu_mbstowcs(NULL, temp, nameSize);
|
||||
int32_t wideNameLen = uprv_mbstowcs(NULL, temp, nameSize);
|
||||
wchar_t* wideName = new wchar_t[wideNameLen + 1];
|
||||
icu_mbstowcs(wideName, temp, nameSize);
|
||||
uprv_mbstowcs(wideName, temp, nameSize);
|
||||
wideName[wideNameLen] = 0;
|
||||
delete [] temp;
|
||||
|
||||
size_t prefLen = icu_wcslen(fWPrefix);
|
||||
size_t suffLen = icu_wcslen(fWSuffix);
|
||||
size_t prefLen = uprv_wcslen(fWPrefix);
|
||||
size_t suffLen = uprv_wcslen(fWSuffix);
|
||||
|
||||
int32_t destSize = prefLen + suffLen + wideNameLen;
|
||||
wchar_t* dest = new wchar_t[destSize + 1];
|
||||
icu_wcscpy(dest, fWPrefix);
|
||||
uprv_wcscpy(dest, fWPrefix);
|
||||
dest[prefLen] = 0;
|
||||
|
||||
icu_wcscat(dest, wideName);
|
||||
uprv_wcscat(dest, wideName);
|
||||
dest[prefLen + wideNameLen] = 0;
|
||||
|
||||
icu_wcscat(dest, fWSuffix);
|
||||
uprv_wcscat(dest, fWSuffix);
|
||||
dest[destSize] = 0;
|
||||
|
||||
int32_t fmodeLen = icu_mbstowcs(NULL, "rb", 2);
|
||||
int32_t fmodeLen = uprv_mbstowcs(NULL, "rb", 2);
|
||||
wchar_t* fmode = new wchar_t[fmodeLen + 1];
|
||||
icu_mbstowcs(fmode, "rb", 2);
|
||||
uprv_mbstowcs(fmode, "rb", 2);
|
||||
fmode[fmodeLen] = 0;
|
||||
|
||||
FileStream* result = T_FileStream_wopen(dest, fmode);
|
||||
|
|
|
@ -1,633 +1 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1996-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File resbund.h
|
||||
*
|
||||
* CREATED BY
|
||||
* Richard Gillam
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 2/5/97 aliu Added scanForLocaleInFile. Added
|
||||
* constructor which attempts to read resource bundle
|
||||
* from a specific file, without searching other files.
|
||||
* 2/11/97 aliu Added UErrorCode return values to constructors. Fixed
|
||||
* infinite loops in scanForFile and scanForLocale.
|
||||
* Modified getRawResourceData to not delete storage in
|
||||
* localeData and resourceData which it doesn't own.
|
||||
* Added Mac compatibility #ifdefs for tellp() and
|
||||
* ios::nocreate.
|
||||
* 2/18/97 helena Updated with 100% documentation coverage.
|
||||
* 3/13/97 aliu Rewrote to load in entire resource bundle and store
|
||||
* it as a Hashtable of ResourceBundleData objects.
|
||||
* Added state table to govern parsing of files.
|
||||
* Modified to load locale index out of new file distinct
|
||||
* from default.txt.
|
||||
* 3/25/97 aliu Modified to support 2-d arrays, needed for timezone data.
|
||||
* Added support for custom file suffixes. Again, needed to
|
||||
* support timezone data.
|
||||
* 4/7/97 aliu Cleaned up.
|
||||
* 03/02/99 stephen Removed dependency on FILE*.
|
||||
* 03/29/99 helena Merged Bertrand and Stephen's changes.
|
||||
* 06/11/99 stephen Removed parsing of .txt files.
|
||||
* Reworked to use new binary format.
|
||||
* Cleaned up.
|
||||
* 06/14/99 stephen Removed methods taking a filename suffix.
|
||||
* 11/09/99 weiv Added getLocale(), fRealLocale, removed fRealLocaleID
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef RESBUND_H
|
||||
#define RESBUND_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unistr.h"
|
||||
#include "locid.h"
|
||||
#include <wchar.h>
|
||||
|
||||
|
||||
class RBHashtable;
|
||||
class ResourceBundleData;
|
||||
class ResourceBundleCache;
|
||||
class VisitedFileCache;
|
||||
#ifndef _FILESTRM
|
||||
typedef struct _FileStream FileStream;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* A class representing a collection of resource information pertaining to a given
|
||||
* locale. A resource bundle provides a way of accessing locale-specific information in
|
||||
* a data file. You create a resource bundle that manages the resources for a given
|
||||
* locale and then ask it for individual resources.
|
||||
* <P>
|
||||
* The resource bundle file is a text (ASCII or Unicode) file with the format:
|
||||
* <pre>
|
||||
* . locale {
|
||||
* . tag1 {...}
|
||||
* . tag2 {...}
|
||||
* . }
|
||||
* </pre>
|
||||
* The tags are used to retrieve the data later. You may not have multiple instances of
|
||||
* the same tag.
|
||||
* <P>
|
||||
* Four data types are supported. These are solitary strings, comma-delimited lists of
|
||||
* strings, 2-dimensional arrays of strings, and tagged lists of strings.
|
||||
* <P>
|
||||
* Note that all data is textual. Adjacent strings are merged by the low-level
|
||||
* tokenizer, so that the following effects occur: foo bar, baz // 2 elements, "foo
|
||||
* bar", and "baz" "foo" "bar", baz // 2 elements, "foobar", and "baz" Note that a
|
||||
* single intervening space is added between merged strings, unless they are both double
|
||||
* quoted. This extends to more than two strings in a row.
|
||||
* <P>
|
||||
* Whitespace is ignored, as in a C source file.
|
||||
* <P>
|
||||
* Solitary strings have the format:
|
||||
* <pre>
|
||||
* . Tag { Data }
|
||||
* </pre>
|
||||
* This is indistinguishable from a comma-delimited list with only one element, and in
|
||||
* fact may be retrieved as such (as an array, or as element 0 of an array).
|
||||
* <P>
|
||||
* Comma-delimited lists have the format:
|
||||
* <pre>
|
||||
* . Tag { Data, Data, Data }
|
||||
* </pre>
|
||||
* Parsing is lenient; a final string, after the last element, is allowed.
|
||||
* <P>
|
||||
* Tagged lists have the format:
|
||||
* <pre>
|
||||
* . Tag { Subtag { Data } Subtag {Data} }
|
||||
* </pre>
|
||||
* Data is retrieved by specifying the subtag.
|
||||
* <P>
|
||||
* Two-dimensional arrays have the format:
|
||||
* <pre>
|
||||
* . TwoD {
|
||||
* . { r1c1, r1c2, ..., r1cm },
|
||||
* . { r2c1, r2c2, ..., r2cm },
|
||||
* . ...
|
||||
* . { rnc1, rnc2, ..., rncm }
|
||||
* . }
|
||||
* </pre>
|
||||
* where n is the number of rows, and m is the number of columns. Parsing is lenient (as
|
||||
* in other data types). A final comma is always allowed after the last element; either
|
||||
* the last string in a row, or the last row itself. Furthermore, since there is no
|
||||
* ambiguity, the commas between the rows are entirely optional. (However, if a comma is
|
||||
* present, there can only be one comma, no more.) It is possible to have zero columns,
|
||||
* as follows:
|
||||
* <pre>
|
||||
* . Odd { {} {} {} } // 3 x 0 array
|
||||
* </pre>
|
||||
* But it is impossible to have zero rows. The smallest array is thus a 1 x 0 array,
|
||||
* which looks like this:
|
||||
* <pre>
|
||||
* . Smallest { {} } // 1 x 0 array
|
||||
* </pre>
|
||||
* The array must be strictly rectangular; that is, each row must have the same number
|
||||
* of elements.
|
||||
* <P>
|
||||
* This is an example for using a possible custom resource:
|
||||
* <pre>
|
||||
* . Locale currentLocale;
|
||||
* . UErrorCode success = U_ZERO_ERROR;
|
||||
* . ResourceBundle myResources("MyResources", currentLocale, success );
|
||||
* .
|
||||
* . UnicodeString button1Title, button2Title;
|
||||
* . myResources.getString("OkKey", button1Title, success );
|
||||
* . myResources.getString("CancelKey", button2Title, success );
|
||||
* </pre>
|
||||
*/
|
||||
class U_COMMON_API ResourceBundle {
|
||||
public:
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @param path This is a full pathname in the platform-specific format for the
|
||||
* directory containing the resource data files we want to load
|
||||
* resources from. We use locale IDs to generate filenames, and the
|
||||
* filenames have this string prepended to them before being passed
|
||||
* to the C++ I/O functions. Therefore, this string must always end
|
||||
* with a directory delimiter (whatever that is for the target OS)
|
||||
* for this class to work correctly.
|
||||
* @param locale This is the locale this resource bundle is for. To get resources
|
||||
* for the French locale, for example, you would create a
|
||||
* ResourceBundle passing Locale::FRENCH for the "locale" parameter,
|
||||
* and all subsequent calls to that resource bundle will return
|
||||
* resources that pertain to the French locale. If the caller doesn't
|
||||
* pass a locale parameter, the default locale for the system (as
|
||||
* returned by Locale::getDefault()) will be used.
|
||||
* The UErrorCode& err parameter is used to return status information to the user. To
|
||||
* check whether the construction succeeded or not, you should check the value of
|
||||
* U_SUCCESS(err). If you wish more detailed information, you can check for
|
||||
* informational error results which still indicate success. U_USING_FALLBACK_ERROR
|
||||
* indicates that a fall back locale was used. For example, 'de_CH' was requested,
|
||||
* but nothing was found there, so 'de' was used. U_USING_DEFAULT_ERROR indicates that
|
||||
* the default locale data was used; neither the requested locale nor any of its
|
||||
* fall back locales could be found.
|
||||
*/
|
||||
ResourceBundle( const UnicodeString& path,
|
||||
const Locale& locale,
|
||||
UErrorCode& err);
|
||||
ResourceBundle( const UnicodeString& path,
|
||||
UErrorCode& err);
|
||||
ResourceBundle( const wchar_t* path,
|
||||
const Locale& locale,
|
||||
UErrorCode& err);
|
||||
~ResourceBundle();
|
||||
|
||||
/**
|
||||
* Returns the contents of a string resource. Resource data is undifferentiated
|
||||
* Unicode text. The resource file may contain quoted strings or escape sequences;
|
||||
* these will be parsed prior to the data's return.
|
||||
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
|
||||
*
|
||||
* @param resourceTag The resource tag of the string resource the caller wants
|
||||
* @param theString Receives the actual data in the resource
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found.
|
||||
*/
|
||||
void getString( const char *resourceTag,
|
||||
UnicodeString& theString,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns the contents of a string resource. Resource data is undifferentiated
|
||||
* Unicode text. The resource file may contain quoted strings or escape sequences;
|
||||
* these will be parsed prior to the data's return.
|
||||
*
|
||||
* @param resourceTag The resource tag of the string resource the caller wants
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found.
|
||||
* @return A pointer to the string from the resource bundle, or NULL if there was
|
||||
* an error.
|
||||
*/
|
||||
const UnicodeString* getString( const char *resourceTag,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns the contents of a string-array resource. This will return the contents of
|
||||
* a string-array (comma-delimited-list) resource as a C++ array of UnicodeString
|
||||
* objects. The number of elements in the array is returned in numArrayItems.
|
||||
* Calling getStringArray on a resource of type string will return an array with one
|
||||
* element; calling it on a resource of type tagged-array results in a
|
||||
* U_MISSING_RESOURCE_ERROR error.
|
||||
*
|
||||
* @param resourceTag The resource tag of the string-array resource the caller
|
||||
* wants
|
||||
* @param numArrayItems Receives the number of items in the array the function
|
||||
* returns.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found.
|
||||
* @return The resource requested, as a pointer to an array of
|
||||
* UnicodeStrings. The caller does not own the storage and
|
||||
* must not delete it.
|
||||
*/
|
||||
const UnicodeString* getStringArray( const char *resourceTag,
|
||||
int32_t& numArrayItems,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns a single item from a string-array resource. This will return the contents
|
||||
* of a single item in a resource of string-array (comma-delimited-list) type. If
|
||||
* the resource is not an array, a U_MISSING_RESOURCE_ERROR will be returned in err.
|
||||
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param index The index (zero-based) of the particular array item the user
|
||||
* wants to extract from the resource.
|
||||
* @param theArrayItem Receives the actual text of the desired array item.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found, or if the index was out of range.
|
||||
*/
|
||||
void getArrayItem( const char *resourceTag,
|
||||
int32_t index,
|
||||
UnicodeString& theArrayItem,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns a single item from a string-array resource. This will return the contents
|
||||
* of a single item in a resource of string-array (comma-delimited-list) type. If
|
||||
* the resource is not an array, a U_MISSING_RESOURCE_ERROR will be returned in err.
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param index The index (zero-based) of the particular array item the user
|
||||
* wants to extract from the resource.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found, or if the index was out of range.
|
||||
* @return A pointer to the text of the array item, or NULL if there was an error.
|
||||
*/
|
||||
const UnicodeString* getArrayItem( const char *resourceTag,
|
||||
int32_t index,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Return the contents of a 2-dimensional array resource. The return value will be a
|
||||
* UnicodeString** array. (This is really an array of pointers; each pointer is a
|
||||
* ROW of the data.) The number of rows and columns is returned. If the resource is
|
||||
* of the wrong type, or not present, U_MISSING_RESOURCE_ERROR is placed in err.
|
||||
*
|
||||
* @param resourceTag The resource tag of the string-array resource the caller
|
||||
* wants
|
||||
* @param rowCount Receives the number of rows in the array the function
|
||||
* returns.
|
||||
* @param columnCount Receives the number of columns in the array the function
|
||||
* returns.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found.
|
||||
* @return The resource requested, as a UnicodeString**. The caller
|
||||
* does not own the storage and must not delete it.
|
||||
*/
|
||||
const UnicodeString** get2dArray(const char *resourceTag,
|
||||
int32_t& rowCount,
|
||||
int32_t& columnCount,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Return a single string from a 2-dimensional array resource. If the resource does
|
||||
* not exist, or if it is not a 2-d array, or if the row or column indices are out
|
||||
* of bounds, err is set to U_MISSING_RESOURCE_ERROR.
|
||||
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param rowIndex The row index (zero-based) of the array item the user wants
|
||||
* to extract from the resource.
|
||||
* @param columnIndex The column index (zero-based) of the array item the user
|
||||
* wants to extract from the resource.
|
||||
* @param theArrayItem Receives the actual text of the desired array item.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found, if the resource data was in
|
||||
* the wrong format, or if either index is out of bounds.
|
||||
*/
|
||||
void get2dArrayItem(const char *resourceTag,
|
||||
int32_t rowIndex,
|
||||
int32_t columnIndex,
|
||||
UnicodeString& theArrayItem,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Return a single string from a 2-dimensional array resource. If the resource does
|
||||
* not exist, or if it is not a 2-d array, or if the row or column indices are out
|
||||
* of bounds, err is set to U_MISSING_RESOURCE_ERROR.
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param rowIndex The row index (zero-based) of the array item the user wants
|
||||
* to extract from the resource.
|
||||
* @param columnIndex The column index (zero-based) of the array item the user
|
||||
* wants to extract from the resource.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found, if the resource data was in
|
||||
* the wrong format, or if either index is out of bounds.
|
||||
* @return A pointer to the text of the array item, or NULL if there was an error.
|
||||
*/
|
||||
const UnicodeString* get2dArrayItem( const char *resourceTag,
|
||||
int32_t rowIndex,
|
||||
int32_t columnIndex,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns a single item from a tagged-array resource. This will return the contents
|
||||
* of a single item in a resource of type tagged-array. If this function is called
|
||||
* for a resource that is not of type tagged-array, it will set err to
|
||||
* U_MISSING_RESOURCE_ERROR.
|
||||
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param itemTag The item tag for the item the caller wants to extract.
|
||||
* @param theArrayItem Receives the text of the desired array item.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified resource tag couldn't be found, or if an item
|
||||
* with the specified item tag couldn't be found in the resource.
|
||||
*/
|
||||
void getTaggedArrayItem( const char *resourceTag,
|
||||
const UnicodeString& itemTag,
|
||||
UnicodeString& theArrayItem,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns a single item from a tagged-array resource. This will return the contents
|
||||
* of a single item in a resource of type tagged-array. If this function is called
|
||||
* for a resource that is not of type tagged-array, it will set err to
|
||||
* U_MISSING_RESOURCE_ERROR.
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param itemTag The item tag for the item the caller wants to extract.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified resource tag couldn't be found, or if an item
|
||||
* with the specified item tag couldn't be found in the resource.
|
||||
* @return A pointer to the text of the array item, or NULL if there was an error.
|
||||
*/
|
||||
const UnicodeString* getTaggedArrayItem( const char *resourceTag,
|
||||
const UnicodeString& itemTag,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns a tagged-array resource. The contents of the resource is returned as two
|
||||
* separate arrays of UnicodeStrings, the addresses of which are placed in "itemTags"
|
||||
* and "items". After calling this function, the items in the resource will be in the
|
||||
* list pointed to by "items", and for each items[i], itemTags[i] will be the tag that
|
||||
* corresponds to it. The total number of entries in both arrays is returned in
|
||||
* numItems.
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param itemTags Set to point to an array of UnicodeStrings representing the
|
||||
* tags in the specified resource. The caller DOES own this array,
|
||||
* and must delete it.
|
||||
* @param items Set to point to an array of UnicodeStrings containing the
|
||||
* individual resource items themselves. itemTags[i] will
|
||||
* contain the tag corresponding to items[i]. The caller DOES
|
||||
* own this array, and must delete it.
|
||||
* @param numItems Receives the number of items in the arrays pointed to by
|
||||
* items and itemTags.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found.
|
||||
*/
|
||||
void getTaggedArray( const char *resourceTag,
|
||||
UnicodeString*& itemTags,
|
||||
UnicodeString*& items,
|
||||
int32_t& numItems,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Return the version number associated with this ResourceBundle. This version
|
||||
* number is a string of the form MAJOR.MINOR, where MAJOR is the version number of
|
||||
* the current analytic code package, and MINOR is the version number contained in
|
||||
* the resource file as the value of the tag "Version". A change in the MINOR
|
||||
* version indicates an updated data file. A change in the MAJOR version indicates a
|
||||
* new version of the code which is not binary-compatible with the previous version.
|
||||
* If no "Version" tag is present in a resource file, the MINOR version "0" is assigned.
|
||||
*
|
||||
* For example, if the Collation sort key algorithm changes, the MAJOR version
|
||||
* increments. If the collation data in a resource file changes, the MINOR version
|
||||
* for that file increments.
|
||||
*
|
||||
* @return A string of the form N.n, where N is the major version number,
|
||||
* representing the code version, and n is the minor version number,
|
||||
* representing the resource data file. The caller does not own this
|
||||
* string.
|
||||
*/
|
||||
const char* getVersionNumber(void) const;
|
||||
|
||||
/**
|
||||
* Return the Locale associated with this ResourceBundle.
|
||||
*
|
||||
* @return a Locale object
|
||||
*/
|
||||
const Locale &getLocale(void) const ;
|
||||
|
||||
private:
|
||||
class U_COMMON_API PathInfo {
|
||||
public:
|
||||
PathInfo();
|
||||
PathInfo(const PathInfo& source);
|
||||
PathInfo(const UnicodeString& path);
|
||||
PathInfo(const UnicodeString& path, const UnicodeString& suffix);
|
||||
PathInfo(const wchar_t* path, const wchar_t* suffix);
|
||||
~PathInfo();
|
||||
|
||||
PathInfo& operator=(const PathInfo& source);
|
||||
|
||||
bool_t fileExists(const UnicodeString& localeName) const;
|
||||
UnicodeString makeCacheKey(const UnicodeString& localeName) const;
|
||||
UnicodeString makeHashkey(const UnicodeString& localeName) const;
|
||||
FileStream* openFile(const UnicodeString& localeName) const;
|
||||
|
||||
private:
|
||||
static const UChar kSeparator;
|
||||
UnicodeString fPrefix;
|
||||
UnicodeString fSuffix;
|
||||
wchar_t* fWPrefix;
|
||||
wchar_t* fWSuffix;
|
||||
};
|
||||
|
||||
private:
|
||||
friend class Locale;
|
||||
friend class RuleBasedCollator;
|
||||
friend int32_t T_ResourceBundle_countArrayItemsImplementation(const ResourceBundle* resourceBundle,
|
||||
const char* resourceKey,
|
||||
UErrorCode& err) ;
|
||||
friend const UnicodeString** listInstalledLocalesImplementation(const char* path,
|
||||
int32_t* numInstalledLocales);
|
||||
friend void getTaggedArrayUCharsImplementation(
|
||||
const ResourceBundle* bundle,
|
||||
const char *resourceTag,
|
||||
UChar const** itemTags,
|
||||
UChar const** items,
|
||||
int32_t maxItems,
|
||||
int32_t& numItems,
|
||||
UErrorCode& err);
|
||||
|
||||
|
||||
/**
|
||||
* This constructor is used by Collation to load a resource bundle from a specific
|
||||
* file, without trying other files. This is used by the Collation caching
|
||||
* mechanism.
|
||||
*/
|
||||
ResourceBundle( const UnicodeString& path,
|
||||
const UnicodeString& localeName,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Return a list of all installed locales. This function returns a list of the IDs
|
||||
* of all locales represented in the directory specified by this ResourceBundle. It
|
||||
* depends on that directory having an "Index" tagged-list item in its "index.txt"
|
||||
* file; it parses that list to determine its return value (therefore, that list
|
||||
* also has to be up to date). This function is static.
|
||||
*
|
||||
* This function is the implementation of the Locale::listInstalledLocales()
|
||||
* function. It's private because the API for it really belongs in Locale.
|
||||
*
|
||||
* @param path The path to the locale data files. The function will
|
||||
* look here for "index.txt".
|
||||
* @param numInstalledLocales Receives the number of installed locales, according
|
||||
* to the Index resource in index.txt.
|
||||
* @return A list of the installed locales, as a pointer to an
|
||||
* array of UnicodeStrings. This storage is not owned by
|
||||
* the caller, who must not delete it. The information
|
||||
* in this list is derived from the Index resource in
|
||||
* default.txt, which must be kept up to date.
|
||||
*/
|
||||
static const UnicodeString* listInstalledLocales(const UnicodeString& path,
|
||||
int32_t& numInstalledLocales);
|
||||
|
||||
/**
|
||||
* Retrieve a ResourceBundle from the cache. Return NULL if not found.
|
||||
*/
|
||||
static const UHashtable* getFromCache(const PathInfo& path,
|
||||
const UnicodeString& localeName,
|
||||
ResourceBundleCache* someCache);
|
||||
|
||||
static const UHashtable* getFromCacheWithFallback(const PathInfo& path,
|
||||
const UnicodeString& desiredLocale,
|
||||
UnicodeString& returnedLocale,
|
||||
ResourceBundleCache* someCache,
|
||||
UErrorCode& error);
|
||||
|
||||
/**
|
||||
* Handlers which are passed to parse() have this signature.
|
||||
*/
|
||||
typedef void (*Handler)(const UnicodeString& localeName,
|
||||
UHashtable* hashtable,
|
||||
void* context,
|
||||
ResourceBundleCache* someCache);
|
||||
|
||||
/**
|
||||
* Parse a file, storing the resource data in the cache.
|
||||
*/
|
||||
static void parse(const PathInfo& path,
|
||||
const UnicodeString& localeName,
|
||||
Handler handler,
|
||||
void* context,
|
||||
ResourceBundleCache* someCache,
|
||||
UErrorCode &error);
|
||||
|
||||
/**
|
||||
* If the given file exists and has not been parsed, then parse it (caching the
|
||||
* resultant data) and return true.
|
||||
*/
|
||||
static bool_t parseIfUnparsed(const PathInfo& path,
|
||||
const UnicodeString& locale,
|
||||
ResourceBundleCache* fCache,
|
||||
VisitedFileCache* vCache,
|
||||
UErrorCode& error);
|
||||
|
||||
const UHashtable* getHashtableForLocale(const UnicodeString& localeName,
|
||||
UnicodeString& returnedLocale,
|
||||
UErrorCode& err);
|
||||
|
||||
const UHashtable* getHashtableForLocale(const UnicodeString& desiredLocale,
|
||||
UErrorCode& error);
|
||||
|
||||
const ResourceBundleData* getDataForTag(const char *tag,
|
||||
UErrorCode& err) const;
|
||||
|
||||
void constructForLocale(const PathInfo& path,
|
||||
const Locale& locale,
|
||||
UErrorCode& error);
|
||||
|
||||
static void addToCache(const UnicodeString& localeName,
|
||||
UHashtable* hashtable,
|
||||
void* context,
|
||||
ResourceBundleCache* someCache);
|
||||
|
||||
static void saveCollationHashtable(const UnicodeString& localeName,
|
||||
UHashtable* hashtable,
|
||||
void* context,
|
||||
ResourceBundleCache* cache);
|
||||
private:
|
||||
/**
|
||||
* This internal class iterates over the fallback and/or default locales. It
|
||||
* progresses as follows: Specific: language+country+variant language+country
|
||||
* language Default: language+country+variant language+country language Root:
|
||||
*/
|
||||
class LocaleFallbackIterator
|
||||
{
|
||||
public:
|
||||
LocaleFallbackIterator(const UnicodeString& startingLocale,
|
||||
const UnicodeString& root,
|
||||
bool_t useDefaultLocale);
|
||||
|
||||
const UnicodeString& getLocale(void) const { return fLocale; }
|
||||
|
||||
bool_t nextLocale(UErrorCode& status);
|
||||
|
||||
private:
|
||||
void chopLocale(void);
|
||||
|
||||
UnicodeString fLocale;
|
||||
UnicodeString fDefaultLocale;
|
||||
UnicodeString fRoot;
|
||||
bool_t fUseDefaultLocale;
|
||||
bool_t fTriedDefaultLocale;
|
||||
bool_t fTriedRoot;
|
||||
};
|
||||
|
||||
private:
|
||||
static const char* kDefaultSuffix;
|
||||
static const int32_t kDefaultSuffixLen;
|
||||
static const char* kDefaultFilename;
|
||||
static const char* kDefaultLocaleName;
|
||||
static const char* kIndexLocaleName;
|
||||
static const char* kIndexFilename;
|
||||
static const char* kIndexTag;
|
||||
|
||||
static const char* kDefaultMinorVersion;
|
||||
static const char* kVersionSeparator;
|
||||
static const char* kVersionTag;
|
||||
|
||||
static ResourceBundleCache* fgUserCache;
|
||||
static VisitedFileCache* fgUserVisitedFiles;
|
||||
|
||||
ResourceBundleCache* fgCache;
|
||||
VisitedFileCache* fgVisitedFiles;
|
||||
|
||||
/**
|
||||
* Data members. The ResourceBundle object is kept lightweight by having the fData[]
|
||||
* array entries be non-owned pointers. The cache (fgCache) owns the entries and
|
||||
* will delete them at static destruction time.
|
||||
*/
|
||||
PathInfo fPath;
|
||||
|
||||
enum { kDataCount = 4 };
|
||||
const UHashtable* fData[kDataCount]; // These aren't const if fIsDataOwned is true
|
||||
bool_t fLoaded[kDataCount];
|
||||
UErrorCode fDataStatus[kDataCount]; // Returns the appropriate error code for each data table.
|
||||
bool_t fIsDataOwned;
|
||||
Locale fRealLocale;
|
||||
LocaleFallbackIterator* fLocaleIterator;
|
||||
char* fVersionID;
|
||||
};
|
||||
|
||||
#endif
|
||||
#error Please include unicode/resbund.h instead
|
||||
|
|
|
@ -13,8 +13,8 @@
|
|||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "chariter.h"
|
||||
#include "schriter.h"
|
||||
#include "unicode/chariter.h"
|
||||
#include "unicode/schriter.h"
|
||||
|
||||
|
||||
UClassID StringCharacterIterator::fgClassID = 0;
|
||||
|
|
|
@ -1,174 +1 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File schriter.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/05/99 stephen Cleaned up.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef SCHRITER_H
|
||||
#define SCHRITER_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "chariter.h"
|
||||
|
||||
/**
|
||||
* A concrete subclass of CharacterIterator that iterates over the
|
||||
* characters in a UnicodeString. It's possible not only to create an
|
||||
* iterator that iterates over an entire UnicodeString, but also to
|
||||
* create one that iterates over only a subrange of a UnicodeString
|
||||
* (iterators over different subranges of the same UnicodeString don't
|
||||
* compare equal). */
|
||||
class U_COMMON_API StringCharacterIterator : public CharacterIterator {
|
||||
public:
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "text".
|
||||
* The iteration range is the whole string, and the starting position is 0.
|
||||
*/
|
||||
StringCharacterIterator(const UnicodeString& text);
|
||||
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "text".
|
||||
* The iteration range is the whole string, and the starting
|
||||
* position is specified by "pos". If "pos" is outside the valid
|
||||
* iteration range, the behavior of this object is undefined. */
|
||||
StringCharacterIterator(const UnicodeString& text,
|
||||
UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "text".
|
||||
* The iteration range begins with the character specified by
|
||||
* "begin" and ends with the character BEFORE the character specified
|
||||
* by "end". The starting position is specified by "pos". If
|
||||
* "begin" and "end" don't form a valid range on "text" (i.e., begin
|
||||
* >= end or either is negative or greater than text.size()), or
|
||||
* "pos" is outside the range defined by "begin" and "end", the
|
||||
* behavior of this iterator is undefined. */
|
||||
StringCharacterIterator(const UnicodeString& text,
|
||||
UTextOffset begin,
|
||||
UTextOffset end,
|
||||
UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Copy constructor. The new iterator iterates over the same range
|
||||
* of the same string as "that", and its initial position is the
|
||||
* same as "that"'s current position. */
|
||||
StringCharacterIterator(const StringCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Destructor. */
|
||||
virtual ~StringCharacterIterator();
|
||||
|
||||
/**
|
||||
* Assignment operator. *this is altered to iterate over the same
|
||||
* range of the same string as "that", and refers to the same
|
||||
* character within that string as "that" does. */
|
||||
StringCharacterIterator&
|
||||
operator=(const StringCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Returns true if the iterators iterate over the same range of the
|
||||
* same string and are pointing at the same character. */
|
||||
virtual bool_t operator==(const CharacterIterator& that) const;
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator. */
|
||||
virtual int32_t hashCode(void) const;
|
||||
|
||||
/**
|
||||
* Returns a new StringCharacterIterator referring to the same
|
||||
* character in the same range of the same string as this one. The
|
||||
* caller must delete the new iterator. */
|
||||
virtual CharacterIterator* clone(void) const;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first character in its
|
||||
* iteration range, and returns that character. */
|
||||
virtual UChar first(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last character in its iteration
|
||||
* range, and returns that character. */
|
||||
virtual UChar last(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th character in the
|
||||
* UnicodeString the iterator refers to, and returns that character.
|
||||
* If the index is outside the iterator's iteration range, the
|
||||
* behavior of the iterator is undefined. */
|
||||
virtual UChar setIndex(UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Returns the character the iterator currently refers to. */
|
||||
virtual UChar current(void) const;
|
||||
|
||||
/**
|
||||
* Advances to the next character in the iteration range (toward
|
||||
* last()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE. */
|
||||
virtual UChar next(void);
|
||||
|
||||
/**
|
||||
* Advances to the previous character in the iteration range (toward
|
||||
* first()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE. */
|
||||
virtual UChar previous(void);
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the first character in this
|
||||
* iterator's iteration range. */
|
||||
virtual UTextOffset startIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the character immediately BEYOND the
|
||||
* last character in this iterator's iteration range. */
|
||||
virtual UTextOffset endIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying UnicodeString of the
|
||||
* character the iterator currently refers to (i.e., the character
|
||||
* returned by current()). */
|
||||
virtual UTextOffset getIndex(void) const;
|
||||
|
||||
/**
|
||||
* Copies the UnicodeString under iteration into the UnicodeString
|
||||
* referred to by "result". Even if this iterator iterates across
|
||||
* only a part of this string, the whole string is copied. @param
|
||||
* result Receives a copy of the text under iteration. */
|
||||
virtual void getText(UnicodeString& result);
|
||||
|
||||
/**
|
||||
* Return a class ID for this object (not really public) */
|
||||
virtual UClassID getDynamicClassID(void) const
|
||||
{ return getStaticClassID(); }
|
||||
|
||||
/**
|
||||
* Return a class ID for this class (not really public) */
|
||||
static UClassID getStaticClassID(void)
|
||||
{ return (UClassID)(&fgClassID); }
|
||||
|
||||
private:
|
||||
StringCharacterIterator();
|
||||
|
||||
UnicodeString text;
|
||||
UTextOffset pos;
|
||||
UTextOffset begin;
|
||||
UTextOffset end;
|
||||
|
||||
static UClassID fgClassID;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#error Please include unicode/schriter.h instead
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
|
||||
#include <limits.h>
|
||||
|
||||
#include "scsu.h"
|
||||
#include "unicode/scsu.h"
|
||||
|
||||
#include "cmemory.h"
|
||||
|
||||
|
@ -889,7 +889,7 @@ scsu_decompress(UnicodeCompressor *comp,
|
|||
if(sourceLimit - byteBuffer < newBytes)
|
||||
newBytes = sourceLimit - byteBuffer;
|
||||
|
||||
icu_memcpy(comp->fBuffer + comp->fBufferLength, byteBuffer, newBytes);
|
||||
uprv_memcpy(comp->fBuffer + comp->fBufferLength, byteBuffer, newBytes);
|
||||
}
|
||||
|
||||
/* reset buffer length to 0 before recursive call */
|
||||
|
@ -967,7 +967,7 @@ scsu_decompress(UnicodeCompressor *comp,
|
|||
if not, save state and break out */
|
||||
if((unicharBuffer + 1) >= targetLimit) {
|
||||
--byteBuffer;
|
||||
icu_memcpy(comp->fBuffer, byteBuffer,
|
||||
uprv_memcpy(comp->fBuffer, byteBuffer,
|
||||
sourceLimit - byteBuffer);
|
||||
comp->fBufferLength = sourceLimit - byteBuffer;
|
||||
byteBuffer += comp->fBufferLength;
|
||||
|
@ -1019,7 +1019,7 @@ scsu_decompress(UnicodeCompressor *comp,
|
|||
rewind the source stream and break out */
|
||||
if( (byteBuffer + 1) >= sourceLimit ) {
|
||||
--byteBuffer;
|
||||
icu_memcpy(comp->fBuffer, byteBuffer,
|
||||
uprv_memcpy(comp->fBuffer, byteBuffer,
|
||||
sourceLimit - byteBuffer);
|
||||
comp->fBufferLength = sourceLimit - byteBuffer;
|
||||
byteBuffer += comp->fBufferLength;
|
||||
|
@ -1046,7 +1046,7 @@ scsu_decompress(UnicodeCompressor *comp,
|
|||
not, rewind the source stream and break out*/
|
||||
if( byteBuffer >= sourceLimit ) {
|
||||
--byteBuffer;
|
||||
icu_memcpy(comp->fBuffer, byteBuffer,
|
||||
uprv_memcpy(comp->fBuffer, byteBuffer,
|
||||
sourceLimit - byteBuffer);
|
||||
comp->fBufferLength = sourceLimit - byteBuffer;
|
||||
byteBuffer += comp->fBufferLength;
|
||||
|
@ -1076,7 +1076,7 @@ scsu_decompress(UnicodeCompressor *comp,
|
|||
not, rewind the source stream and break out*/
|
||||
if( byteBuffer >= sourceLimit ) {
|
||||
--byteBuffer;
|
||||
icu_memcpy(comp->fBuffer, byteBuffer,
|
||||
uprv_memcpy(comp->fBuffer, byteBuffer,
|
||||
sourceLimit - byteBuffer);
|
||||
comp->fBufferLength = sourceLimit - byteBuffer;
|
||||
byteBuffer += comp->fBufferLength;
|
||||
|
@ -1094,7 +1094,7 @@ scsu_decompress(UnicodeCompressor *comp,
|
|||
rewind the source stream and break out*/
|
||||
if( (byteBuffer + 1) >= sourceLimit ) {
|
||||
--byteBuffer;
|
||||
icu_memcpy(comp->fBuffer, byteBuffer,
|
||||
uprv_memcpy(comp->fBuffer, byteBuffer,
|
||||
sourceLimit - byteBuffer);
|
||||
comp->fBufferLength = sourceLimit - byteBuffer;
|
||||
byteBuffer += comp->fBufferLength;
|
||||
|
@ -1131,7 +1131,7 @@ scsu_decompress(UnicodeCompressor *comp,
|
|||
rewind the source stream and break out*/
|
||||
if( byteBuffer >= sourceLimit ) {
|
||||
--byteBuffer;
|
||||
icu_memcpy(comp->fBuffer, byteBuffer,
|
||||
uprv_memcpy(comp->fBuffer, byteBuffer,
|
||||
sourceLimit - byteBuffer);
|
||||
comp->fBufferLength = sourceLimit - byteBuffer;
|
||||
byteBuffer += comp->fBufferLength;
|
||||
|
@ -1153,7 +1153,7 @@ scsu_decompress(UnicodeCompressor *comp,
|
|||
rewind the source stream and break out*/
|
||||
if( (byteBuffer + 1) >= sourceLimit ) {
|
||||
--byteBuffer;
|
||||
icu_memcpy(comp->fBuffer, byteBuffer,
|
||||
uprv_memcpy(comp->fBuffer, byteBuffer,
|
||||
sourceLimit - byteBuffer);
|
||||
comp->fBufferLength = sourceLimit - byteBuffer;
|
||||
byteBuffer += comp->fBufferLength;
|
||||
|
@ -1187,7 +1187,7 @@ scsu_decompress(UnicodeCompressor *comp,
|
|||
rewind the source stream and break out*/
|
||||
if( byteBuffer >= sourceLimit - 1) {
|
||||
--byteBuffer;
|
||||
icu_memcpy(comp->fBuffer, byteBuffer,
|
||||
uprv_memcpy(comp->fBuffer, byteBuffer,
|
||||
sourceLimit - byteBuffer);
|
||||
comp->fBufferLength = sourceLimit - byteBuffer;
|
||||
byteBuffer += comp->fBufferLength;
|
||||
|
@ -1204,7 +1204,7 @@ scsu_decompress(UnicodeCompressor *comp,
|
|||
the source stream and break out*/
|
||||
if( byteBuffer >= sourceLimit ) {
|
||||
--byteBuffer;
|
||||
icu_memcpy(comp->fBuffer, byteBuffer,
|
||||
uprv_memcpy(comp->fBuffer, byteBuffer,
|
||||
sourceLimit - byteBuffer);
|
||||
comp->fBufferLength = sourceLimit - byteBuffer;
|
||||
byteBuffer += comp->fBufferLength;
|
||||
|
|
|
@ -1,142 +1 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File scsu.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/17/99 stephen Creation (ported from java UnicodeCompressor.java)
|
||||
* 09/21/99 stephen Updated to handle data splits on decompression.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef SCSU_H
|
||||
#define SCSU_H 1
|
||||
|
||||
#include "utypes.h"
|
||||
|
||||
/* Number of windows */
|
||||
#define USCSU_NUM_WINDOWS 8
|
||||
#define USCSU_NUM_STATIC_WINDOWS 8
|
||||
|
||||
/* Maximum value for a window's index */
|
||||
#define USCSU_MAX_INDEX 0xFF
|
||||
|
||||
/* The size of the internal buffer for a UnicodeCompressor. */
|
||||
#define USCSU_BUFSIZE 3
|
||||
|
||||
/** The UnicodeCompressor struct */
|
||||
struct UnicodeCompressor {
|
||||
|
||||
/** Alias to current dynamic window */
|
||||
int32_t fCurrentWindow;
|
||||
|
||||
/** Dynamic compression window offsets */
|
||||
int32_t fOffsets [ USCSU_NUM_WINDOWS ];
|
||||
|
||||
/** Current compression mode */
|
||||
int32_t fMode;
|
||||
|
||||
/** Keeps count of times character indices are encountered */
|
||||
int32_t fIndexCount [ USCSU_MAX_INDEX + 1 ];
|
||||
|
||||
/** The time stamps indicate when a window was last defined */
|
||||
int32_t fTimeStamps [ USCSU_NUM_WINDOWS ];
|
||||
|
||||
/** The current time stamp */
|
||||
int32_t fTimeStamp;
|
||||
|
||||
/** Internal buffer for saving state */
|
||||
uint8_t fBuffer [ USCSU_BUFSIZE ];
|
||||
|
||||
/** Number of characters in our internal buffer */
|
||||
int32_t fBufferLength;
|
||||
};
|
||||
typedef struct UnicodeCompressor UnicodeCompressor;
|
||||
|
||||
/**
|
||||
* Initialize a UnicodeCompressor.
|
||||
* Sets all windows to their default values.
|
||||
* @see #reset
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 scsu_init(UnicodeCompressor *comp);
|
||||
|
||||
/**
|
||||
* Reset the compressor to its initial state.
|
||||
* @param comp The UnicodeCompressor to reset.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 scsu_reset(UnicodeCompressor *comp);
|
||||
|
||||
/**
|
||||
* Compress a Unicode character array into a byte array.
|
||||
*
|
||||
* This function is not guaranteed to completely fill the output buffer, nor
|
||||
* is it guaranteed to compress the entire input.
|
||||
* If the source data is completely compressed, <TT>status</TT> will be set
|
||||
* to <TT>U_ZERO_ERROR</TT>.
|
||||
* If the source data is not completely compressed, <TT>status</TT> will be
|
||||
* set to <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT>. If this occurs, larger buffers
|
||||
* should be allocated, or data flushed, and the function should be called
|
||||
* again with the new buffers.
|
||||
*
|
||||
* @param comp A pointer to a previously-initialized UnicodeCompressor
|
||||
* @param target I/O parameter. On input, a pointer to a buffer of bytes to
|
||||
* receive the compressed data. On output, points to the byte following
|
||||
* the last byte written. This buffer must be at least 4 bytes.
|
||||
* @param targetLimit A pointer to the end of the array <TT>target</TT>.
|
||||
* @param source I/O parameter. On input, a pointer to a buffer of
|
||||
* Unicode characters to be compressed. On output, points to the character
|
||||
* following the last character compressed.
|
||||
* @param sourceLimit A pointer to the end of the array <TT>source</TT>.
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
*
|
||||
* @see #decompress
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 scsu_compress(UnicodeCompressor *comp,
|
||||
uint8_t **target,
|
||||
const uint8_t *targetLimit,
|
||||
const UChar **source,
|
||||
const UChar *sourceLimit,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Decompress a byte array into a Unicode character array.
|
||||
*
|
||||
* This function will either completely fill the output buffer, or
|
||||
* consume the entire input.
|
||||
* If the source data is completely decompressed, <TT>status</TT> will be set
|
||||
* to <TT>U_ZERO_ERROR</TT>.
|
||||
* If the source data is not completely decompressed, <TT>status</TT> will be
|
||||
* set to <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT>. If this occurs, larger buffers
|
||||
* should be allocated, or data flushed, and the function should be called
|
||||
* again with the new buffers.
|
||||
*
|
||||
* @param comp A pointer to a previously-initialized UnicodeCompressor
|
||||
* @param target I/O parameter. On input, a pointer to a buffer of Unicode
|
||||
* characters to receive the decompressed data. On output, points to the
|
||||
* character following the last character written. This buffer must be
|
||||
* at least 2 bytes.
|
||||
* @param targetLimit A pointer to the end of the array <TT>target</TT>.
|
||||
* @param source I/O parameter. On input, a pointer to a buffer of
|
||||
* bytes to be decompressed. On output, points to the byte following the
|
||||
* last byte decompressed.
|
||||
* @param sourceLimit A pointer to the end of the array <TT>source</TT>.
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
* @return The number of Unicode characters written to <TT>target</TT>.
|
||||
*
|
||||
* @see #compress
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 scsu_decompress(UnicodeCompressor *comp,
|
||||
UChar **target,
|
||||
const UChar *targetLimit,
|
||||
const uint8_t **source,
|
||||
const uint8_t *sourceLimit,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif
|
||||
#error Please include unicode/scsu.h instead
|
||||
|
|
|
@ -23,10 +23,10 @@
|
|||
#endif
|
||||
|
||||
#include "cmemory.h"
|
||||
#include "utypes.h"
|
||||
#include "ustring.h"
|
||||
#include "uchar.h"
|
||||
#include "ubidi.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ubidi.h"
|
||||
#include "ubidiimp.h"
|
||||
|
||||
/*
|
||||
|
@ -154,14 +154,14 @@ ubidi_openSized(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode *pErr
|
|||
}
|
||||
|
||||
/* allocate memory for the object */
|
||||
pBiDi=(UBiDi *)icu_malloc(sizeof(UBiDi));
|
||||
pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
|
||||
if(pBiDi==NULL) {
|
||||
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
|
||||
icu_memset(pBiDi, 0, sizeof(UBiDi));
|
||||
uprv_memset(pBiDi, 0, sizeof(UBiDi));
|
||||
|
||||
/* allocate memory for arrays as requested */
|
||||
if(maxLength>0) {
|
||||
|
@ -211,7 +211,7 @@ getMemory(void **pMemory, UTextOffset *pSize, bool_t mayAllocate, UTextOffset si
|
|||
/* check for existing memory */
|
||||
if(*pMemory==NULL) {
|
||||
/* we need to allocate memory */
|
||||
if(mayAllocate && (*pMemory=icu_malloc(sizeNeeded))!=NULL) {
|
||||
if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
|
||||
*pSize=sizeNeeded;
|
||||
return TRUE;
|
||||
} else {
|
||||
|
@ -226,7 +226,7 @@ getMemory(void **pMemory, UTextOffset *pSize, bool_t mayAllocate, UTextOffset si
|
|||
/* we may try to grow or shrink */
|
||||
void *memory;
|
||||
|
||||
if((memory=icu_realloc(*pMemory, sizeNeeded))!=NULL) {
|
||||
if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
|
||||
*pMemory=memory;
|
||||
*pSize=sizeNeeded;
|
||||
return TRUE;
|
||||
|
@ -245,15 +245,15 @@ U_CAPI void U_EXPORT2
|
|||
ubidi_close(UBiDi *pBiDi) {
|
||||
if(pBiDi!=NULL) {
|
||||
if(pBiDi->dirPropsMemory!=NULL) {
|
||||
icu_free(pBiDi->dirPropsMemory);
|
||||
uprv_free(pBiDi->dirPropsMemory);
|
||||
}
|
||||
if(pBiDi->levelsMemory!=NULL) {
|
||||
icu_free(pBiDi->levelsMemory);
|
||||
uprv_free(pBiDi->levelsMemory);
|
||||
}
|
||||
if(pBiDi->runsMemory!=NULL) {
|
||||
icu_free(pBiDi->runsMemory);
|
||||
uprv_free(pBiDi->runsMemory);
|
||||
}
|
||||
icu_free(pBiDi);
|
||||
uprv_free(pBiDi);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,892 +1 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: ubidi.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999jul27
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef UBIDI_H
|
||||
#define UBIDI_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "uchar.h"
|
||||
|
||||
/*
|
||||
* javadoc-style comments are intended to be transformed into HTML
|
||||
* using DOC++ - see
|
||||
* http://www.zib.de/Visual/software/doc++/index.html .
|
||||
*
|
||||
* The HTML documentation is created with
|
||||
* doc++ -H ubidi.h
|
||||
*
|
||||
* The following #define trick allows us to do it all in one file
|
||||
* and still be able to compile it.
|
||||
*/
|
||||
#define DOCXX_TAG
|
||||
#define BIDI_SAMPLE_CODE
|
||||
|
||||
/**
|
||||
* @name BIDI algorithm for ICU
|
||||
*
|
||||
* <h2>BIDI algorithm for ICU</h2>
|
||||
*
|
||||
* This is an implementation of the Unicode Bidirectional algorithm.
|
||||
* The algorithm is defined in the
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
|
||||
* version 5, also described in The Unicode Standard, Version 3.0 .<p>
|
||||
*
|
||||
* <h3>General remarks about the API:</h3>
|
||||
*
|
||||
* In functions with an error code parameter,
|
||||
* the <code>pErrorCode</code> pointer must be valid
|
||||
* and the value that it points to must not indicate a failure before
|
||||
* the function call. Otherwise, the function returns immediately.
|
||||
* After the function call, the value indicates success or failure.<p>
|
||||
*
|
||||
* The <quote>limit</quote> of a sequence of characters is the position just after their
|
||||
* last character, i.e., one more than that position.<p>
|
||||
*
|
||||
* Some of the API functions provide access to <quote>runs</quote>.
|
||||
* Such a <quote>run</quote> is defined as a sequence of characters
|
||||
* that are at the same embedding level
|
||||
* after performing the BIDI algorithm.<p>
|
||||
*
|
||||
* @author Markus W. Scherer
|
||||
* @version 1.0
|
||||
*/
|
||||
DOCXX_TAG
|
||||
/*@{*/
|
||||
|
||||
/**
|
||||
* UBiDiLevel is the type of the level values in this
|
||||
* BiDi implementation.
|
||||
* It holds an embedding level and indicates the visual direction
|
||||
* by its bit 0 (even/odd value).<p>
|
||||
*
|
||||
* It can also hold non-level values for the
|
||||
* <code>paraLevel</code> and <code>embeddingLevels</code>
|
||||
* arguments of <code>ubidi_setPara()</code>; there:
|
||||
* <ul>
|
||||
* <li>bit 7 of an <code>embeddingLevels[]</code>
|
||||
* value indicates whether the using application is
|
||||
* specifying the level of a character to <i>override</i> whatever the
|
||||
* BiDi implementation would resolve it to.</li>
|
||||
* <li><code>paraLevel</code> can be set to the
|
||||
* pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
|
||||
* and <code>UBIDI_DEFAULT_RTL</code>.</li>
|
||||
*
|
||||
* @see ubidi_setPara
|
||||
*
|
||||
* <p>The related constants are not real, valid level values.
|
||||
* <code>UBIDI_DEFAULT_XXX</code> can be used to specify
|
||||
* a default for the paragraph level for
|
||||
* when the <code>ubidi_setPara()</code> function
|
||||
* shall determine it but there is no
|
||||
* strongly typed character in the input.<p>
|
||||
*
|
||||
* Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
|
||||
* and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
|
||||
* just like with normal LTR and RTL level values -
|
||||
* these special values are designed that way. Also, the implementation
|
||||
* assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
|
||||
*
|
||||
* @see UBIDI_DEFAULT_LTR
|
||||
* @see UBIDI_DEFAULT_RTL
|
||||
* @see UBIDI_LEVEL_OVERRIDE
|
||||
* @see UBIDI_MAX_EXPLICIT_LEVEL
|
||||
*/
|
||||
typedef uint8_t UBiDiLevel;
|
||||
|
||||
/** Paragraph level setting.
|
||||
* If there is no strong character, then set the paragraph level to 0 (left-to-right).
|
||||
*/
|
||||
#define UBIDI_DEFAULT_LTR 0xfe
|
||||
|
||||
/** Paragraph level setting.
|
||||
* If there is no strong character, then set the paragraph level to 1 (right-to-left).
|
||||
*/
|
||||
#define UBIDI_DEFAULT_RTL 0xff
|
||||
|
||||
/**
|
||||
* Maximum explicit embedding level.
|
||||
* (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
|
||||
*
|
||||
*/
|
||||
#define UBIDI_MAX_EXPLICIT_LEVEL 61
|
||||
|
||||
/** Bit flag for level input.
|
||||
* Overrides directional properties.
|
||||
*/
|
||||
#define UBIDI_LEVEL_OVERRIDE 0x80
|
||||
|
||||
/**
|
||||
* @memo <code>UBiDiDirection</code> values indicate the text direction.
|
||||
*/
|
||||
enum UBiDiDirection {
|
||||
/** @memo All left-to-right text. This is a 0 value. */
|
||||
UBIDI_LTR,
|
||||
/** @memo All right-to-left text. This is a 1 value. */
|
||||
UBIDI_RTL,
|
||||
/** @memo Mixed-directional text. */
|
||||
UBIDI_MIXED
|
||||
};
|
||||
|
||||
typedef enum UBiDiDirection UBiDiDirection;
|
||||
|
||||
/**
|
||||
* Forward declaration of the <code>UBiDi</code> structure for the declaration of
|
||||
* the API functions. Its fields are implementation-specific.<p>
|
||||
* This structure holds information about a paragraph of text
|
||||
* with BiDi-algorithm-related details, or about one line of
|
||||
* such a paragraph.<p>
|
||||
* Reordering can be done on a line, or on a paragraph which is
|
||||
* then interpreted as one single line.
|
||||
*/
|
||||
struct UBiDi;
|
||||
|
||||
typedef struct UBiDi UBiDi;
|
||||
|
||||
/**
|
||||
* Allocate a <code>UBiDi</code> structure.
|
||||
* Such an object is initially empty. It is assigned
|
||||
* the BiDi properties of a paragraph by <code>ubidi_setPara()</code>
|
||||
* or the BiDi properties of a line of a paragraph by
|
||||
* <code>ubidi_setLine()</code>.<p>
|
||||
* This object can be reused for as long as it is not deallocated
|
||||
* by calling <code>ubidi_close()</code>.<p>
|
||||
* <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate additional memory for
|
||||
* internal structures as necessary.
|
||||
*
|
||||
* @return An empty <code>UBiDi</code> object.
|
||||
*/
|
||||
U_CAPI UBiDi * U_EXPORT2
|
||||
ubidi_open(void);
|
||||
|
||||
/**
|
||||
* Allocate a <code>UBiDi</code> structure with preallocated memory
|
||||
* for internal structures.
|
||||
* This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
|
||||
* with no arguments, but it also preallocates memory for internal structures
|
||||
* according to the sizings supplied by the caller.<p>
|
||||
* Subsequent functions will not allocate any more memory, and are thus
|
||||
* guaranteed not to fail because of lack of memory.<p>
|
||||
* The preallocation can be limited to some of the internal memory
|
||||
* by setting some values to 0 here. That means that if, e.g.,
|
||||
* <code>maxRunCount</code> cannot be reasonably predetermined and should not
|
||||
* be set to <code>maxLength</code> (the only failproof value) to avoid
|
||||
* wasting memory, then <code>maxRunCount</code> could be set to 0 here
|
||||
* and the internal structures that are associated with it will be allocated
|
||||
* on demand, just like with <code>ubidi_open()</code>.
|
||||
*
|
||||
* @param maxLength is the maximum paragraph or line length that internal memory
|
||||
* will be preallocated for. An attempt to associate this object with a
|
||||
* longer text will fail, unless this value is 0, which leaves the allocation
|
||||
* up to the implementation.
|
||||
*
|
||||
* @param maxRunCount is the maximum anticipated number of same-level runs
|
||||
* that internal memory will be preallocated for. An attempt to access
|
||||
* visual runs on an object that was not preallocated for as many runs
|
||||
* as the text was actually resolved to will fail,
|
||||
* unless this value is 0, which leaves the allocation up to the implementation.<p>
|
||||
* The number of runs depends on the actual text and may be anywhere between
|
||||
* 1 and <code>maxLength</code>. It is typically small.<p>
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return An empty <code>UBiDi</code> object with preallocated memory.
|
||||
*/
|
||||
U_CAPI UBiDi * U_EXPORT2
|
||||
ubidi_openSized(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* <code>ubidi_close()</code> must be called to free the memory
|
||||
* associated with a UBiDi object.<p>
|
||||
*
|
||||
* <strong>Important: </strong>
|
||||
* If a <code>UBiDi</code> object is the <quote>child</quote>
|
||||
* of another one (its <quote>parent</quote>), after calling
|
||||
* <code>ubidi_setLine()</code>, then the child object must
|
||||
* be destroyed (closed) or reused (by calling
|
||||
* <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
|
||||
* before the parent object.
|
||||
*
|
||||
* @param pBiDi is a <code>UBiDi</code> object.
|
||||
*
|
||||
* @see ubidi_setPara
|
||||
* @see ubidi_setLine
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_close(UBiDi *pBiDi);
|
||||
|
||||
/**
|
||||
* Perform the Unicode BiDi algorithm. It is defined in the
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
|
||||
* version 5,
|
||||
* also described in The Unicode Standard, Version 3.0 .<p>
|
||||
*
|
||||
* This function takes a single plain text paragraph with or without
|
||||
* externally specified embedding levels from <quote>styled</quote> text
|
||||
* and computes the left-right-directionality of each character.<p>
|
||||
*
|
||||
* If the entire paragraph consists of text of only one direction, then
|
||||
* the function may not perform all the steps described by the algorithm,
|
||||
* i.e., some levels may not be the same as if all steps were performed.
|
||||
* This is not relevant for unidirectional text.<br>
|
||||
* For example, in pure LTR text with numbers the numbers would get
|
||||
* a resolved level of 2 higher than the surrounding text according to
|
||||
* the algorithm. This implementation may set all resolved levels to
|
||||
* the same value in such a case.<p>
|
||||
*
|
||||
* The text must be externally split into separate paragraphs (rule P1).
|
||||
* Paragraph separators (B) should appear at most at the very end.
|
||||
*
|
||||
* @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
|
||||
* which will be set to contain the reordering information,
|
||||
* especially the resolved levels for all the characters in <code>text</code>.
|
||||
*
|
||||
* @param text is a pointer to the single-paragraph text that the
|
||||
* BiDi algorithm will be performed on
|
||||
* (step (P1) of the algorithm is performed externally).
|
||||
* <strong>The text must be (at least) <code>length</code> long.</strong>
|
||||
*
|
||||
* @param length is the length of the text; if <code>length==-1</code> then
|
||||
* the text must be zero-terminated.
|
||||
*
|
||||
* @param paraLevel specifies the default level for the paragraph;
|
||||
* it is typically 0 (LTR) or 1 (RTL).
|
||||
* If the function shall determine the paragraph level from the text,
|
||||
* then <code>paraLevel</code> can be set to
|
||||
* either <code>UBIDI_DEFAULT_LTR</code>
|
||||
* or <code>UBIDI_DEFAULT_RTL</code>;
|
||||
* if there is no strongly typed character, then
|
||||
* the desired default is used (0 for LTR or 1 for RTL).
|
||||
* Any other value between 0 and <code>UBIDI_MAX_EXPLICIT_LEVEL</code> is also valid,
|
||||
* with odd levels indicating RTL.
|
||||
*
|
||||
* @param embeddingLevels (in) may be used to preset the embedding and override levels,
|
||||
* ignoring characters like LRE and PDF in the text.
|
||||
* A level overrides the directional property of its corresponding
|
||||
* (same index) character if the level has the
|
||||
* <code>UBIDI_LEVEL_OVERRIDE</code> bit set.<p>
|
||||
* Except for that bit, it must be
|
||||
* <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>.<p>
|
||||
* <strong>Caution: </strong>A copy of this pointer, not of the levels,
|
||||
* will be stored in the <code>UBiDi</code> object;
|
||||
* the <code>embeddingLevels</code> array must not be
|
||||
* deallocated before the <code>UBiDi</code> structure is destroyed or reused,
|
||||
* and the <code>embeddingLevels</code>
|
||||
* should not be modified to avoid unexpected results on subsequent BiDi operations.
|
||||
* However, the <code>ubidi_setPara()</code> and
|
||||
* <code>ubidi_setLine()</code> functions may modify some or all of the levels.<p>
|
||||
* After the <code>UBiDi</code> object is reused or destroyed, the caller
|
||||
* must take care of the deallocation of the <code>embeddingLevels</code> array.<p>
|
||||
* <strong>The <code>embeddingLevels</code> array must be
|
||||
* at least <code>length</code> long.</strong>
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_setPara(UBiDi *pBiDi, const UChar *text, UTextOffset length,
|
||||
UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
|
||||
* contain the reordering information, especially the resolved levels,
|
||||
* for all the characters in a line of text. This line of text is
|
||||
* specified by referring to a <code>UBiDi</code> object representing
|
||||
* this information for a paragraph of text, and by specifying
|
||||
* a range of indexes in this paragraph.<p>
|
||||
* In the new line object, the indexes will range from 0 to <code>limit-start</code>.<p>
|
||||
*
|
||||
* This is used after calling <code>ubidi_setPara()</code>
|
||||
* for a paragraph, and after line-breaking on that paragraph.
|
||||
* It is not necessary if the paragraph is treated as a single line.<p>
|
||||
*
|
||||
* After line-breaking, rules (L1) and (L2) for the treatment of
|
||||
* trailing WS and for reordering are performed on
|
||||
* a <code>UBiDi</code> object that represents a line.<p>
|
||||
*
|
||||
* <strong>Important: </strong><code>pLineBiDi</code> shares data with
|
||||
* <code>pParaBiDi</code>.
|
||||
* You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
|
||||
* In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
|
||||
* before the object for its parent paragraph.
|
||||
*
|
||||
* @param pParaBiDi is the parent paragraph object.
|
||||
*
|
||||
* @param start is the line's first index into the paragraph text.
|
||||
*
|
||||
* @param limit is just behind the line's last index into the paragraph text
|
||||
* (its last index +1).<br>
|
||||
* It must be <code>0<=start<=limit<=</code>paragraph length.
|
||||
*
|
||||
* @param pLineBiDi is the object that will now represent a line of the paragraph.
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ubidi_setPara
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_setLine(const UBiDi *pParaBiDi,
|
||||
UTextOffset start, UTextOffset limit,
|
||||
UBiDi *pLineBiDi,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get the directionality of the text.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @return A <code>UBIDI_XXX</code> value that indicates if the entire text
|
||||
* represented by this object is unidirectional,
|
||||
* and which direction, or if it is mixed-directional.
|
||||
*
|
||||
* @see UBiDiDirection
|
||||
*/
|
||||
U_CAPI UBiDiDirection U_EXPORT2
|
||||
ubidi_getDirection(const UBiDi *pBiDi);
|
||||
|
||||
/**
|
||||
* Get the length of the text.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @return The length of the text that the UBiDi object was created for.
|
||||
*/
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
ubidi_getLength(const UBiDi *pBiDi);
|
||||
|
||||
/**
|
||||
* Get the paragraph level of the text.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @return The paragraph level.
|
||||
*
|
||||
* @see UBiDiLevel
|
||||
*/
|
||||
U_CAPI UBiDiLevel U_EXPORT2
|
||||
ubidi_getParaLevel(const UBiDi *pBiDi);
|
||||
|
||||
/**
|
||||
* Get the level for one character.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param charIndex the index of a character.
|
||||
*
|
||||
* @return The level for the character at charIndex.
|
||||
*
|
||||
* @see UBiDiLevel
|
||||
*/
|
||||
U_CAPI UBiDiLevel U_EXPORT2
|
||||
ubidi_getLevelAt(const UBiDi *pBiDi, UTextOffset charIndex);
|
||||
|
||||
/**
|
||||
* Get an array of levels for each character.<p>
|
||||
*
|
||||
* Note that this function may allocate memory under some
|
||||
* circumstances, unlike <code>ubidi_getLevelAt()</code>.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return The levels array for the text,
|
||||
* or <code>NULL</code> if an error occurs.
|
||||
*
|
||||
* @see UBiDiLevel
|
||||
*/
|
||||
U_CAPI const UBiDiLevel * U_EXPORT2
|
||||
ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get a logical run.
|
||||
* This function returns information about a run and is used
|
||||
* to retrieve runs in logical order.<p>
|
||||
* This is especially useful for line-breaking on a paragraph.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param logicalStart is the first character of the run.
|
||||
*
|
||||
* @param pLogicalLimit will receive the limit of the run.
|
||||
* The l-value that you point to here may be the
|
||||
* same expression (variable) as the one for
|
||||
* <code>logicalStart</code>.
|
||||
* This pointer can be <code>NULL</code> if this
|
||||
* value is not necessary.
|
||||
*
|
||||
* @param pLevel will receive the level of the run.
|
||||
* This pointer can be <code>NULL</code> if this
|
||||
* value is not necessary.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_getLogicalRun(const UBiDi *pBiDi, UTextOffset logicalStart,
|
||||
UTextOffset *pLogicalLimit, UBiDiLevel *pLevel);
|
||||
|
||||
/**
|
||||
* Get the number of runs.
|
||||
* This function may invoke the actual reordering on the
|
||||
* <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
|
||||
* may have resolved only the levels of the text. Therefore,
|
||||
* <code>ubidi_countRuns()</code> may have to allocate memory,
|
||||
* and may fail doing so.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return The number of runs.
|
||||
*/
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get one run's logical start, length, and directionality,
|
||||
* which can be 0 for LTR or 1 for RTL.
|
||||
* In an RTL run, the character at the logical start is
|
||||
* visually on the right of the displayed run.
|
||||
* The length is the number of characters in the run.<p>
|
||||
* <code>ubidi_countRuns()</code> should be called
|
||||
* before the runs are retrieved.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param runIndex is the number of the run in visual order, in the
|
||||
* range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
|
||||
*
|
||||
* @param pLogicalStart is the first logical character index in the text.
|
||||
* The pointer may be <code>NULL</code> if this index is not needed.
|
||||
*
|
||||
* @param pLength is the number of characters (at least one) in the run.
|
||||
* The pointer may be <code>NULL</code> if this is not needed.
|
||||
*
|
||||
* @return the directionality of the run,
|
||||
* <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
|
||||
* never <code>UBIDI_MIXED</code>.
|
||||
*
|
||||
* @see ubidi_countRuns
|
||||
*
|
||||
* Example:
|
||||
* <pre>
|
||||
* UTextOffset i, count=ubidi_countRuns(pBiDi),
|
||||
* logicalStart, visualIndex=0, length;
|
||||
* for(i=0; i<count; ++i) {
|
||||
* if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
|
||||
* do { // LTR
|
||||
* show_char(text[logicalStart++], visualIndex++);
|
||||
* } while(--length>0);
|
||||
* } else {
|
||||
* logicalStart+=length; // logicalLimit
|
||||
* do { // RTL
|
||||
* show_char(text[--logicalStart], visualIndex++);
|
||||
* } while(--length>0);
|
||||
* }
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* Note that in right-to-left runs, code like this places
|
||||
* modifier letters before base characters and second surrogates
|
||||
* before first ones.
|
||||
*/
|
||||
U_CAPI UBiDiDirection U_EXPORT2
|
||||
ubidi_getVisualRun(UBiDi *pBiDi, UTextOffset runIndex,
|
||||
UTextOffset *pLogicalStart, UTextOffset *pLength);
|
||||
|
||||
/**
|
||||
* Get the visual position from a logical text position.
|
||||
* If such a mapping is used many times on the same
|
||||
* <code>UBiDi</code> object, then calling
|
||||
* <code>ubidi_getLogicalMap()</code> is more efficient.<p>
|
||||
*
|
||||
* Note that in right-to-left runs, this mapping places
|
||||
* modifier letters before base characters and second surrogates
|
||||
* before first ones.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param logicalIndex is the index of a character in the text.
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return The visual position of this character.
|
||||
*
|
||||
* @see ubidi_getLogicalMap
|
||||
* @see ubidi_getLogicalIndex
|
||||
*/
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
ubidi_getVisualIndex(UBiDi *pBiDi, UTextOffset logicalIndex, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get the logical text position from a visual position.
|
||||
* If such a mapping is used many times on the same
|
||||
* <code>UBiDi</code> object, then calling
|
||||
* <code>ubidi_getVisualMap()</code> is more efficient.<p>
|
||||
*
|
||||
* This is the inverse function to <code>ubidi_getVisualIndex()</code>.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param visualIndex is the visual position of a character.
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return The index of this character in the text.
|
||||
*
|
||||
* @see ubidi_getVisualMap
|
||||
* @see ubidi_getVisualIndex
|
||||
*/
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
ubidi_getLogicalIndex(UBiDi *pBiDi, UTextOffset visualIndex, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get a logical-to-visual index map (array) for the characters in the UBiDi
|
||||
* (paragraph or line) object.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param indexMap is a pointer to an array of <code>ubidi_getLength()</code>
|
||||
* indexes which will reflect the reordering of the characters.
|
||||
* The array does not need to be initialized.<p>
|
||||
* The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.<p>
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ubidi_getVisualMap
|
||||
* @see ubidi_getVisualIndex
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_getLogicalMap(UBiDi *pBiDi, UTextOffset *indexMap, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get a visual-to-logical index map (array) for the characters in the UBiDi
|
||||
* (paragraph or line) object.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param indexMap is a pointer to an array of <code>ubidi_getLength()</code>
|
||||
* indexes which will reflect the reordering of the characters.
|
||||
* The array does not need to be initialized.<p>
|
||||
* The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.<p>
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ubidi_getLogicalMap
|
||||
* @see ubidi_getLogicalIndex
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_getVisualMap(UBiDi *pBiDi, UTextOffset *indexMap, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* This is a convenience function that does not use a UBiDi object.
|
||||
* It is intended to be used for when an application has determined the levels
|
||||
* of objects (character sequences) and just needs to have them reordered (L2).
|
||||
* This is equivalent to using <code>ubidi_getLogicalMap</code> on a
|
||||
* <code>UBiDi</code> object.
|
||||
*
|
||||
* @param levels is an array with <code>length</code> levels that have been determined by
|
||||
* the application.
|
||||
*
|
||||
* @param length is the number of levels in the array, or, semantically,
|
||||
* the number of objects to be reordered.
|
||||
* It must be <code>length>0</code>.
|
||||
*
|
||||
* @param indexMap is a pointer to an array of <code>length</code>
|
||||
* indexes which will reflect the reordering of the characters.
|
||||
* The array does not need to be initialized.<p>
|
||||
* The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_reorderLogical(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
|
||||
|
||||
/**
|
||||
* This is a convenience function that does not use a UBiDi object.
|
||||
* It is intended to be used for when an application has determined the levels
|
||||
* of objects (character sequences) and just needs to have them reordered (L2).
|
||||
* This is equivalent to using <code>ubidi_getVisualMap</code> on a
|
||||
* <code>UBiDi</code> object.
|
||||
*
|
||||
* @param levels is an array with <code>length</code> levels that have been determined by
|
||||
* the application.
|
||||
*
|
||||
* @param length is the number of levels in the array, or, semantically,
|
||||
* the number of objects to be reordered.
|
||||
* It must be <code>length>0</code>.
|
||||
*
|
||||
* @param indexMap is a pointer to an array of <code>length</code>
|
||||
* indexes which will reflect the reordering of the characters.
|
||||
* The array does not need to be initialized.<p>
|
||||
* The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_reorderVisual(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
|
||||
|
||||
/**
|
||||
* Invert an index map.
|
||||
* The one-to-one index mapping of the first map is inverted and written to
|
||||
* the second one.
|
||||
*
|
||||
* @param srcMap is an array with <code>length</code> indexes
|
||||
* which define the original mapping.
|
||||
*
|
||||
* @param destMap is an array with <code>length</code> indexes
|
||||
* which will be filled with the inverse mapping.
|
||||
*
|
||||
* @param length is the length of each array.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_invertMap(const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length);
|
||||
|
||||
/**
|
||||
* @name Sample code for the ICU BIDI API
|
||||
*
|
||||
* <h2>Rendering a paragraph with the ICU BiDi API</h2>
|
||||
*
|
||||
* This is (hypothetical) sample code that illustrates
|
||||
* how the ICU BiDi API could be used to render a paragraph of text.
|
||||
* Rendering code depends highly on the graphics system,
|
||||
* therefore this sample code must make a lot of assumptions,
|
||||
* which may or may not match any existing graphics system's properties.
|
||||
*
|
||||
* <p>The basic assumptions are:</p>
|
||||
* <ul>
|
||||
* <li>Rendering is done from left to right on a horizontal line.</li>
|
||||
* <li>A run of single-style, unidirectional text can be rendered at once.</li>
|
||||
* <li>Such a run of text is passed to the graphics system with
|
||||
* characters (code units) in logical order.</li>
|
||||
* <li>The line-breaking algorithm is very complicated
|
||||
* and Locale-dependent -
|
||||
* and therefore its implementation is omitted from this sample code.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <pre>
|
||||
* #include "ubidi.h"
|
||||
*
|
||||
* typedef enum {
|
||||
* styleNormal=0, styleSelected=1,
|
||||
* styleBold=2, styleItalics=4,
|
||||
* styleSuper=8, styleSub=16
|
||||
* } Style;
|
||||
*
|
||||
* typedef struct { UTextOffset limit; Style style; } StyleRun;
|
||||
*
|
||||
* int getTextWidth(const UChar *text, UTextOffset start, UTextOffset limit,
|
||||
* const StyleRun *styleRuns, int styleRunCount);
|
||||
*
|
||||
* // set *pLimit and *pStyleRunLimit for a line
|
||||
* // from text[start] and from styleRuns[styleRunStart]
|
||||
* // using ubidi_getLogicalRun(para, ...)
|
||||
* void getLineBreak(const UChar *text, UTextOffset start, UTextOffset *pLimit,
|
||||
* UBiDi *para,
|
||||
* const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
|
||||
* int *pLineWidth);
|
||||
*
|
||||
* // render runs on a line sequentially, always from left to right
|
||||
*
|
||||
* // prepare rendering a new line
|
||||
* void startLine(UBiDiDirection textDirection, int lineWidth);
|
||||
*
|
||||
* // render a run of text and advance to the right by the run width
|
||||
* // the text[start..limit-1] is always in logical order
|
||||
* void renderRun(const UChar *text, UTextOffset start, UTextOffset limit,
|
||||
* UBiDiDirection textDirection, Style style);
|
||||
*
|
||||
* // We could compute a cross-product
|
||||
* // from the style runs with the directional runs
|
||||
* // and then reorder it.
|
||||
* // Instead, here we iterate over each run type
|
||||
* // and render the intersections -
|
||||
* // with shortcuts in simple (and common) cases.
|
||||
* // renderParagraph() is the main function.
|
||||
*
|
||||
* // render a directional run with
|
||||
* // (possibly) multiple style runs intersecting with it
|
||||
* void renderDirectionalRun(const UChar *text,
|
||||
* UTextOffset start, UTextOffset limit,
|
||||
* UBiDiDirection direction,
|
||||
* const StyleRun *styleRuns, int styleRunCount) {
|
||||
* int i;
|
||||
*
|
||||
* // iterate over style runs
|
||||
* if(direction==UBIDI_LTR) {
|
||||
* int styleLimit;
|
||||
*
|
||||
* for(i=0; i<styleRunCount; ++i) {
|
||||
* styleLimit=styleRuns[i].limit;
|
||||
* if(start<styleLimit) {
|
||||
* if(styleLimit>limit) { styleLimit=limit; }
|
||||
* renderRun(text, start, styleLimit,
|
||||
* direction, styleRuns[i].style);
|
||||
* if(styleLimit==limit) { break; }
|
||||
* start=styleLimit;
|
||||
* }
|
||||
* }
|
||||
* } else {
|
||||
* int styleStart;
|
||||
*
|
||||
* for(i=styleRunCount-1; i>=0; --i) {
|
||||
* if(i>0) {
|
||||
* styleStart=styleRuns[i-1].limit;
|
||||
* } else {
|
||||
* styleStart=0;
|
||||
* }
|
||||
* if(limit>=styleStart) {
|
||||
* if(styleStart<start) { styleStart=start; }
|
||||
* renderRun(text, styleStart, limit,
|
||||
* direction, styleRuns[i].style);
|
||||
* if(styleStart==start) { break; }
|
||||
* limit=styleStart;
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* // the line object represents text[start..limit-1]
|
||||
* void renderLine(UBiDi *line, const UChar *text,
|
||||
* UTextOffset start, UTextOffset limit,
|
||||
* const StyleRun *styleRuns, int styleRunCount) {
|
||||
* UBiDiDirection direction=ubidi_getDirection(line);
|
||||
* if(direction!=UBIDI_MIXED) {
|
||||
* // unidirectional
|
||||
* if(styleRunCount<=1) {
|
||||
* renderRun(text, start, limit, direction, styleRuns[0].style);
|
||||
* } else {
|
||||
* renderDirectionalRun(text, start, limit,
|
||||
* direction, styleRuns, styleRunCount);
|
||||
* }
|
||||
* } else {
|
||||
* // mixed-directional
|
||||
* UTextOffset count, i, length;
|
||||
* UBiDiLevel level;
|
||||
*
|
||||
* count=ubidi_countRuns(line, pErrorCode);
|
||||
* if(U_SUCCESS(*pErrorCode)) {
|
||||
* if(styleRunCount<=1) {
|
||||
* Style style=styleRuns[0].style;
|
||||
*
|
||||
* // iterate over directional runs
|
||||
* for(i=0; i<count; ++i) {
|
||||
* direction=ubidi_getVisualRun(line, i, &start, &length);
|
||||
* renderRun(text, start, start+length, direction, style);
|
||||
* }
|
||||
* } else {
|
||||
* UTextOffset j;
|
||||
*
|
||||
* // iterate over both directional and style runs
|
||||
* for(i=0; i<count; ++i) {
|
||||
* direction=ubidi_getVisualRun(line, i, &start, &length);
|
||||
* renderDirectionalRun(text, start, start+length,
|
||||
* direction, styleRuns, styleRunCount);
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* void renderParagraph(const UChar *text, UTextOffset length,
|
||||
* UBiDiDirection textDirection,
|
||||
* const StyleRun *styleRuns, int styleRunCount,
|
||||
* int lineWidth,
|
||||
* UErrorCode *pErrorCode) {
|
||||
* UBiDi *para;
|
||||
*
|
||||
* if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
|
||||
* return;
|
||||
* }
|
||||
*
|
||||
* para=ubidi_openSized(length, 0, pErrorCode);
|
||||
* if(para==NULL) { return; }
|
||||
*
|
||||
* ubidi_setPara(para, text, length,
|
||||
* textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
|
||||
* NULL, pErrorCode);
|
||||
* if(U_SUCCESS(*pErrorCode)) {
|
||||
* UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
|
||||
* StyleRun styleRun={ length, styleNormal };
|
||||
* int width;
|
||||
*
|
||||
* if(styleRuns==NULL || styleRunCount<=0) {
|
||||
* styleRunCount=1;
|
||||
* styleRuns=&styleRun;
|
||||
* }
|
||||
*
|
||||
* // assume styleRuns[styleRunCount-1].limit>=length
|
||||
*
|
||||
* width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
|
||||
* if(width<=lineWidth) {
|
||||
* // everything fits onto one line
|
||||
*
|
||||
* // prepare rendering a new line from either left or right
|
||||
* startLine(paraLevel, width);
|
||||
*
|
||||
* renderLine(para, text, 0, length,
|
||||
* styleRuns, styleRunCount);
|
||||
* } else {
|
||||
* UBiDi *line;
|
||||
*
|
||||
* // we need to render several lines
|
||||
* line=ubidi_openSized(length, 0, pErrorCode);
|
||||
* if(line!=NULL) {
|
||||
* UTextOffset start=0, limit;
|
||||
* int styleRunStart=0, styleRunLimit;
|
||||
*
|
||||
* for(;;) {
|
||||
* limit=length;
|
||||
* styleRunLimit=styleRunCount;
|
||||
* getLineBreak(text, start, &limit, para,
|
||||
* styleRuns, styleRunStart, &styleRunLimit,
|
||||
* &width);
|
||||
* ubidi_setLine(para, start, limit, line, pErrorCode);
|
||||
* if(U_SUCCESS(*pErrorCode)) {
|
||||
* // prepare rendering a new line
|
||||
* // from either left or right
|
||||
* startLine(paraLevel, width);
|
||||
*
|
||||
* renderLine(line, text, start, limit,
|
||||
* styleRuns+styleRunStart,
|
||||
* styleRunLimit-styleRunStart);
|
||||
* }
|
||||
* if(limit==length) { break; }
|
||||
* start=limit;
|
||||
* styleRunStart=styleRunLimit-1;
|
||||
* if(start>=styleRuns[styleRunStart].limit) {
|
||||
* ++styleRunStart;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* ubidi_close(line);
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* ubidi_close(para);
|
||||
* }
|
||||
* </pre>
|
||||
*/
|
||||
BIDI_SAMPLE_CODE
|
||||
/*@{*/
|
||||
/*@}*/
|
||||
|
||||
/*@}*/
|
||||
|
||||
#endif
|
||||
#error Please include unicode/ubidi.h instead
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
/* set import/export definitions */
|
||||
#ifdef U_COMMON_IMPLEMENTATION
|
||||
|
||||
#include "utypes.h"
|
||||
#include "uchar.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
|
||||
/* miscellaneous definitions ------------------------------------------------ */
|
||||
|
||||
|
|
|
@ -23,10 +23,10 @@
|
|||
#endif
|
||||
|
||||
#include "cmemory.h"
|
||||
#include "utypes.h"
|
||||
#include "ustring.h"
|
||||
#include "uchar.h"
|
||||
#include "ubidi.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ubidi.h"
|
||||
#include "ubidiimp.h"
|
||||
|
||||
/*
|
||||
|
@ -254,9 +254,9 @@ ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
|
|||
UBiDiLevel *levels=pBiDi->levelsMemory;
|
||||
|
||||
if(start>0 && levels!=pBiDi->levels) {
|
||||
icu_memcpy(levels, pBiDi->levels, start);
|
||||
uprv_memcpy(levels, pBiDi->levels, start);
|
||||
}
|
||||
icu_memset(levels+start, pBiDi->paraLevel, length-start);
|
||||
uprv_memset(levels+start, pBiDi->paraLevel, length-start);
|
||||
|
||||
/* this new levels array is set for the line and reflects the WS run */
|
||||
pBiDi->trailingWSStart=length;
|
||||
|
|
|
@ -23,11 +23,11 @@
|
|||
* 11/11/1999 weiv added u_isalnum(), cleaned comments
|
||||
********************************************************************************************
|
||||
*/
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "ucmp16.h"
|
||||
#include "ucmp8.h"
|
||||
#include "umutex.h"
|
||||
#include "uchar.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "cmemory.h"
|
||||
#include <string.h>
|
||||
|
||||
|
@ -5263,7 +5263,7 @@ createDirTables()
|
|||
const char* u_getVersion()
|
||||
{
|
||||
int32_t len=strlen(UNICODE_VERSION) + strlen("Unicode Version ");
|
||||
_ucdVersion=(char*)icu_realloc(_ucdVersion, len + 1 );
|
||||
_ucdVersion=(char*)uprv_realloc(_ucdVersion, len + 1 );
|
||||
strcpy(_ucdVersion, "Unicode Version ");
|
||||
strcat(_ucdVersion, UNICODE_VERSION);
|
||||
return _ucdVersion;
|
||||
|
|
|
@ -1,841 +1 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* File UCHAR.H
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 04/02/97 aliu Creation.
|
||||
* 03/29/99 helena Updated for C APIs.
|
||||
* 4/15/99 Madhu Updated for C Implementation and Javadoc
|
||||
* 5/20/99 Madhu Added the function u_getVersion()
|
||||
* 8/19/1999 srl Upgraded scripts to Unicode 3.0
|
||||
* 8/27/1999 schererm UCharDirection constants: U_...
|
||||
* 11/11/1999 weiv added u_isalnum(), cleaned comments
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UCHAR_H
|
||||
#define UCHAR_H
|
||||
|
||||
#include "utypes.h"
|
||||
/*===========================================================================*/
|
||||
/* Unicode version number */
|
||||
/*===========================================================================*/
|
||||
#define UNICODE_VERSION "3.0.0"
|
||||
|
||||
/**
|
||||
* The Unicode C API allows you to query the properties associated with individual
|
||||
* Unicode character values.
|
||||
* <p>
|
||||
* The Unicode character information, provided implicitly by the
|
||||
* Unicode character encoding standard, includes information about the script
|
||||
* (for example, symbols or control characters) to which the character belongs,
|
||||
* as well as semantic information such as whether a character is a digit or
|
||||
* uppercase, lowercase, or uncased.
|
||||
* <P>
|
||||
*/
|
||||
|
||||
/**
|
||||
* Constants.
|
||||
*/
|
||||
|
||||
/**
|
||||
* The minimum value a UChar can have. The lowest value a
|
||||
* UChar can have is 0x0000.
|
||||
*/
|
||||
static UChar UCHAR_MIN_VALUE;
|
||||
/**
|
||||
* The maximum value a UChar can have. The greatest value a
|
||||
* UChar can have is 0xffff.
|
||||
*/
|
||||
|
||||
static UChar UCHAR_MAX_VALUE;
|
||||
|
||||
/**
|
||||
* Data for enumerated Unicode general category types
|
||||
*/
|
||||
enum UCharCategory
|
||||
{
|
||||
/** */
|
||||
U_UNASSIGNED = 0,
|
||||
/** */
|
||||
U_UPPERCASE_LETTER = 1,
|
||||
/** */
|
||||
U_LOWERCASE_LETTER = 2,
|
||||
/** */
|
||||
U_TITLECASE_LETTER = 3,
|
||||
/** */
|
||||
U_MODIFIER_LETTER = 4,
|
||||
/** */
|
||||
U_OTHER_LETTER = 5,
|
||||
/** */
|
||||
U_NON_SPACING_MARK = 6,
|
||||
/** */
|
||||
U_ENCLOSING_MARK = 7,
|
||||
/** */
|
||||
U_COMBINING_SPACING_MARK = 8,
|
||||
/** */
|
||||
U_DECIMAL_DIGIT_NUMBER = 9,
|
||||
/** */
|
||||
U_LETTER_NUMBER = 10,
|
||||
/** */
|
||||
U_OTHER_NUMBER = 11,
|
||||
/** */
|
||||
U_SPACE_SEPARATOR = 12,
|
||||
/** */
|
||||
U_LINE_SEPARATOR = 13,
|
||||
/** */
|
||||
U_PARAGRAPH_SEPARATOR = 14,
|
||||
/** */
|
||||
U_CONTROL_CHAR = 15,
|
||||
/** */
|
||||
U_FORMAT_CHAR = 16,
|
||||
/** */
|
||||
U_PRIVATE_USE_CHAR = 17,
|
||||
/** */
|
||||
U_SURROGATE = 18,
|
||||
/** */
|
||||
U_DASH_PUNCTUATION = 19,
|
||||
/** */
|
||||
U_START_PUNCTUATION = 20,
|
||||
/** */
|
||||
U_END_PUNCTUATION = 21,
|
||||
/** */
|
||||
U_CONNECTOR_PUNCTUATION = 22,
|
||||
/** */
|
||||
U_OTHER_PUNCTUATION = 23,
|
||||
/** */
|
||||
U_MATH_SYMBOL = 24,
|
||||
/** */
|
||||
U_CURRENCY_SYMBOL = 25,
|
||||
/** */
|
||||
U_MODIFIER_SYMBOL = 26,
|
||||
/** */
|
||||
U_OTHER_SYMBOL = 27,
|
||||
/** */
|
||||
U_INITIAL_PUNCTUATION = 28,
|
||||
/** */
|
||||
U_FINAL_PUNCTUATION = 29,
|
||||
/** */
|
||||
U_GENERAL_OTHER_TYPES = 30,
|
||||
/** */
|
||||
U_CHAR_CATEGORY_COUNT
|
||||
};
|
||||
|
||||
typedef enum UCharCategory UCharCategory;
|
||||
/**
|
||||
* This specifies the language directional property of a character set.
|
||||
*/
|
||||
enum UCharDirection {
|
||||
/** */
|
||||
U_LEFT_TO_RIGHT = 0,
|
||||
/** */
|
||||
U_RIGHT_TO_LEFT = 1,
|
||||
/** */
|
||||
U_EUROPEAN_NUMBER = 2,
|
||||
/** */
|
||||
U_EUROPEAN_NUMBER_SEPARATOR = 3,
|
||||
/** */
|
||||
U_EUROPEAN_NUMBER_TERMINATOR = 4,
|
||||
/** */
|
||||
U_ARABIC_NUMBER = 5,
|
||||
/** */
|
||||
U_COMMON_NUMBER_SEPARATOR = 6,
|
||||
/** */
|
||||
U_BLOCK_SEPARATOR = 7,
|
||||
/** */
|
||||
U_SEGMENT_SEPARATOR = 8,
|
||||
/** */
|
||||
U_WHITE_SPACE_NEUTRAL = 9,
|
||||
/** */
|
||||
U_OTHER_NEUTRAL = 10,
|
||||
/** */
|
||||
U_LEFT_TO_RIGHT_EMBEDDING = 11,
|
||||
/** */
|
||||
U_LEFT_TO_RIGHT_OVERRIDE = 12,
|
||||
/** */
|
||||
U_RIGHT_TO_LEFT_ARABIC = 13,
|
||||
/** */
|
||||
U_RIGHT_TO_LEFT_EMBEDDING = 14,
|
||||
/** */
|
||||
U_RIGHT_TO_LEFT_OVERRIDE = 15,
|
||||
/** */
|
||||
U_POP_DIRECTIONAL_FORMAT = 16,
|
||||
/** */
|
||||
U_DIR_NON_SPACING_MARK = 17,
|
||||
/** */
|
||||
U_BOUNDARY_NEUTRAL = 18,
|
||||
/** */
|
||||
U_CHAR_DIRECTION_COUNT
|
||||
};
|
||||
|
||||
typedef enum UCharDirection UCharDirection;
|
||||
/**
|
||||
* Script range as defined in the Unicode standard.
|
||||
*/
|
||||
|
||||
/* Generated from Unicode Data files */
|
||||
enum UCharScript {
|
||||
/* Script names */
|
||||
/** */
|
||||
U_BASIC_LATIN,
|
||||
/** */
|
||||
U_LATIN_1_SUPPLEMENT,
|
||||
/** */
|
||||
U_LATIN_EXTENDED_A,
|
||||
/** */
|
||||
U_LATIN_EXTENDED_B,
|
||||
/** */
|
||||
U_IPA_EXTENSIONS,
|
||||
/** */
|
||||
U_SPACING_MODIFIER_LETTERS,
|
||||
/** */
|
||||
U_COMBINING_DIACRITICAL_MARKS,
|
||||
/** */
|
||||
U_GREEK,
|
||||
/** */
|
||||
U_CYRILLIC,
|
||||
/** */
|
||||
U_ARMENIAN,
|
||||
/** */
|
||||
U_HEBREW,
|
||||
/** */
|
||||
U_ARABIC,
|
||||
/** */
|
||||
U_SYRIAC,
|
||||
/** */
|
||||
U_THAANA,
|
||||
/** */
|
||||
U_DEVANAGARI,
|
||||
/** */
|
||||
U_BENGALI,
|
||||
/** */
|
||||
U_GURMUKHI,
|
||||
/** */
|
||||
U_GUJARATI,
|
||||
/** */
|
||||
U_ORIYA,
|
||||
/** */
|
||||
U_TAMIL,
|
||||
/** */
|
||||
U_TELUGU,
|
||||
/** */
|
||||
U_KANNADA,
|
||||
/** */
|
||||
U_MALAYALAM,
|
||||
/** */
|
||||
U_SINHALA,
|
||||
/** */
|
||||
U_THAI,
|
||||
/** */
|
||||
U_LAO,
|
||||
/** */
|
||||
U_TIBETAN,
|
||||
/** */
|
||||
U_MYANMAR,
|
||||
/** */
|
||||
U_GEORGIAN,
|
||||
/** */
|
||||
U_HANGUL_JAMO,
|
||||
/** */
|
||||
U_ETHIOPIC,
|
||||
/** */
|
||||
U_CHEROKEE,
|
||||
/** */
|
||||
U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
|
||||
/** */
|
||||
U_OGHAM,
|
||||
/** */
|
||||
U_RUNIC,
|
||||
/** */
|
||||
U_KHMER,
|
||||
/** */
|
||||
U_MONGOLIAN,
|
||||
/** */
|
||||
U_LATIN_EXTENDED_ADDITIONAL,
|
||||
/** */
|
||||
U_GREEK_EXTENDED,
|
||||
/** */
|
||||
U_GENERAL_PUNCTUATION,
|
||||
/** */
|
||||
U_SUPERSCRIPTS_AND_SUBSCRIPTS,
|
||||
/** */
|
||||
U_CURRENCY_SYMBOLS,
|
||||
/** */
|
||||
U_COMBINING_MARKS_FOR_SYMBOLS,
|
||||
/** */
|
||||
U_LETTERLIKE_SYMBOLS,
|
||||
/** */
|
||||
U_NUMBER_FORMS,
|
||||
/** */
|
||||
U_ARROWS,
|
||||
/** */
|
||||
U_MATHEMATICAL_OPERATORS,
|
||||
/** */
|
||||
U_MISCELLANEOUS_TECHNICAL,
|
||||
/** */
|
||||
U_CONTROL_PICTURES,
|
||||
/** */
|
||||
U_OPTICAL_CHARACTER_RECOGNITION,
|
||||
/** */
|
||||
U_ENCLOSED_ALPHANUMERICS,
|
||||
/** */
|
||||
U_BOX_DRAWING,
|
||||
/** */
|
||||
U_BLOCK_ELEMENTS,
|
||||
/** */
|
||||
U_GEOMETRIC_SHAPES,
|
||||
/** */
|
||||
U_MISCELLANEOUS_SYMBOLS,
|
||||
/** */
|
||||
U_DINGBATS,
|
||||
/** */
|
||||
U_BRAILLE_PATTERNS,
|
||||
/** */
|
||||
U_CJK_RADICALS_SUPPLEMENT,
|
||||
/** */
|
||||
U_KANGXI_RADICALS,
|
||||
/** */
|
||||
U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
|
||||
/** */
|
||||
U_CJK_SYMBOLS_AND_PUNCTUATION,
|
||||
/** */
|
||||
U_HIRAGANA,
|
||||
/** */
|
||||
U_KATAKANA,
|
||||
/** */
|
||||
U_BOPOMOFO,
|
||||
/** */
|
||||
U_HANGUL_COMPATIBILITY_JAMO,
|
||||
/** */
|
||||
U_KANBUN,
|
||||
/** */
|
||||
U_BOPOMOFO_EXTENDED,
|
||||
/** */
|
||||
U_ENCLOSED_CJK_LETTERS_AND_MONTHS,
|
||||
/** */
|
||||
U_CJK_COMPATIBILITY,
|
||||
/** */
|
||||
U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
|
||||
/** */
|
||||
U_CJK_UNIFIED_IDEOGRAPHS,
|
||||
/** */
|
||||
U_YI_SYLLABLES,
|
||||
/** */
|
||||
U_YI_RADICALS,
|
||||
/** */
|
||||
U_HANGUL_SYLLABLES,
|
||||
/** */
|
||||
U_HIGH_SURROGATES,
|
||||
/** */
|
||||
U_HIGH_PRIVATE_USE_SURROGATES,
|
||||
/** */
|
||||
U_LOW_SURROGATES,
|
||||
/** */
|
||||
U_PRIVATE_USE_AREA,
|
||||
/** */
|
||||
U_CJK_COMPATIBILITY_IDEOGRAPHS,
|
||||
/** */
|
||||
U_ALPHABETIC_PRESENTATION_FORMS,
|
||||
/** */
|
||||
U_ARABIC_PRESENTATION_FORMS_A,
|
||||
/** */
|
||||
U_COMBINING_HALF_MARKS,
|
||||
/** */
|
||||
U_CJK_COMPATIBILITY_FORMS,
|
||||
/** */
|
||||
U_SMALL_FORM_VARIANTS,
|
||||
/** */
|
||||
U_ARABIC_PRESENTATION_FORMS_B,
|
||||
/** */
|
||||
U_SPECIALS,
|
||||
/** */
|
||||
U_HALFWIDTH_AND_FULLWIDTH_FORMS,
|
||||
/** */
|
||||
U_CHAR_SCRIPT_COUNT,
|
||||
/** */
|
||||
U_NO_SCRIPT=U_CHAR_SCRIPT_COUNT
|
||||
};
|
||||
typedef enum UCharScript UCharScript;
|
||||
|
||||
/**
|
||||
* Values returned by the u_getCellWidth() function.
|
||||
*/
|
||||
enum UCellWidth
|
||||
{
|
||||
/** */
|
||||
U_ZERO_WIDTH = 0,
|
||||
/** */
|
||||
U_HALF_WIDTH = 1,
|
||||
/** */
|
||||
U_FULL_WIDTH = 2,
|
||||
/** */
|
||||
U_NEUTRAL_WIDTH = 3,
|
||||
/** */
|
||||
U_CELL_WIDTH_COUNT
|
||||
};
|
||||
|
||||
typedef enum UCellWidth UCellWidth;
|
||||
|
||||
/**
|
||||
* Selector constants for u_charName().
|
||||
* <code>u_charName()</code> returns either the "modern" name of a
|
||||
* Unicode character or the name that was defined in
|
||||
* Unicode version 1.0, before the Unicode standard merged
|
||||
* with ISO-10646.
|
||||
*
|
||||
* @see u_charName()
|
||||
*/
|
||||
enum UCharNameChoice {
|
||||
U_UNICODE_CHAR_NAME, /* presumably selects the "modern" Unicode character name - confirm against u_charName() */
|
||||
U_UNICODE_10_CHAR_NAME, /* presumably selects the name defined in Unicode version 1.0 - confirm against u_charName() */
|
||||
U_CHAR_NAME_CHOICE_COUNT /* last enumerator; its value equals the number of selectors declared before it */
|
||||
};
|
||||
|
||||
typedef enum UCharNameChoice UCharNameChoice;
|
||||
|
||||
/**
|
||||
* Functions to classify characters.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Determines whether the specified UChar is a lowercase character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is lowercase; false otherwise.
|
||||
* @see UNICODE_VERSION
|
||||
* @see u_isupper()
|
||||
* @see u_istitle()
|
||||
* @see u_islower()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_islower(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is an uppercase character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is uppercase; false otherwise.
|
||||
* @see u_islower()
|
||||
* @see u_istitle
|
||||
* @see u_tolower()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isupper(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a titlecase character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is titlecase; false otherwise.
|
||||
* @see u_isupper()
|
||||
* @see u_islower()
|
||||
* @see u_totitle()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_istitle(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a digit according to Unicode
|
||||
* 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is a digit; false otherwise.
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isdigit(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is an alphanumeric character
|
||||
* (letter or digit) according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is a letter or a digit; false otherwise.
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isalnum(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified numeric value is actually a defined character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character has a defined Unicode meaning; false otherwise.
|
||||
*
|
||||
* @see u_isdigit()
|
||||
* @see u_isalpha()
|
||||
* @see u_isalnum()
|
||||
* @see u_isupper()
|
||||
* @see u_islower()
|
||||
* @see u_istitle()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isdefined(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a letter
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is a letter; false otherwise.
|
||||
*
|
||||
* @see u_isdigit()
|
||||
* @see u_isalnum()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isalpha(UChar c);
|
||||
|
||||
/**
|
||||
* Determines if the specified character is a space character or not.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is a space character; false otherwise.
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isspace(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a control character or not.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the Unicode character is a control character; false otherwise.
|
||||
*
|
||||
* @see u_isprint()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_iscntrl(UChar c);
|
||||
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a printable character according
|
||||
* to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the Unicode character is a printable character; false otherwise.
|
||||
*
|
||||
* @see u_iscntrl()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isprint(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is of the base form according
|
||||
* to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the Unicode character is of the base form; false otherwise.
|
||||
*
|
||||
* @see u_isalpha()
|
||||
* @see u_isdigit()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isbase(UChar c);
|
||||
/**
|
||||
* Returns the linguistic direction property of a character.
|
||||
* <P>
|
||||
* Returns the linguistic direction property of a character.
|
||||
* For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
|
||||
* property.
|
||||
* @see UCharDirection
|
||||
*/
|
||||
U_CAPI UCharDirection U_EXPORT2
|
||||
u_charDirection(UChar c);
|
||||
|
||||
/**
|
||||
* Returns a value indicating the display-cell width of the character
|
||||
* when used in Asian text, according to the Unicode standard (see p. 6-130
|
||||
* of The Unicode Standard, Version 2.0). The results for various characters
|
||||
* are as follows:
|
||||
* <P>
|
||||
* ZERO_WIDTH: Characters which are considered to take up no display-cell space:
|
||||
* control characters
|
||||
* format characters
|
||||
* line and paragraph separators
|
||||
* non-spacing marks
|
||||
* combining Hangul jungseong
|
||||
* combining Hangul jongseong
|
||||
* unassigned Unicode values
|
||||
* <P>
|
||||
* HALF_WIDTH: Characters which take up half a cell in standard Asian text:
|
||||
* all characters in the General Scripts Area except combining Hangul choseong
|
||||
* and the characters called out specifically above as ZERO_WIDTH
|
||||
* alphabetic and Arabic presentation forms
|
||||
* halfwidth CJK punctuation
|
||||
* halfwidth Katakana
|
||||
* halfwidth Hangul Jamo
|
||||
* halfwidth forms, arrows, and shapes
|
||||
* <P>
|
||||
* FULL_WIDTH: Characters which take up a full cell in standard Asian text:
|
||||
* combining Hangul choseong
|
||||
* all characters in the CJK Phonetics and Symbols Area
|
||||
* all characters in the CJK Ideographs Area
|
||||
* all characters in the Hangul Syllables Area
|
||||
* CJK compatibility ideographs
|
||||
* CJK compatibility forms
|
||||
* small form variants
|
||||
* fullwidth ASCII
|
||||
* fullwidth punctuation and currency signs
|
||||
* <P>
|
||||
* NEUTRAL: Characters whose cell width is context-dependent:
|
||||
* all characters in the Symbols Area, except those specifically called out above
|
||||
* all characters in the Surrogates Area
|
||||
* all characters in the Private Use Area
|
||||
* <P>
|
||||
* For Korean text, this algorithm should work properly with properly normalized Korean
|
||||
* text. Precomposed Hangul syllables and non-combining jamo are all considered full-
|
||||
* width characters. For combining jamo, we treat choseong (initial consonants)
|
||||
* as double-width characters and jungseong (vowels) and jongseong (final consonants)
|
||||
* as non-spacing marks. This will work right in text that uses the precomposed
|
||||
* choseong characters instead of two choseong characters in a row, and which uses the
|
||||
* choseong filler character at the beginning of syllables that don't have an initial
|
||||
* consonant. The results may be slightly off with Korean text following different
|
||||
* conventions.
|
||||
*/
|
||||
U_CAPI uint16_t U_EXPORT2
|
||||
u_charCellWidth(UChar c);
|
||||
|
||||
/**
|
||||
* Returns a value indicating a character category according to Unicode
|
||||
* 2.1.2.
|
||||
* @param c the character to be tested
|
||||
* @return a value of type int, the character category.
|
||||
* @see UCharCategory
|
||||
*/
|
||||
U_CAPI int8_t U_EXPORT2
|
||||
u_charType(UChar c);
|
||||
|
||||
/**
|
||||
* Retrieves the decimal numeric value of a digit character.
|
||||
*
|
||||
* @param c the digit character for which to get the numeric value
|
||||
* @return the numeric value of c in decimal radix. This method returns
|
||||
* -1 if c is not a valid digit character.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_charDigitValue(UChar c);
|
||||
|
||||
/**
|
||||
* Returns the script associated with a character.
|
||||
*
|
||||
* @see #UCharScript
|
||||
*/
|
||||
U_CAPI UCharScript U_EXPORT2
|
||||
u_charScript(UChar ch);
|
||||
|
||||
/**
|
||||
* Retrieve the name of a Unicode character.
|
||||
* Depending on <code>nameChoice</code>, the character name written
|
||||
* into the buffer is the "modern" name or the name that was defined
|
||||
* in Unicode version 1.0.
|
||||
* The name contains only "invariant" characters
|
||||
* like A-Z, 0-9, space, and '-'.
|
||||
*
|
||||
* @param code The character (code point) for which to get the name.
|
||||
* It must be <code>0<=code<0x10ffff</code>.
|
||||
* @param nameChoice Selector for which name to get.
|
||||
* @param buffer Destination address for copying the name.
|
||||
* @param bufferLength <code>==sizeof(buffer)</code>
|
||||
* @param pErrorCode Pointer to a UErrorCode variable;
|
||||
* check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
|
||||
* returns.
|
||||
*
|
||||
* @see UCharNameChoice
|
||||
*/
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
u_charName(uint32_t code, UCharNameChoice nameChoice,
|
||||
char *buffer, UTextOffset bufferLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* The following functions are java specific.
|
||||
*/
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* is allowed to start in a Unicode identifier.
|
||||
* A character may start a Unicode identifier if and only if
|
||||
* it is a letter.
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character may start a Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isalpha
|
||||
* @see u_isIDPart
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isIDStart(UChar c);
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* may be part of a Unicode identifier other than the starting
|
||||
* character.
|
||||
* <P>
|
||||
* A character may be part of a Unicode identifier if and only if
|
||||
* it is one of the following:
|
||||
* <ul>
|
||||
* <li> a letter
|
||||
* <li> a connecting punctuation character (such as "_").
|
||||
* <li> a digit
|
||||
* <li> a numeric letter (such as a Roman numeral character)
|
||||
* <li> a combining mark
|
||||
* <li> a non-spacing mark
|
||||
* <li> an ignorable control character
|
||||
* </ul>
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character may be part of a Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isIDIgnorable
|
||||
* @see u_isIDStart
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isIDPart(UChar c);
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* should be regarded as an ignorable character
|
||||
* in a Unicode identifier.
|
||||
* <P>
|
||||
* The following Unicode characters are ignorable in a
|
||||
* Unicode identifier:
|
||||
* <table>
|
||||
* <tr><td>0x0000 through 0x0008,</td>
|
||||
* <td>ISO control characters that</td></tr>
|
||||
* <tr><td>0x000E through 0x001B,</td> <td>are not whitespace</td></tr>
|
||||
* <tr><td>and 0x007F through 0x009F</td></tr>
|
||||
* <tr><td>0x200C through 0x200F</td> <td>join controls</td></tr>
|
||||
* <tr><td>0x200A through 0x200E</td> <td>bidirectional controls</td></tr>
|
||||
* <tr><td>0x206A through 0x206F</td> <td>format controls</td></tr>
|
||||
* <tr><td>0xFEFF</td> <td>zero-width no-break space</td></tr>
|
||||
* </table>
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character is ignorable in a Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isIDPart
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isIDIgnorable(UChar c);
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* is allowed as the first character in a Java identifier.
|
||||
* <P>
|
||||
* A character may start a Java identifier if and only if
|
||||
* it is one of the following:
|
||||
* <ul>
|
||||
* <li> a letter
|
||||
* <li> a currency symbol (such as "$")
|
||||
* <li> a connecting punctuation symbol (such as "_").
|
||||
* </ul>
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character may start a Java identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isJavaIDPart
|
||||
* @see u_isalpha
|
||||
* @see u_isIDStart
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isJavaIDStart(UChar c);
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* may be part of a Java identifier other than the starting
|
||||
* character.
|
||||
* <P>
|
||||
* A character may be part of a Java identifier if and only if
|
||||
* it is one of the following:
|
||||
* <ul>
|
||||
* <li> a letter
|
||||
* <li> a currency symbol (such as "$")
|
||||
* <li> a connecting punctuation character (such as "_").
|
||||
* <li> a digit
|
||||
* <li> a numeric letter (such as a Roman numeral character)
|
||||
* <li> a combining mark
|
||||
* <li> a non-spacing mark
|
||||
* <li> an ignorable control character
|
||||
* </ul>
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character may be part of a Java identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isIDIgnorable
|
||||
* @see u_isJavaIDStart
|
||||
* @see u_isalpha
|
||||
* @see u_isdigit
|
||||
* @see u_isIDPart
|
||||
*/
|
||||
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isJavaIDPart(UChar c);
|
||||
|
||||
/**
|
||||
* Functions to change character case.
|
||||
*/
|
||||
|
||||
/**
|
||||
* The given character is mapped to its lowercase equivalent according to
|
||||
* Unicode 2.1.2; if the character has no lowercase equivalent, the character
|
||||
* itself is returned.
|
||||
* <P>
|
||||
* A character has a lowercase equivalent if and only if a lowercase mapping
|
||||
* is specified for the character in the Unicode 2.1.2 attribute table.
|
||||
* <P>
|
||||
* u_tolower() only deals with the general letter case conversion.
|
||||
* For language specific case conversion behavior, use ustrToUpper().
|
||||
* For example, the case conversion for dot-less i and dotted I in Turkish,
|
||||
* or for final sigma in Greek.
|
||||
*
|
||||
* @param c the character to be converted
|
||||
* @return the lowercase equivalent of the character, if any;
|
||||
* otherwise the character itself.
|
||||
*/
|
||||
U_CAPI UChar U_EXPORT2
|
||||
u_tolower(UChar c);
|
||||
|
||||
/**
|
||||
* The given character is mapped to its uppercase equivalent according to Unicode
|
||||
* 2.1.2; if the character has no uppercase equivalent, the character itself is
|
||||
* returned.
|
||||
* <P>
|
||||
* u_toupper() only deals with the general letter case conversion.
|
||||
* For language specific case conversion behavior, use ustrToUpper().
|
||||
* For example, the case conversion for dot-less i and dotted I in Turkish,
|
||||
* or ess-zed (i.e., "sharp S") in German.
|
||||
*
|
||||
* @param c the character to be converted
|
||||
* @return the uppercase equivalent of the character, if any;
|
||||
* otherwise the character itself.
|
||||
*/
|
||||
U_CAPI UChar U_EXPORT2
|
||||
u_toupper(UChar c);
|
||||
/**
|
||||
* The given character is mapped to its titlecase equivalent according to Unicode
|
||||
* 2.1.2. There are only four Unicode characters that are truly titlecase forms
|
||||
* that are distinct from uppercase forms. As a rule, if a character has no
|
||||
* true titlecase equivalent, its uppercase equivalent is returned.
|
||||
* <P>
|
||||
* A character has a titlecase equivalent if and only if a titlecase mapping
|
||||
* is specified for the character in the Unicode 2.1.2 data.
|
||||
*
|
||||
* @param c the character to be converted
|
||||
* @return the titlecase equivalent of the character, if any;
|
||||
* otherwise the character itself.
|
||||
*/
|
||||
U_CAPI UChar U_EXPORT2
|
||||
u_totitle(UChar c);
|
||||
|
||||
/**
|
||||
* The function is used to get the Unicode standard Version that is used.
|
||||
*
|
||||
* @return the Unicode standard Version number
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
u_getVersion(void);
|
||||
|
||||
#endif /*_UCHAR*/
|
||||
/*eof*/
|
||||
#error Please include unicode/uchar.h instead
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include "uchriter.h"
|
||||
#include "unicode/uchriter.h"
|
||||
|
||||
UCharCharacterIterator::UCharCharacterIterator(const UChar* text,
|
||||
int32_t textLength)
|
||||
|
|
|
@ -1,142 +1 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1998-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UCHRITER_H
|
||||
#define UCHRITER_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "chariter.h"
|
||||
|
||||
|
||||
/**
|
||||
* A concrete subclass of CharacterIterator that iterates over the
|
||||
* characters in a UnicodeString. It's possible not only to create an
|
||||
* iterator that iterates over an entire UnicodeString, but also to
|
||||
* create one that iterates over only a subrange of a UnicodeString
|
||||
* (iterators over different subranges of the same UnicodeString don't
|
||||
* compare equal). */
|
||||
class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
|
||||
public:
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "text".
|
||||
* The iteration range is the whole string, and the starting
|
||||
* position is 0. */
|
||||
UCharCharacterIterator(const UChar* text, int32_t len);
|
||||
|
||||
/**
|
||||
* Copy constructor. The new iterator iterates over the same range
|
||||
* of the same string as "that", and its initial position is the
|
||||
* same as "that"'s current position. */
|
||||
UCharCharacterIterator(const UCharCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Destructor. */
|
||||
~UCharCharacterIterator();
|
||||
|
||||
/**
|
||||
* Assignment operator. *this is altered to iterate over the same
|
||||
* range of the same string as "that", and refers to the same
|
||||
* character within that string as "that" does. */
|
||||
UCharCharacterIterator&
|
||||
operator=(const UCharCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Returns true if the iterators iterate over the same range of the
|
||||
* same string and are pointing at the same character. */
|
||||
virtual bool_t operator==(const CharacterIterator& that) const;
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator. */
|
||||
virtual int32_t hashCode(void) const;
|
||||
|
||||
/**
|
||||
* Returns a new StringCharacterIterator referring to the same
|
||||
* character in the same range of the same string as this one. The
|
||||
* caller must delete the new iterator. */
|
||||
virtual CharacterIterator* clone(void) const;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first character in its
|
||||
* iteration range, and returns that character. */
|
||||
virtual UChar first(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last character in its iteration
|
||||
* range, and returns that character. */
|
||||
virtual UChar last(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th character in the
|
||||
* UnicodeString the iterator refers to, and returns that character.
|
||||
* If the index is outside the iterator's iteration range, the
|
||||
* behavior of the iterator is undefined. */
|
||||
virtual UChar setIndex(UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Returns the character the iterator currently refers to. */
|
||||
virtual UChar current(void) const;
|
||||
|
||||
/**
|
||||
* Advances to the next character in the iteration range (toward
|
||||
* last()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE. */
|
||||
virtual UChar next(void);
|
||||
|
||||
/**
|
||||
* Advances to the previous character in the iteration range (toward
|
||||
* first()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE. */
|
||||
virtual UChar previous(void);
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the first character in this
|
||||
* iterator's iteration range. */
|
||||
virtual UTextOffset startIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the character immediately BEYOND the
|
||||
* last character in this iterator's iteration range. */
|
||||
virtual UTextOffset endIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying UnicodeString of the
|
||||
* character the iterator currently refers to (i.e., the character
|
||||
* returned by current()). */
|
||||
virtual UTextOffset getIndex(void) const;
|
||||
|
||||
/**
|
||||
* Copies the UnicodeString under iteration into the UnicodeString
|
||||
* referred to by "result". Even if this iterator iterates across
|
||||
* only a part of this string, the whole string is copied. @param
|
||||
* result Receives a copy of the text under iteration. */
|
||||
virtual void getText(UnicodeString& result);
|
||||
|
||||
/**
|
||||
* Return a class ID for this object (not really public) */
|
||||
virtual UClassID getDynamicClassID(void) const
|
||||
{ return getStaticClassID(); }
|
||||
|
||||
/**
|
||||
* Return a class ID for this class (not really public) */
|
||||
static UClassID getStaticClassID(void)
|
||||
{ return (UClassID)(&fgClassID); }
|
||||
|
||||
private:
|
||||
UCharCharacterIterator();
|
||||
|
||||
const UChar* text;
|
||||
UTextOffset pos;
|
||||
UTextOffset begin;
|
||||
UTextOffset end;
|
||||
|
||||
static char fgClassID;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#error Please include unicode/uchriter.h instead
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
|
||||
|
||||
|
||||
#define arrayRegionMatches(source, sourceStart, target, targetStart, len) (icu_memcmp(&source[sourceStart], &target[targetStart], len * sizeof(int16_t)) != 0)
|
||||
#define arrayRegionMatches(source, sourceStart, target, targetStart, len) (uprv_memcmp(&source[sourceStart], &target[targetStart], len * sizeof(int16_t)) != 0)
|
||||
|
||||
/* internal constants*/
|
||||
#define UCMP16_kMaxUnicode_int 65535
|
||||
|
@ -78,7 +78,7 @@ int32_t ucmp16_getkBlockCount()
|
|||
CompactShortArray* ucmp16_open(int16_t defaultValue)
|
||||
{
|
||||
int32_t i;
|
||||
CompactShortArray* this_obj = (CompactShortArray*) icu_malloc(sizeof(CompactShortArray));
|
||||
CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
|
||||
if (this_obj == NULL) return NULL;
|
||||
|
||||
this_obj->fStructSize = sizeof(CompactShortArray);
|
||||
|
@ -91,17 +91,17 @@ CompactShortArray* ucmp16_open(int16_t defaultValue)
|
|||
this_obj->fHashes = NULL;
|
||||
this_obj->fDefaultValue = defaultValue;
|
||||
|
||||
this_obj->fArray = (int16_t*)icu_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
|
||||
this_obj->fArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
|
||||
if (this_obj->fArray == NULL)
|
||||
{
|
||||
this_obj->fBogus = TRUE;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
this_obj->fIndex = (uint16_t*)icu_malloc(UCMP16_kIndexCount * sizeof(uint16_t));
|
||||
this_obj->fIndex = (uint16_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(uint16_t));
|
||||
if (this_obj->fIndex == NULL)
|
||||
{
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = NULL;
|
||||
|
||||
this_obj->fBogus = TRUE;
|
||||
|
@ -115,11 +115,11 @@ CompactShortArray* ucmp16_open(int16_t defaultValue)
|
|||
this_obj->fArray[i] = defaultValue;
|
||||
}
|
||||
|
||||
this_obj->fHashes =(int32_t*)icu_malloc(UCMP16_kIndexCount * sizeof(int32_t));
|
||||
this_obj->fHashes =(int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t));
|
||||
if (this_obj->fHashes == NULL)
|
||||
{
|
||||
icu_free(this_obj->fArray);
|
||||
icu_free(this_obj->fIndex);
|
||||
uprv_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fIndex);
|
||||
this_obj->fBogus = TRUE;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -138,7 +138,7 @@ CompactShortArray* ucmp16_openAdopt(uint16_t *indexArray,
|
|||
int32_t count,
|
||||
int16_t defaultValue)
|
||||
{
|
||||
CompactShortArray* this_obj = (CompactShortArray*) icu_malloc(sizeof(CompactShortArray));
|
||||
CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
|
||||
if (this_obj == NULL) return NULL;
|
||||
this_obj->fHashes = NULL;
|
||||
this_obj->fCount = count;
|
||||
|
@ -179,7 +179,7 @@ CompactShortArray* ucmp16_openAlias(uint16_t *indexArray,
|
|||
int32_t count,
|
||||
int16_t defaultValue)
|
||||
{
|
||||
CompactShortArray* this_obj = (CompactShortArray*) icu_malloc(sizeof(CompactShortArray));
|
||||
CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
|
||||
if (this_obj == NULL) return NULL;
|
||||
this_obj->fHashes = NULL;
|
||||
this_obj->fCount = count;
|
||||
|
@ -202,16 +202,16 @@ void ucmp16_close(CompactShortArray* this_obj)
|
|||
if(this_obj != NULL) {
|
||||
if(!this_obj->fAlias) {
|
||||
if(this_obj->fArray != NULL) {
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
}
|
||||
if(this_obj->fIndex != NULL) {
|
||||
icu_free(this_obj->fIndex);
|
||||
uprv_free(this_obj->fIndex);
|
||||
}
|
||||
}
|
||||
if(this_obj->fHashes != NULL) {
|
||||
icu_free(this_obj->fHashes);
|
||||
uprv_free(this_obj->fHashes);
|
||||
}
|
||||
icu_free(this_obj);
|
||||
uprv_free(this_obj);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -219,13 +219,13 @@ CompactShortArray* setToBogus(CompactShortArray* this_obj)
|
|||
{
|
||||
if(this_obj != NULL) {
|
||||
if(!this_obj->fAlias) {
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = NULL;
|
||||
|
||||
icu_free(this_obj->fIndex);
|
||||
uprv_free(this_obj->fIndex);
|
||||
this_obj->fIndex = NULL;
|
||||
}
|
||||
icu_free(this_obj->fHashes);
|
||||
uprv_free(this_obj->fHashes);
|
||||
this_obj->fHashes = NULL;
|
||||
|
||||
this_obj->fCount = 0;
|
||||
|
@ -242,7 +242,7 @@ void ucmp16_expand(CompactShortArray* this_obj)
|
|||
if (this_obj->fCompact)
|
||||
{
|
||||
int32_t i;
|
||||
int16_t *tempArray = (int16_t*)icu_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
|
||||
int16_t *tempArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
|
||||
|
||||
if (tempArray == NULL)
|
||||
{
|
||||
|
@ -260,7 +260,7 @@ void ucmp16_expand(CompactShortArray* this_obj)
|
|||
this_obj->fIndex[i] = (uint16_t)(i<<this_obj->kBlockShift);
|
||||
}
|
||||
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = tempArray;
|
||||
this_obj->fCompact = FALSE;
|
||||
}
|
||||
|
@ -357,7 +357,7 @@ void ucmp16_compact(CompactShortArray* this_obj)
|
|||
if (this_obj->fIndex[i] == 0xFFFF)
|
||||
{
|
||||
/* we didn't match, so copy & update*/
|
||||
icu_memcpy(&(this_obj->fArray[jBlockStart]),
|
||||
uprv_memcpy(&(this_obj->fArray[jBlockStart]),
|
||||
&(this_obj->fArray[iBlockStart]),
|
||||
(1 << this_obj->kBlockShift)*sizeof(int16_t));
|
||||
|
||||
|
@ -378,14 +378,14 @@ void ucmp16_compact(CompactShortArray* this_obj)
|
|||
/* we are done compacting, so now make the array shorter*/
|
||||
{
|
||||
int32_t newSize = limitCompacted * (1 << this_obj->kBlockShift);
|
||||
int16_t *result = (int16_t*) icu_malloc(sizeof(int16_t) * newSize);
|
||||
int16_t *result = (int16_t*) uprv_malloc(sizeof(int16_t) * newSize);
|
||||
|
||||
icu_memcpy(result, this_obj->fArray, newSize * sizeof(int16_t));
|
||||
uprv_memcpy(result, this_obj->fArray, newSize * sizeof(int16_t));
|
||||
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = result;
|
||||
this_obj->fCount = newSize;
|
||||
icu_free(this_obj->fHashes);
|
||||
uprv_free(this_obj->fHashes);
|
||||
this_obj->fHashes = NULL;
|
||||
|
||||
this_obj->fCompact = TRUE;
|
||||
|
@ -453,7 +453,7 @@ U_CAPI CompactShortArray * U_EXPORT2 ucmp16_cloneFromData(const uint8_t **sourc
|
|||
}
|
||||
array = (CompactShortArray*)malloc(sizeof(*array));
|
||||
|
||||
icu_memcpy(array,*source, sizeof(*array));
|
||||
uprv_memcpy(array,*source, sizeof(*array));
|
||||
|
||||
*source += array->fStructSize;
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#define UCMP16_H
|
||||
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
|
||||
|
||||
|
@ -66,7 +66,7 @@
|
|||
* @see CompactIntArray
|
||||
* @see CompactCharArray
|
||||
* @see CompactStringArray
|
||||
* @version $Revision: 1.5 $ 8/25/98
|
||||
* @version $Revision: 1.6 $ 8/25/98
|
||||
* @author Helena Shih
|
||||
*/
|
||||
|
||||
|
|
|
@ -48,9 +48,9 @@ char c;
|
|||
if (this_obj->fCount != newCount)
|
||||
{
|
||||
this_obj->fCount = newCount;
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = 0;
|
||||
this_obj->fArray = (int32_t*)icu_malloc(this_obj->fCount * sizeof(int32_t));
|
||||
this_obj->fArray = (int32_t*)uprv_malloc(this_obj->fCount * sizeof(int32_t));
|
||||
if (!this_obj->fArray) {
|
||||
this_obj->fBogus = TRUE;
|
||||
return;
|
||||
|
@ -60,16 +60,16 @@ char c;
|
|||
T_FileStream_read(is, &len, sizeof(len));
|
||||
if (len == 0)
|
||||
{
|
||||
icu_free(this_obj->fIndex);
|
||||
uprv_free(this_obj->fIndex);
|
||||
this_obj->fIndex = 0;
|
||||
}
|
||||
else if (len == UCMP32_kIndexCount)
|
||||
{
|
||||
if (this_obj->fIndex == 0)
|
||||
this_obj->fIndex =(uint16_t*)icu_malloc(UCMP32_kIndexCount * sizeof(uint16_t));
|
||||
this_obj->fIndex =(uint16_t*)uprv_malloc(UCMP32_kIndexCount * sizeof(uint16_t));
|
||||
if (!this_obj->fIndex) {
|
||||
this_obj->fBogus = TRUE;
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = 0;
|
||||
return;
|
||||
}
|
||||
|
@ -123,7 +123,7 @@ CompactIntArray* ucmp32_open(int32_t defaultValue)
|
|||
uint16_t i;
|
||||
int32_t *p, *p_end;
|
||||
uint16_t *q, *q_end;
|
||||
CompactIntArray* this_obj = (CompactIntArray*) icu_malloc(sizeof(CompactIntArray));
|
||||
CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray));
|
||||
if (this_obj == NULL) return NULL;
|
||||
|
||||
this_obj->fCount = UCMP32_kUnicodeCount;
|
||||
|
@ -152,15 +152,15 @@ CompactIntArray* ucmp32_open(int32_t defaultValue)
|
|||
* to data position number 8, which has elements "bced". In the compressed
|
||||
* version, index# 2 points to data position 1, which also has "bced"
|
||||
*/
|
||||
this_obj->fArray = (int32_t*)icu_malloc(UCMP32_kUnicodeCount * sizeof(int32_t));
|
||||
this_obj->fArray = (int32_t*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(int32_t));
|
||||
if (this_obj->fArray == NULL) {
|
||||
this_obj->fBogus = TRUE;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
this_obj->fIndex = (uint16_t*)icu_malloc(UCMP32_kIndexCount * sizeof(uint16_t));
|
||||
this_obj->fIndex = (uint16_t*)uprv_malloc(UCMP32_kIndexCount * sizeof(uint16_t));
|
||||
if (!this_obj->fIndex) {
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = NULL;
|
||||
this_obj->fBogus = TRUE;
|
||||
return NULL;
|
||||
|
@ -182,7 +182,7 @@ CompactIntArray* ucmp32_open(int32_t defaultValue)
|
|||
|
||||
CompactIntArray* ucmp32_openAdopt(uint16_t *indexArray, int32_t *newValues, int32_t count)
|
||||
{
|
||||
CompactIntArray* this_obj = (CompactIntArray*) icu_malloc(sizeof(CompactIntArray));
|
||||
CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray));
|
||||
if (this_obj == NULL) return NULL;
|
||||
this_obj->fCount = count;
|
||||
this_obj->fBogus = FALSE;
|
||||
|
@ -198,12 +198,12 @@ void ucmp32_close( CompactIntArray* this_obj)
|
|||
{
|
||||
if(this_obj != NULL) {
|
||||
if(this_obj->fArray != NULL) {
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
}
|
||||
if(this_obj->fIndex != NULL) {
|
||||
icu_free(this_obj->fIndex);
|
||||
uprv_free(this_obj->fIndex);
|
||||
}
|
||||
icu_free(this_obj);
|
||||
uprv_free(this_obj);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -229,7 +229,7 @@ void ucmp32_expand(CompactIntArray* this_obj) {
|
|||
int32_t i;
|
||||
int32_t* tempArray;
|
||||
if (this_obj->fCompact) {
|
||||
tempArray = (int32_t*)icu_malloc(UCMP32_kUnicodeCount * sizeof(int32_t));
|
||||
tempArray = (int32_t*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(int32_t));
|
||||
if (tempArray == NULL) {
|
||||
this_obj->fBogus = TRUE;
|
||||
return;
|
||||
|
@ -240,7 +240,7 @@ void ucmp32_expand(CompactIntArray* this_obj) {
|
|||
for (i = 0; i < UCMP32_kIndexCount; ++i) {
|
||||
this_obj->fIndex[i] = (uint16_t)(i<<UCMP32_kBlockShift);
|
||||
}
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = tempArray;
|
||||
this_obj->fCompact = FALSE;
|
||||
}
|
||||
|
@ -345,7 +345,7 @@ void ucmp32_compact(CompactIntArray* this_obj, int32_t cycle) {
|
|||
cycle = UCMP32_kBlockCount;
|
||||
|
||||
/* make temp storage, larger than we need*/
|
||||
tempIndex =(UChar*)icu_malloc(UCMP32_kUnicodeCount * sizeof(uint32_t));
|
||||
tempIndex =(UChar*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(uint32_t));
|
||||
if (tempIndex == NULL) {
|
||||
this_obj->fBogus = TRUE;
|
||||
return;
|
||||
|
@ -381,23 +381,23 @@ void ucmp32_compact(CompactIntArray* this_obj, int32_t cycle) {
|
|||
|
||||
|
||||
/* now allocate and copy the items into the array*/
|
||||
tempArray = (int32_t*)icu_malloc(tempIndexCount * sizeof(uint32_t));
|
||||
tempArray = (int32_t*)uprv_malloc(tempIndexCount * sizeof(uint32_t));
|
||||
if (tempArray == NULL) {
|
||||
this_obj->fBogus = TRUE;
|
||||
icu_free(tempIndex);
|
||||
uprv_free(tempIndex);
|
||||
return;
|
||||
}
|
||||
for (iIndex = 0; iIndex < tempIndexCount; ++iIndex) {
|
||||
tempArray[iIndex] = this_obj->fArray[tempIndex[iIndex]];
|
||||
}
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = tempArray;
|
||||
this_obj->fCount = tempIndexCount;
|
||||
|
||||
|
||||
|
||||
/* free up temp storage*/
|
||||
icu_free(tempIndex);
|
||||
uprv_free(tempIndex);
|
||||
this_obj->fCompact = TRUE;
|
||||
|
||||
#ifdef _DEBUG
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#define UCMP32_H
|
||||
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#include "filestrm.h"
|
||||
|
||||
|
@ -75,7 +75,7 @@
|
|||
* @see CompactIntArray
|
||||
* @see CompactCharArray
|
||||
* @see CompactStringArray
|
||||
* @version $Revision: 1.5 $ 8/25/98
|
||||
* @version $Revision: 1.6 $ 8/25/98
|
||||
* @author Helena Shih
|
||||
*/
|
||||
/*====================================*/
|
||||
|
|
|
@ -54,7 +54,7 @@ CompactByteArray* ucmp8_open(int8_t defaultValue)
|
|||
* to data position number 8, which has elements "bced". In the compressed
|
||||
* version, index# 2 points to data position 1, which also has "bced"
|
||||
*/
|
||||
CompactByteArray* this_obj = (CompactByteArray*) icu_malloc(sizeof(CompactByteArray));
|
||||
CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
|
||||
int32_t i;
|
||||
|
||||
if (this_obj == NULL) return NULL;
|
||||
|
@ -68,16 +68,16 @@ CompactByteArray* ucmp8_open(int8_t defaultValue)
|
|||
this_obj->fAlias = FALSE;
|
||||
|
||||
|
||||
this_obj->fArray = (int8_t*) icu_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
|
||||
this_obj->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
|
||||
if (!this_obj->fArray)
|
||||
{
|
||||
this_obj->fBogus = TRUE;
|
||||
return NULL;
|
||||
}
|
||||
this_obj->fIndex = (uint16_t*) icu_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
|
||||
this_obj->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
|
||||
if (!this_obj->fIndex)
|
||||
{
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = NULL;
|
||||
this_obj->fBogus = TRUE;
|
||||
return NULL;
|
||||
|
@ -98,7 +98,7 @@ CompactByteArray* ucmp8_openAdopt(uint16_t *indexArray,
|
|||
int8_t *newValues,
|
||||
int32_t count)
|
||||
{
|
||||
CompactByteArray* this_obj = (CompactByteArray*) icu_malloc(sizeof(CompactByteArray));
|
||||
CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
|
||||
if (!this_obj) return NULL;
|
||||
|
||||
this_obj->fArray = NULL;
|
||||
|
@ -118,7 +118,7 @@ CompactByteArray* ucmp8_openAlias(uint16_t *indexArray,
|
|||
int8_t *newValues,
|
||||
int32_t count)
|
||||
{
|
||||
CompactByteArray* this_obj = (CompactByteArray*) icu_malloc(sizeof(CompactByteArray));
|
||||
CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
|
||||
if (!this_obj) return NULL;
|
||||
|
||||
this_obj->fArray = NULL;
|
||||
|
@ -141,13 +141,13 @@ void ucmp8_close(CompactByteArray* this_obj)
|
|||
if(this_obj != NULL) {
|
||||
if(!this_obj->fAlias) {
|
||||
if(this_obj->fArray != NULL) {
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
}
|
||||
if(this_obj->fIndex != NULL) {
|
||||
icu_free(this_obj->fIndex);
|
||||
uprv_free(this_obj->fIndex);
|
||||
}
|
||||
}
|
||||
icu_free(this_obj);
|
||||
uprv_free(this_obj);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -173,7 +173,7 @@ void ucmp8_expand(CompactByteArray* this_obj)
|
|||
if (this_obj->fCompact)
|
||||
{
|
||||
int8_t* tempArray;
|
||||
tempArray = (int8_t*) icu_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
|
||||
tempArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
|
||||
if (!tempArray)
|
||||
{
|
||||
this_obj->fBogus = TRUE;
|
||||
|
@ -187,7 +187,7 @@ void ucmp8_expand(CompactByteArray* this_obj)
|
|||
{
|
||||
this_obj->fIndex[i] = (uint16_t)(i<< UCMP8_kBlockShift);
|
||||
}
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = tempArray;
|
||||
this_obj->fCompact = FALSE;
|
||||
this_obj->fAlias = FALSE;
|
||||
|
@ -321,7 +321,7 @@ ucmp8_compact(CompactByteArray* this_obj,
|
|||
else if (cycle > (uint32_t)UCMP8_kBlockCount) cycle = UCMP8_kBlockCount;
|
||||
|
||||
/* make temp storage, larger than we need*/
|
||||
tempIndex = (UChar*) icu_malloc(sizeof(UChar)* UCMP8_kUnicodeCount);
|
||||
tempIndex = (UChar*) uprv_malloc(sizeof(UChar)* UCMP8_kUnicodeCount);
|
||||
if (!tempIndex)
|
||||
{
|
||||
this_obj->fBogus = TRUE;
|
||||
|
@ -364,24 +364,24 @@ ucmp8_compact(CompactByteArray* this_obj,
|
|||
} /* endfor (iBlock = 1.....)*/
|
||||
|
||||
/* now allocate and copy the items into the array*/
|
||||
tempArray = (int8_t*) icu_malloc(tempIndexCount * sizeof(int8_t));
|
||||
tempArray = (int8_t*) uprv_malloc(tempIndexCount * sizeof(int8_t));
|
||||
if (!tempArray)
|
||||
{
|
||||
this_obj->fBogus = TRUE;
|
||||
icu_free(tempIndex);
|
||||
uprv_free(tempIndex);
|
||||
return;
|
||||
}
|
||||
for (iIndex = 0; iIndex < tempIndexCount; ++iIndex)
|
||||
{
|
||||
tempArray[iIndex] = this_obj->fArray[tempIndex[iIndex]];
|
||||
}
|
||||
icu_free(this_obj->fArray);
|
||||
uprv_free(this_obj->fArray);
|
||||
this_obj->fArray = tempArray;
|
||||
this_obj->fCount = tempIndexCount;
|
||||
|
||||
|
||||
/* free up temp storage*/
|
||||
icu_free(tempIndex);
|
||||
uprv_free(tempIndex);
|
||||
this_obj->fCompact = TRUE;
|
||||
} /* endif (!this_obj->fCompact)*/
|
||||
}
|
||||
|
@ -407,7 +407,7 @@ U_CAPI CompactByteArray * U_EXPORT2 ucmp8_cloneFromData(const uint8_t **source,
|
|||
}
|
||||
array = (CompactByteArray*)malloc(sizeof(*array));
|
||||
|
||||
icu_memcpy(array,*source, sizeof(*array));
|
||||
uprv_memcpy(array,*source, sizeof(*array));
|
||||
|
||||
array->fAlias = TRUE;
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#define UCMP8_H
|
||||
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/*====================================*/
|
||||
/* class CompactByteArray
|
||||
|
|
|
@ -21,20 +21,20 @@
|
|||
* 04/04/99 helena Fixed internal header inclusion.
|
||||
*/
|
||||
#include "umutex.h"
|
||||
#include "ures.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "uhash.h"
|
||||
#include "ucmp16.h"
|
||||
#include "ucmp8.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
#include "ucnv_io.h"
|
||||
#include "ucnv_err.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "ucnv_imp.h"
|
||||
#include "ucnv.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "ustring.h"
|
||||
#include "uloc.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/uloc.h"
|
||||
|
||||
#define CHUNK_SIZE 5*1024
|
||||
|
||||
|
@ -207,8 +207,8 @@ UConverter* ucnv_openCCSID (int32_t codepage,
|
|||
return NULL;
|
||||
|
||||
copyPlatformString (myName, platform);
|
||||
icu_strcat (myName, "-");
|
||||
T_CString_integerToString (myName + icu_strlen (myName), codepage, 10);
|
||||
uprv_strcat (myName, "-");
|
||||
T_CString_integerToString (myName + uprv_strlen (myName), codepage, 10);
|
||||
|
||||
|
||||
return createConverter (myName, err);
|
||||
|
@ -225,13 +225,13 @@ void ucnv_close (UConverter * converter)
|
|||
(converter->mode == UCNV_SO))
|
||||
{
|
||||
ucnv_close (((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter);
|
||||
icu_free (converter->extraInfo);
|
||||
uprv_free (converter->extraInfo);
|
||||
}
|
||||
|
||||
umtx_lock (NULL);
|
||||
converter->sharedData->referenceCounter--;
|
||||
umtx_unlock (NULL);
|
||||
icu_free (converter);
|
||||
uprv_free (converter);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -306,7 +306,7 @@ void ucnv_getSubstChars (const UConverter * converter,
|
|||
return;
|
||||
}
|
||||
|
||||
icu_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */
|
||||
uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */
|
||||
*len = converter->subCharLen; /*store # of bytes copied to buffer */
|
||||
|
||||
return;
|
||||
|
@ -330,7 +330,7 @@ void ucnv_setSubstChars (UConverter * converter,
|
|||
return;
|
||||
}
|
||||
|
||||
icu_memcpy (converter->subChar, mySubChar, len); /*copies the subchars */
|
||||
uprv_memcpy (converter->subChar, mySubChar, len); /*copies the subchars */
|
||||
converter->subCharLen = len; /*sets the new len */
|
||||
|
||||
return;
|
||||
|
@ -373,7 +373,7 @@ int32_t ucnv_getDisplayName (const UConverter * converter,
|
|||
*sets stringToWriteLength (which accounts for a NULL terminator)
|
||||
*and stringToWrite
|
||||
*/
|
||||
stringToWriteLength = icu_strlen (converter->sharedData->name) + 1;
|
||||
stringToWriteLength = uprv_strlen (converter->sharedData->name) + 1;
|
||||
stringToWrite = u_uastrcpy (stringToWriteBuffer, converter->sharedData->name);
|
||||
|
||||
/*Hides the fallback to the internal name from the user */
|
||||
|
@ -931,7 +931,7 @@ UChar ucnv_getNextUChar (UConverter * converter,
|
|||
*Note that in the call itself we decrement
|
||||
*UCharErrorBufferLength
|
||||
*/
|
||||
icu_memmove (converter->UCharErrorBuffer,
|
||||
uprv_memmove (converter->UCharErrorBuffer,
|
||||
converter->UCharErrorBuffer + 1,
|
||||
--(converter->UCharErrorBufferLength) * sizeof (UChar));
|
||||
return myUChar;
|
||||
|
@ -1151,6 +1151,6 @@ void ucnv_getStarters(const UConverter* converter,
|
|||
}
|
||||
|
||||
/*fill's in the starters boolean array*/
|
||||
icu_memcpy(starters, converter->sharedData->table->mbcs.starters, 256*sizeof(bool_t));
|
||||
uprv_memcpy(starters, converter->sharedData->table->mbcs.starters, 256*sizeof(bool_t));
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1,605 +1 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* ucnv.h:
|
||||
* External APIs for the ICU's codeset conversion library
|
||||
* Bertrand A. Damiba
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 04/04/99 helena Fixed internal header inclusion.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @name Character Conversion C API
|
||||
*
|
||||
* Character Conversion C API documentation is still under construction.
|
||||
* Please check for updates soon.
|
||||
*/
|
||||
|
||||
#ifndef UCNV_H
|
||||
#define UCNV_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_err.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
typedef void (*UConverterToUCallback) (UConverter *,
|
||||
UChar **,
|
||||
const UChar *,
|
||||
const char **,
|
||||
const char *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
typedef void (*UConverterFromUCallback) (UConverter *,
|
||||
char **,
|
||||
const char *,
|
||||
const UChar **,
|
||||
const UChar *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
|
||||
/**
|
||||
* Creates a UConverter object with the names specified as a C string.
|
||||
* The actual name will be resolved with the alias file.
|
||||
* if <TT>NULL</TT> is passed for the converter name, it will create one with the
|
||||
* getDefaultName return value.
|
||||
* @param converterName : name of the uconv table
|
||||
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
|
||||
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
|
||||
* @see ucnv_openU
|
||||
* @see ucnv_openCCSID
|
||||
* @see ucnv_close
|
||||
*/
|
||||
|
||||
U_CAPI
|
||||
UConverter* U_EXPORT2 ucnv_open (const char *converterName,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Creates a Unicode converter with the names specified as unicode string. The name should be limited to
|
||||
* the ASCII-7 alphanumerics range. The actual name will be resolved with the alias file.
|
||||
* if <TT>NULL</TT> is passed for the converter name, it will create one with the
|
||||
* getDefaultName return value.
|
||||
* @param converterName : name of the uconv table in a zero terminated Unicode string
|
||||
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
|
||||
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
|
||||
* @see ucnv_open
|
||||
* @see ucnv_openCCSID
|
||||
* @see ucnv_close
|
||||
*/
|
||||
U_CAPI UConverter* U_EXPORT2 ucnv_openU (const UChar * name,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Creates a UConverter object using a CCSID number.
|
||||
*
|
||||
* @param codepage : codepage # of the uconv table
|
||||
* @param platform : codepage's platform (now only <TT>IBM</TT> supported)
|
||||
* @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
|
||||
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
|
||||
* @see ucnv_open
|
||||
* @see ucnv_openU
|
||||
* @see ucnv_close
|
||||
*/
|
||||
|
||||
U_CAPI UConverter* U_EXPORT2 ucnv_openCCSID (int32_t codepage,
|
||||
UConverterPlatform platform,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Deletes the unicode converter.
|
||||
*
|
||||
* @param converter the converter object to be deleted
|
||||
* @see ucnv_open
|
||||
* @see ucnv_openU
|
||||
* @see ucnv_openCCSID
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucnv_close (UConverter * converter);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Fills in the output parameter, subChars, with the substitution characters
|
||||
* as multiple bytes.
|
||||
*
|
||||
* @param converter: the Unicode converter
|
||||
* @param subChars: the substitution characters
|
||||
* @param len: on input the capacity of subChars, on output the number of bytes copied to it
|
||||
* @param err: the outgoing error status code.
|
||||
* If the substitution character array is too small, an
|
||||
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
|
||||
* @see ucnv_setSubstChars
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getSubstChars (const UConverter * converter,
|
||||
char *subChars,
|
||||
int8_t * len,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* Sets the substitution chars when converting from unicode to a codepage. The
|
||||
* substitution is specified as a string of 1-4 bytes, and may contain <TT>NULL</TT> byte.
|
||||
* The fill-in parameter err will get the error status on return.
|
||||
* @param converter the Unicode converter
|
||||
* @param subChars the substitution character byte sequence we want set
|
||||
* @param len the number of bytes in subChars
|
||||
* @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
|
||||
* len is bigger than the maximum number of bytes allowed in subchars
|
||||
* @see ucnv_getSubstChars
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_setSubstChars (UConverter * converter,
|
||||
const char *subChars,
|
||||
int8_t len,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Fills in the output parameter, errBytes, with the error characters from the
|
||||
* last failing conversion.
|
||||
*
|
||||
* @param converter: the Unicode converter
|
||||
* @param errBytes: the bytes in error
|
||||
* @param len: on input the capacity of errBytes, on output the number of bytes copied to it
|
||||
* @param err: the outgoing error status code.
|
||||
* If the substitution character array is too small, an
|
||||
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getInvalidChars (const UConverter * converter,
|
||||
char *errBytes,
|
||||
int8_t * len,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Fills in the output parameter, errUChars, with the error characters from the
|
||||
* last failing conversion.
|
||||
*
|
||||
* @param converter: the Unicode converter
|
||||
* @param errUChars: the bytes in error
|
||||
* @param len: on input the capacity of errUChars, on output the number of UChars copied to it
|
||||
* @param err: the outgoing error status code.
|
||||
* If the substitution character array is too small, an
|
||||
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getInvalidUChars (const UConverter * converter,
|
||||
char *errUChars,
|
||||
int8_t * len,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* Resets the state of stateful conversion to the default state. This is used
|
||||
* in the case of error to restart a conversion from a known default state.
|
||||
* it will also empty the internal output buffers.
|
||||
* @param converter the Unicode converter
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_reset (UConverter * converter);
|
||||
|
||||
/**
|
||||
* Returns the maximum length of bytes used by a character. This varies between 1 and 4
|
||||
* @param converter the Unicode converter
|
||||
* @return the maximum number of bytes allowed by this particular converter
|
||||
* @see ucnv_getMinCharSize
|
||||
*/
|
||||
U_CAPI int8_t U_EXPORT2
|
||||
ucnv_getMaxCharSize (const UConverter * converter);
|
||||
|
||||
|
||||
/**
|
||||
* Returns the minimum byte length for characters in this codepage. This is either
|
||||
* 1 or 2 for all supported codepages.
|
||||
* @param converter the Unicode converter
|
||||
* @return the minimum number of bytes allowed by this particular converter
|
||||
* @see ucnv_getMaxCharSize
|
||||
*/
|
||||
U_CAPI int8_t U_EXPORT2
|
||||
ucnv_getMinCharSize (const UConverter * converter);
|
||||
|
||||
|
||||
/**
|
||||
* Returns the display name of the converter passed in based on the Locale passed in,
|
||||
* in the case the locale contains no display name, the internal ASCII name will be
|
||||
* filled in.
|
||||
*
|
||||
* @param converter the Unicode converter.
|
||||
* @param displayLocale is the specific Locale we want to localize for
|
||||
* @param displayName user provided buffer to be filled in
|
||||
* @param displayNameCapacity size of displayName Buffer
|
||||
* @param err: outgoing error code.
|
||||
* @return displayNameLength number of UChar needed in displayName
|
||||
* @see ucnv_getName
|
||||
*/
|
||||
U_CAPI
|
||||
int32_t U_EXPORT2 ucnv_getDisplayName (const UConverter * converter,
|
||||
const char *displayLocale,
|
||||
UChar * displayName,
|
||||
int32_t displayNameCapacity,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* Gets the name of the converter (zero-terminated).
|
||||
* the name will be the internal name of the converter, the lifetime of the returned
|
||||
* string will be that of the converter passed to this function.
|
||||
* @param converter the Unicode converter
|
||||
* @param err UErrorCode status
|
||||
* @return the internal name of the converter
|
||||
* @see ucnv_getDisplayName
|
||||
*/
|
||||
U_CAPI
|
||||
const char * U_EXPORT2 ucnv_getName (const UConverter * converter, UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Gets a codepage number associated with the converter. This is not guaranteed
|
||||
* to be the one used to create the converter. Some converters do not represent
|
||||
* IBM registered codepages and return zero for the codepage number.
|
||||
* The error code fill-in parameter indicates if the codepage number is available.
|
||||
* @param converter the Unicode converter
|
||||
* @param err the error status code.
|
||||
* the converter is <TT>NULL</TT> or if converter's data table is <TT>NULL</TT>.
|
||||
* @return If any error occurs, -1 will be returned; otherwise, the codepage number
|
||||
* will be returned
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucnv_getCCSID (const UConverter * converter,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* Gets a codepage platform associated with the converter. Currently, only <TT>IBM</TT> is supported
|
||||
* The error code fill-in parameter indicates if the codepage number is available.
|
||||
* @param converter the Unicode converter
|
||||
* @param err the error status code.
|
||||
* the converter is <TT>NULL</TT> or if converter's data table is <TT>NULL</TT>.
|
||||
* @return The codepage platform
|
||||
*/
|
||||
U_CAPI UConverterPlatform U_EXPORT2
|
||||
ucnv_getPlatform (const UConverter * converter,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
*Gets the type of conversion associated with the converter
|
||||
* e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, EBCDIC_STATEFUL, LATIN_1
|
||||
* @param converter: a valid, opened converter
|
||||
* @return the type of the converter
|
||||
*/
|
||||
U_CAPI UConverterType U_EXPORT2
|
||||
ucnv_getType (const UConverter * converter);
|
||||
|
||||
/**
|
||||
*Gets the "starter" bytes for the converters of type MBCS
|
||||
*will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
|
||||
*is not MBCS.
|
||||
*fills in an array of boolean, with the value of the byte as offset to the array.
|
||||
*At return, if TRUE is found at offset 0x20, it means that the byte 0x20 is a starter byte
|
||||
*in this converter.
|
||||
* @param converter: a valid, opened converter of type MBCS
|
||||
* @param starters: an array of size 256 to be filled in
|
||||
* @param err: the outgoing error status code; set to <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the converter is not MBCS
|
||||
* @see ucnv_getType
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucnv_getStarters(const UConverter* converter,
|
||||
bool_t starters[256],
|
||||
UErrorCode* err);
|
||||
|
||||
|
||||
/**
|
||||
* Gets the current callback function used by the converter when an illegal or invalid sequence is found.
|
||||
*
|
||||
* @param converter the unicode converter
|
||||
* @return a pointer to the callback function
|
||||
* @see ucnv_setToUCallBack
|
||||
*/
|
||||
U_CAPI UConverterToUCallback U_EXPORT2
|
||||
ucnv_getToUCallBack (const UConverter * converter);
|
||||
|
||||
/**
|
||||
* Gets the current callback function used by the converter when illegal or invalid sequence found.
|
||||
*
|
||||
* @param converter the unicode converter
|
||||
* @return a pointer to the callback function
|
||||
* @see ucnv_setFromUCallBack
|
||||
*/
|
||||
U_CAPI UConverterFromUCallback U_EXPORT2
|
||||
ucnv_getFromUCallBack (const UConverter * converter);
|
||||
|
||||
/**
|
||||
* Sets the callback function used by the converter when an illegal or invalid sequence is found.
|
||||
*
|
||||
* @param converter the unicode converter
|
||||
* @param action the callback function we want to set.
|
||||
* @param err The error code status
|
||||
* @return the previously assigned callback function pointer
|
||||
* @see ucnv_getToUCallBack
|
||||
*/
|
||||
U_CAPI UConverterToUCallback U_EXPORT2
|
||||
ucnv_setToUCallBack (UConverter * converter,
|
||||
UConverterToUCallback action,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* Sets the callback function used by the converter when an illegal or invalid sequence is found.
|
||||
*
|
||||
* @param converter the unicode converter
|
||||
* @param action the callback function we want to set.
|
||||
* @param err The error code status
|
||||
* @return the previously assigned callback function pointer
|
||||
* @see ucnv_getFromUCallBack
|
||||
*/
|
||||
U_CAPI UConverterFromUCallback U_EXPORT2
|
||||
ucnv_setFromUCallBack (UConverter * converter,
|
||||
UConverterFromUCallback action,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Transcodes an array of unicode characters to an array of codepage characters.
|
||||
* The source pointer is an I/O parameter, it starts out pointing where the function is
|
||||
* to begin transcoding, and ends up pointing after the first sequence of the bytes
|
||||
* that it encounters that are semantically invalid.
|
||||
* if ucnv_setToUCallBack is called with an action other than <TT>STOP</TT>
|
||||
* before a call is made to this API, <TT>consumed</TT> and <TT>source</TT> should point to the same place
|
||||
* (unless <TT>target</TT> ends with an incomplete sequence of bytes and <TT>flush</TT> is <TT>FALSE</TT>).
|
||||
* the <TT>target</TT> buffer needs to be at least the size of the maximum # of bytes per character
|
||||
* allowed by the target codepage.
|
||||
* @param converter the Unicode converter
|
||||
* @param converter the Unicode converter
|
||||
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
|
||||
* codepage characters to. Output : points to after the last codepage character copied
|
||||
* to <TT>target</TT>.
|
||||
* @param targetLimit the pointer to the end of the <TT>target</TT> array
|
||||
* @param source the source Unicode character array
|
||||
* @param sourceLimit the pointer to the end of the source array
|
||||
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
|
||||
* of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
|
||||
* e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
|
||||
* For output data carried across calls -1 will be placed for offsets.
|
||||
* @param flush <TT>TRUE</TT> if the buffer is the last buffer of the conversion iteration
|
||||
* and the conversion will finish with this call, FALSE otherwise.
|
||||
* @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be returned if the
|
||||
* converter is <TT>NULL</TT>.
|
||||
* @see ucnv_fromUChars
|
||||
* @see ucnv_convert
|
||||
* @see ucnv_getMinCharSize
|
||||
* @see ucnv_setToUCallBack
|
||||
*/
|
||||
|
||||
U_CAPI
|
||||
void U_EXPORT2 ucnv_fromUnicode (UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Converts an array of codepage characters into an array of unicode characters.
|
||||
* The source pointer is an I/O parameter, it starts out pointing at the place
|
||||
* to begin translating, and ends up pointing after the first sequence of the bytes
|
||||
* that it encounters that are semantically invalid.
|
||||
* if ucnv_setFromUCallBack is called with an action other than STOP
|
||||
* before a call is made to this API, consumed and source should point to the same place
|
||||
* (unless target ends with an incomplete sequence of bytes and flush is FALSE).
|
||||
* @param converter the Unicode converter
|
||||
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
|
||||
* Unicode characters to. Output : points to after the last UChar copied to target.
|
||||
* @param targetLimit the pointer to the end of the target array
|
||||
* @param source the source codepage character array
|
||||
* @param sourceLimit the pointer to the end of the source array
|
||||
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
|
||||
* of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
|
||||
* e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
|
||||
* For output data carried across calls -1 will be placed for offsets.
|
||||
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
|
||||
* in this call, FALSE otherwise.
|
||||
* @param err the error code status <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be returned if the
|
||||
* converter is <TT>NULL</TT>, or if <TT>targetLimit</TT> and <TT>sourceLimit</TT> are misaligned.
|
||||
* @see ucnv_toUChars
|
||||
* @see ucnv_getNextUChar
|
||||
* @see ucnv_convert
|
||||
* @see ucnv_setFromUCallBack
|
||||
*/
|
||||
|
||||
U_CAPI
|
||||
void U_EXPORT2 ucnv_toUnicode (UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Transcodes the source Unicode string to the target string in a codepage encoding
|
||||
* with the specified Unicode converter. For example, if a Unicode to/from JIS
|
||||
* converter is specified, the source string in Unicode will be transcoded to JIS
|
||||
* encoding. The result will be stored in JIS encoding.
|
||||
* if any problems during conversion are encountered it will SUBSTITUTE with the default (initial)
|
||||
* substitute characters.
|
||||
* This function is a more convenient but less efficient version of \Ref{ucnv_fromUnicode}.
|
||||
* @param converter the Unicode converter
|
||||
* @param source the <TT>source</TT> Unicode string (zero Terminated)
|
||||
* @param target the <TT>target</TT> string in codepage encoding (<STRONG>not zero-terminated</STRONG> because some
|
||||
* codepage do not use '\0' as a string terminator
|
||||
* @param targetCapacity Input the number of bytes available in the <TT>target</TT> buffer
|
||||
* @param err the error status code.
|
||||
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned if the
|
||||
* # of bytes provided is not enough for transcoding.
|
||||
* <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> is returned if the converter is <TT>NULL</TT> or the source or target string is empty.
|
||||
* <TT>U_BUFFER_OVERFLOW_ERROR</TT> when <TT>targetSize</TT> turns out to be bigger than <TT>targetCapacity</TT>
|
||||
* @return number of bytes needed in target, regardless of <TT>targetCapacity</TT>
|
||||
* @see ucnv_fromUnicode
|
||||
* @see ucnv_convert
|
||||
*/
|
||||
U_CAPI
|
||||
int32_t U_EXPORT2 ucnv_fromUChars (const UConverter * converter,
|
||||
char *target,
|
||||
int32_t targetCapacity,
|
||||
const UChar * source,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Transcode the source string in codepage encoding to the target string in
|
||||
* Unicode encoding. For example, if a Unicode to/from JIS
|
||||
* converter is specified, the source string in JIS encoding will be transcoded
|
||||
* to Unicode and placed into a provided target buffer.
|
||||
* if any problems during conversion are encountered it will SUBSTITUTE with the Unicode REPLACEMENT char
|
||||
* We recommend that the size of the target buffer be at least as long as the maximum # of bytes per char
|
||||
* in this character set.
|
||||
* A zero-terminator will be placed at the end of the target buffer
|
||||
* This function is a more convenient but less efficient version of \Ref{ucnv_toUnicode}.
|
||||
* @param converter the Unicode converter
|
||||
* @param source the source string in codepage encoding
|
||||
* @param target the target string in Unicode encoding
|
||||
* @param targetCapacity capacity of the target buffer
|
||||
* @param sourceSize : Number of bytes in <TT>source</TT> to be transcoded
|
||||
* @param err the error status code
|
||||
* <TT>U_MEMORY_ALLOCATION_ERROR</TT> will be returned if the
|
||||
* internal process buffer cannot be allocated for transcoding.
|
||||
* <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> is returned if the converter is <TT>NULL</TT> or
|
||||
* if the source or target string is empty.
|
||||
* <TT>U_BUFFER_OVERFLOW_ERROR</TT> when the input buffer is prematurely exhausted and targetSize non-<TT>NULL</TT>.
|
||||
* @return the number of UChar needed in target (including the zero terminator)
|
||||
* @see ucnv_getNextUChar
|
||||
* @see ucnv_toUnicode
|
||||
* @see ucnv_convert
|
||||
*/
|
||||
U_CAPI
|
||||
int32_t U_EXPORT2 ucnv_toUChars (const UConverter * converter,
|
||||
UChar * target,
|
||||
int32_t targetCapacity,
|
||||
const char *source,
|
||||
int32_t sourceSize,
|
||||
UErrorCode * err);
|
||||
|
||||
/********************************
|
||||
* Will convert a codepage buffer one character at a time.
|
||||
* This function was written to be efficient when transcoding small amounts of data at a time.
|
||||
* In that case it will be more efficient than \Ref{ucnv_toUnicode}.
|
||||
* When converting large buffers use \Ref{ucnv_toUnicode}.
|
||||
*@param converter an open UConverter
|
||||
*@param source the address of a pointer to the codepage buffer, will be updated to point after
|
||||
*the bytes consumed in the conversion call.
|
||||
*@param sourceLimit points to the end of the input buffer
|
||||
*@param err fills in error status (see ucnv_toUnicode)
|
||||
*@return a UChar resulting from the partial conversion of source
|
||||
*@see ucnv_toUnicode
|
||||
*@see ucnv_toUChars
|
||||
*@see ucnv_convert
|
||||
*/
|
||||
U_CAPI
|
||||
UChar U_EXPORT2 ucnv_getNextUChar (UConverter * converter,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**************************
|
||||
* Will convert a sequence of bytes from one codepage to another.
|
||||
* This is <STRONG>NOT AN EFFICIENT</STRONG> way to transcode.
|
||||
* use \Ref{ucnv_toUnicode} and \Ref{ucnv_fromUnicode} for efficiency
|
||||
* @param toConverterName: The name of the converter that will be used to encode the output buffer
|
||||
* @param fromConverterName: The name of the converter that will be used to decode the input buffer
|
||||
* @param target: Pointer to the output buffer to write to
|
||||
* @param targetCapacity: on input contains the capacity of target
|
||||
* @param source: Pointer to the input buffer
|
||||
* @param sourceLength: on input contains the capacity of source
|
||||
* @param err: fills in an error status
|
||||
* @return will be filled in with the number of bytes needed in target
|
||||
* @see ucnv_fromUnicode
|
||||
* @see ucnv_toUnicode
|
||||
* @see ucnv_fromUChars
|
||||
* @see ucnv_toUChars
|
||||
* @see ucnv_getNextUChar
|
||||
*/
|
||||
U_CAPI
|
||||
int32_t U_EXPORT2 ucnv_convert (const char *toConverterName,
|
||||
const char *fromConverterName,
|
||||
char *target,
|
||||
int32_t targetCapacity,
|
||||
const char *source,
|
||||
int32_t sourceLength,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* SYSTEM API
|
||||
* Iterates through every cached converter and frees all the unused ones.
|
||||
*
|
||||
* @return the number of cached converters successfully deleted
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2 ucnv_flushCache (void);
|
||||
|
||||
|
||||
/**
|
||||
* provides a string containing the internal name (based on the alias file) of the converter.
|
||||
* given an index.
|
||||
* @param index the number of converters available on the system (<TT>[0..ucnv_countAvailable()]</TT>)
|
||||
* @return a pointer to a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
|
||||
* @see ucnv_countAvailable
|
||||
*/
|
||||
U_CAPI
|
||||
const char * U_EXPORT2 ucnv_getAvailableName (int32_t index);
|
||||
|
||||
/**
|
||||
* returns the number of available converters.
|
||||
*
|
||||
* @return the number of available converters
|
||||
* @see ucnv_getAvailableName
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2 ucnv_countAvailable (void);
|
||||
|
||||
/**
|
||||
* returns the current default converter name.
|
||||
*
|
||||
* @return returns the current default converter name;
|
||||
* if a default converter name cannot be determined,
|
||||
* then <code>NULL</code> is returned
|
||||
* @see ucnv_setDefaultName
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2 ucnv_getDefaultName (void);
|
||||
|
||||
/**
|
||||
* sets the current default converter name.
|
||||
* The lifetime of the return ptr is that of the library
|
||||
* @param name: the converter name you want as default (has to appear in alias file)
|
||||
* @see ucnv_getDefaultName
|
||||
*
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucnv_setDefaultName (const char *name);
|
||||
|
||||
|
||||
#endif
|
||||
/*_UCNV*/
|
||||
#error Please include unicode/ucnv.h instead
|
||||
|
|
|
@ -18,11 +18,11 @@
|
|||
#include "uhash.h"
|
||||
#include "ucmp16.h"
|
||||
#include "ucmp8.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_err.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_imp.h"
|
||||
#include "udata.h"
|
||||
#include "ucnv.h"
|
||||
#include "unicode/udata.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "umutex.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
|
@ -100,7 +100,7 @@ char *
|
|||
{
|
||||
int32_t i = 0;
|
||||
|
||||
while (name[i] = icu_toupper (name[i]))
|
||||
while (name[i] = uprv_toupper (name[i]))
|
||||
i++;
|
||||
|
||||
return name;
|
||||
|
@ -153,7 +153,7 @@ char *
|
|||
int32_t uhash_hashIString(const void* name)
|
||||
{
|
||||
char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
|
||||
icu_strcpy(myName, (char*)name);
|
||||
uprv_strcpy(myName, (char*)name);
|
||||
strtoupper(myName);
|
||||
|
||||
return uhash_hashString(myName);
|
||||
|
@ -181,7 +181,7 @@ CompactShortArray* createCompactShortArrayFromFile (FileStream * infile, UError
|
|||
*err = U_INVALID_TABLE_FILE;
|
||||
return NULL;
|
||||
}
|
||||
myShortArray = (int16_t *) icu_malloc (myValuesCount * sizeof (int16_t));
|
||||
myShortArray = (int16_t *) uprv_malloc (myValuesCount * sizeof (int16_t));
|
||||
if (myShortArray == NULL)
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
|
@ -192,15 +192,15 @@ CompactShortArray* createCompactShortArrayFromFile (FileStream * infile, UError
|
|||
|
||||
if (myIndexCount < 0)
|
||||
{
|
||||
icu_free (myShortArray);
|
||||
uprv_free (myShortArray);
|
||||
*err = U_INVALID_TABLE_FILE;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
myIndexArray = (uint16_t *) icu_malloc (myIndexCount * sizeof (uint16_t));
|
||||
myIndexArray = (uint16_t *) uprv_malloc (myIndexCount * sizeof (uint16_t));
|
||||
if (myIndexArray == NULL)
|
||||
{
|
||||
icu_free (myShortArray);
|
||||
uprv_free (myShortArray);
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -235,7 +235,7 @@ CompactByteArray* createCompactByteArrayFromFile (FileStream * infile,
|
|||
*err = U_INVALID_TABLE_FILE;
|
||||
return NULL;
|
||||
}
|
||||
myByteArray = (int8_t *) icu_malloc (myValuesCount * sizeof (int8_t));
|
||||
myByteArray = (int8_t *) uprv_malloc (myValuesCount * sizeof (int8_t));
|
||||
if (myByteArray == NULL)
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
|
@ -246,14 +246,14 @@ CompactByteArray* createCompactByteArrayFromFile (FileStream * infile,
|
|||
|
||||
if (myIndexCount < 0)
|
||||
{
|
||||
icu_free (myByteArray);
|
||||
uprv_free (myByteArray);
|
||||
*err = U_INVALID_TABLE_FILE;
|
||||
return NULL;
|
||||
}
|
||||
myIndexArray = (uint16_t *) icu_malloc (myIndexCount * sizeof (uint16_t));
|
||||
myIndexArray = (uint16_t *) uprv_malloc (myIndexCount * sizeof (uint16_t));
|
||||
if (myIndexArray == NULL)
|
||||
{
|
||||
icu_free (myByteArray);
|
||||
uprv_free (myByteArray);
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -308,7 +308,7 @@ UConverter* createConverterFromFile (const char *fileName, UErrorCode * err)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
myConverter = (UConverter *) icu_malloc (sizeof (UConverter));
|
||||
myConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
|
||||
if (myConverter == NULL)
|
||||
{
|
||||
udata_close(data);
|
||||
|
@ -322,7 +322,7 @@ UConverter* createConverterFromFile (const char *fileName, UErrorCode * err)
|
|||
if (myConverter->sharedData == NULL)
|
||||
{
|
||||
udata_close(data);
|
||||
icu_free (myConverter);
|
||||
uprv_free (myConverter);
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -336,7 +336,7 @@ UConverter* createConverterFromFile (const char *fileName, UErrorCode * err)
|
|||
if(U_FAILURE(*err))
|
||||
{
|
||||
udata_close(data);
|
||||
icu_free (myConverter);
|
||||
uprv_free (myConverter);
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -357,12 +357,12 @@ void
|
|||
{
|
||||
case UCNV_IBM:
|
||||
{
|
||||
icu_strcpy (platformString, "ibm");
|
||||
uprv_strcpy (platformString, "ibm");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
icu_strcpy (platformString, "");
|
||||
uprv_strcpy (platformString, "");
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
@ -375,29 +375,29 @@ void
|
|||
UConverterType
|
||||
getAlgorithmicTypeFromName (const char *realName)
|
||||
{
|
||||
if (icu_strcmp (realName, "UTF8") == 0)
|
||||
if (uprv_strcmp (realName, "UTF8") == 0)
|
||||
return UCNV_UTF8;
|
||||
else if (icu_strcmp (realName, "UTF16_BigEndian") == 0)
|
||||
else if (uprv_strcmp (realName, "UTF16_BigEndian") == 0)
|
||||
return UCNV_UTF16_BigEndian;
|
||||
else if (icu_strcmp (realName, "UTF16_LittleEndian") == 0)
|
||||
else if (uprv_strcmp (realName, "UTF16_LittleEndian") == 0)
|
||||
return UCNV_UTF16_LittleEndian;
|
||||
else if (icu_strcmp (realName, "LATIN_1") == 0)
|
||||
else if (uprv_strcmp (realName, "LATIN_1") == 0)
|
||||
return UCNV_LATIN_1;
|
||||
else if (icu_strcmp (realName, "JIS") == 0)
|
||||
else if (uprv_strcmp (realName, "JIS") == 0)
|
||||
return UCNV_JIS;
|
||||
else if (icu_strcmp (realName, "EUC") == 0)
|
||||
else if (uprv_strcmp (realName, "EUC") == 0)
|
||||
return UCNV_EUC;
|
||||
else if (icu_strcmp (realName, "GB") == 0)
|
||||
else if (uprv_strcmp (realName, "GB") == 0)
|
||||
return UCNV_GB;
|
||||
else if (icu_strcmp (realName, "ISO_2022") == 0)
|
||||
else if (uprv_strcmp (realName, "ISO_2022") == 0)
|
||||
return UCNV_ISO_2022;
|
||||
else if (icu_strcmp (realName, "UTF16_PlatformEndian") == 0)
|
||||
else if (uprv_strcmp (realName, "UTF16_PlatformEndian") == 0)
|
||||
# if U_IS_BIG_ENDIAN
|
||||
return UCNV_UTF16_BigEndian;
|
||||
# else
|
||||
return UCNV_UTF16_LittleEndian;
|
||||
# endif
|
||||
else if (icu_strcmp (realName, "UTF16_OppositeEndian") == 0)
|
||||
else if (uprv_strcmp (realName, "UTF16_OppositeEndian") == 0)
|
||||
# if U_IS_BIG_ENDIAN
|
||||
return UCNV_UTF16_LittleEndian;
|
||||
# else
|
||||
|
@ -417,7 +417,7 @@ UConverterPlatform
|
|||
getToken (myPlatform, name, mySeparators);
|
||||
strtoupper (myPlatform);
|
||||
|
||||
if (icu_strcmp (myPlatform, "IBM") == 0)
|
||||
if (uprv_strcmp (myPlatform, "IBM") == 0)
|
||||
return UCNV_IBM;
|
||||
else
|
||||
return UCNV_UNKNOWN;
|
||||
|
@ -502,7 +502,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
|
|||
case UCNV_SBCS:
|
||||
{
|
||||
ucmp8_close (deadSharedData->table->sbcs.fromUnicode);
|
||||
icu_free (deadSharedData->table);
|
||||
uprv_free (deadSharedData->table);
|
||||
};
|
||||
break;
|
||||
|
||||
|
@ -510,7 +510,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
|
|||
{
|
||||
ucmp16_close (deadSharedData->table->mbcs.fromUnicode);
|
||||
ucmp16_close (deadSharedData->table->mbcs.toUnicode);
|
||||
icu_free (deadSharedData->table);
|
||||
uprv_free (deadSharedData->table);
|
||||
};
|
||||
break;
|
||||
|
||||
|
@ -519,7 +519,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
|
|||
{
|
||||
ucmp16_close (deadSharedData->table->dbcs.fromUnicode);
|
||||
ucmp16_close (deadSharedData->table->dbcs.toUnicode);
|
||||
icu_free (deadSharedData->table);
|
||||
uprv_free (deadSharedData->table);
|
||||
};
|
||||
break;
|
||||
|
||||
|
@ -532,7 +532,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
|
|||
udata_close(data);
|
||||
}
|
||||
|
||||
icu_free (deadSharedData);
|
||||
uprv_free (deadSharedData);
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -661,7 +661,7 @@ UConverter *
|
|||
else
|
||||
{
|
||||
/*Is already cached, point to an existing one */
|
||||
myUConverter = (UConverter *) icu_malloc (sizeof (UConverter));
|
||||
myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
|
||||
if (myUConverter == NULL)
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
|
@ -690,7 +690,7 @@ UConverter *
|
|||
myUConverter = createConverterFromAlgorithmicType (realName, err);
|
||||
if (U_FAILURE (*err) || (myUConverter == NULL))
|
||||
{
|
||||
icu_free (myUConverter);
|
||||
uprv_free (myUConverter);
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
|
@ -702,7 +702,7 @@ UConverter *
|
|||
}
|
||||
else
|
||||
{
|
||||
myUConverter = (UConverter *) icu_malloc (sizeof (UConverter));
|
||||
myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
|
||||
if (myUConverter == NULL)
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
|
@ -736,7 +736,7 @@ void initializeDataConverter (UConverter * myUConverter)
|
|||
myUConverter->UCharErrorBufferLength = 0;
|
||||
myUConverter->charErrorBufferLength = 0;
|
||||
myUConverter->subCharLen = myUConverter->sharedData->defaultConverterValues.subCharLen;
|
||||
icu_memcpy (myUConverter->subChar,
|
||||
uprv_memcpy (myUConverter->subChar,
|
||||
myUConverter->sharedData->defaultConverterValues.subChar,
|
||||
myUConverter->subCharLen);
|
||||
myUConverter->toUnicodeStatus = 0x00;
|
||||
|
@ -788,9 +788,9 @@ void
|
|||
myConverter->fromUnicodeStatus = 0; /* srl */
|
||||
myConverter->sharedData->platform = UCNV_IBM;
|
||||
myConverter->sharedData->codepage = 1208;
|
||||
icu_strcpy(myConverter->sharedData->name, "UTF8");
|
||||
icu_memcpy (myConverter->subChar, UTF8_subChar, 3);
|
||||
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF8_subChar, 3);
|
||||
uprv_strcpy(myConverter->sharedData->name, "UTF8");
|
||||
uprv_memcpy (myConverter->subChar, UTF8_subChar, 3);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF8_subChar, 3);
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -804,7 +804,7 @@ void
|
|||
myConverter->toUnicodeStatus = 0;
|
||||
myConverter->sharedData->platform = UCNV_IBM;
|
||||
myConverter->sharedData->codepage = 819;
|
||||
icu_strcpy(myConverter->sharedData->name, "LATIN_1");
|
||||
uprv_strcpy(myConverter->sharedData->name, "LATIN_1");
|
||||
*(myConverter->subChar) = LATIN1_subChar;
|
||||
*(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar;
|
||||
break;
|
||||
|
@ -819,11 +819,11 @@ void
|
|||
myConverter->subCharLen = 2;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
myConverter->fromUnicodeStatus = 0;
|
||||
icu_strcpy(myConverter->sharedData->name, "UTF_16BE");
|
||||
uprv_strcpy(myConverter->sharedData->name, "UTF_16BE");
|
||||
myConverter->sharedData->platform = UCNV_IBM;
|
||||
myConverter->sharedData->codepage = 1200;
|
||||
icu_memcpy (myConverter->subChar, UTF16BE_subChar, 2);
|
||||
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16BE_subChar, 2);
|
||||
uprv_memcpy (myConverter->subChar, UTF16BE_subChar, 2);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16BE_subChar, 2);
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -839,9 +839,9 @@ void
|
|||
myConverter->fromUnicodeStatus = 0;
|
||||
myConverter->sharedData->platform = UCNV_IBM;
|
||||
myConverter->sharedData->codepage = 1200;
|
||||
icu_strcpy(myConverter->sharedData->name, "UTF_16LE");
|
||||
icu_memcpy (myConverter->subChar, UTF16LE_subChar, 2);
|
||||
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16LE_subChar, 2);
|
||||
uprv_strcpy(myConverter->sharedData->name, "UTF_16LE");
|
||||
uprv_memcpy (myConverter->subChar, UTF16LE_subChar, 2);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16LE_subChar, 2);
|
||||
break;
|
||||
}
|
||||
case UCNV_EUC:
|
||||
|
@ -852,8 +852,8 @@ void
|
|||
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
||||
myConverter->subCharLen = 2;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
icu_memcpy (myConverter->subChar, EUC_subChar, 2);
|
||||
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, EUC_subChar, 2);
|
||||
uprv_memcpy (myConverter->subChar, EUC_subChar, 2);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, EUC_subChar, 2);
|
||||
break;
|
||||
}
|
||||
case UCNV_ISO_2022:
|
||||
|
@ -870,10 +870,10 @@ void
|
|||
myConverter->toUnicodeStatus = 0;
|
||||
myConverter->fromUnicodeStatus = 0; /* srl */
|
||||
myConverter->sharedData->codepage = 2022;
|
||||
icu_strcpy(myConverter->sharedData->name, "ISO_2022");
|
||||
uprv_strcpy(myConverter->sharedData->name, "ISO_2022");
|
||||
*(myConverter->subChar) = LATIN1_subChar;
|
||||
*(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar;
|
||||
myConverter->extraInfo = icu_malloc (sizeof (UConverterDataISO2022));
|
||||
myConverter->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
|
||||
((UConverterDataISO2022 *) myConverter->extraInfo)->currentConverter = NULL;
|
||||
((UConverterDataISO2022 *) myConverter->extraInfo)->escSeq2022Length = 0;
|
||||
break;
|
||||
|
@ -886,8 +886,8 @@ void
|
|||
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
||||
myConverter->subCharLen = 2;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
icu_memcpy (myConverter->subChar, GB_subChar, 2);
|
||||
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, GB_subChar, 2);
|
||||
uprv_memcpy (myConverter->subChar, GB_subChar, 2);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, GB_subChar, 2);
|
||||
break;
|
||||
}
|
||||
case UCNV_JIS:
|
||||
|
@ -898,8 +898,8 @@ void
|
|||
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
|
||||
myConverter->subCharLen = 2;
|
||||
myConverter->toUnicodeStatus = 0;
|
||||
icu_memcpy (myConverter->subChar, JIS_subChar, 2);
|
||||
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, JIS_subChar, 2);
|
||||
uprv_memcpy (myConverter->subChar, JIS_subChar, 2);
|
||||
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, JIS_subChar, 2);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -927,7 +927,7 @@ UConverter *
|
|||
if (U_FAILURE (*err))
|
||||
return NULL;
|
||||
|
||||
myConverter = (UConverter *) icu_malloc (sizeof (UConverter));
|
||||
myConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
|
||||
if (myConverter == NULL)
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
|
@ -935,17 +935,17 @@ UConverter *
|
|||
}
|
||||
|
||||
myConverter->sharedData = NULL;
|
||||
mySharedData = (UConverterSharedData *) icu_malloc (sizeof (UConverterSharedData));
|
||||
mySharedData = (UConverterSharedData *) uprv_malloc (sizeof (UConverterSharedData));
|
||||
if (mySharedData == NULL)
|
||||
{
|
||||
*err = U_MEMORY_ALLOCATION_ERROR;
|
||||
icu_free (myConverter);
|
||||
uprv_free (myConverter);
|
||||
return NULL;
|
||||
}
|
||||
mySharedData->structSize = sizeof(UConverterSharedData);
|
||||
mySharedData->table = NULL;
|
||||
mySharedData->dataMemory = NULL;
|
||||
icu_strcpy (mySharedData->name, actualName);
|
||||
uprv_strcpy (mySharedData->name, actualName);
|
||||
/*Initializes the referenceCounter to 1 */
|
||||
mySharedData->referenceCounter = 1;
|
||||
mySharedData->platform = UCNV_UNKNOWN;
|
||||
|
@ -974,7 +974,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc
|
|||
|
||||
data = (UConverterSharedData*) malloc(sizeof(UConverterSharedData));
|
||||
raw = (uint8_t*)source;
|
||||
icu_memcpy(data,source,sizeof(UConverterSharedData));
|
||||
uprv_memcpy(data,source,sizeof(UConverterSharedData));
|
||||
|
||||
raw += data->structSize;
|
||||
|
||||
|
@ -993,7 +993,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc
|
|||
|
||||
case UCNV_EBCDIC_STATEFUL:
|
||||
case UCNV_DBCS:
|
||||
data->table = icu_malloc(sizeof(UConverterDBCSTable));
|
||||
data->table = uprv_malloc(sizeof(UConverterDBCSTable));
|
||||
|
||||
oldraw = raw;
|
||||
|
||||
|
@ -1007,7 +1007,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc
|
|||
break;
|
||||
|
||||
case UCNV_MBCS:
|
||||
data->table = icu_malloc(sizeof(UConverterMBCSTable));
|
||||
data->table = uprv_malloc(sizeof(UConverterMBCSTable));
|
||||
|
||||
data->table->mbcs.starters = (bool_t*)raw;
|
||||
raw += sizeof(bool_t)*256;
|
||||
|
|
|
@ -1,215 +1 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
*
|
||||
* ucnv_bld.h:
|
||||
* Contains all internal and external data structure definitions
|
||||
* Created & Maintained by Bertrand A. Damiba
|
||||
*
|
||||
*
|
||||
*
|
||||
* ATTENTION:
|
||||
* ---------
|
||||
* Although the data structures in this file are open and stack allocatable
|
||||
* we reserve the right to hide them in further releases.
|
||||
*/
|
||||
|
||||
#ifndef UCNV_BLD_H
|
||||
#define UCNV_BLD_H
|
||||
|
||||
#include "utypes.h"
|
||||
|
||||
#define UCNV_MAX_SUBCHAR_LEN 4
|
||||
#define UCNV_ERROR_BUFFER_LENGTH 20
|
||||
|
||||
#ifndef UCMP16_H
|
||||
typedef struct _CompactShortArray CompactShortArray;
|
||||
#endif
|
||||
|
||||
#ifndef UCMP8_H
|
||||
typedef struct _CompactByteArray CompactByteArray;
|
||||
#endif
|
||||
|
||||
#define UCNV_IMPLEMENTED_CONVERSION_TYPES 9
|
||||
/*Sentinel Value used to check the integrity of the binary data files */
|
||||
|
||||
#define UCNV_FILE_CHECK_MARKER 0xBEDA
|
||||
|
||||
/*maximum length of the converter names */
|
||||
#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
|
||||
#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
|
||||
|
||||
/*Pointer to the aforementioned file */
|
||||
#define UCNV_MAX_LINE_TEXT (UCNV_MAX_CONVERTER_NAME_LENGTH*400)
|
||||
|
||||
#define UCNV_SI 0x0F /*Shift in for EBCDIC_STATEFUL and iso2022 states */
|
||||
#define UCNV_SO 0x0E /*Shift out for EBCDIC_STATEFUL and iso2022 states */
|
||||
|
||||
typedef enum {
|
||||
UCNV_UNSUPPORTED_CONVERTER = -1,
|
||||
UCNV_SBCS = 0,
|
||||
UCNV_DBCS = 1,
|
||||
UCNV_MBCS = 2,
|
||||
UCNV_LATIN_1 = 3,
|
||||
UCNV_UTF8 = 4,
|
||||
UCNV_UTF16_BigEndian = 5,
|
||||
UCNV_UTF16_LittleEndian = 6,
|
||||
UCNV_EBCDIC_STATEFUL = 7,
|
||||
UCNV_ISO_2022 = 8,
|
||||
/* Number of converter types for which we have conversion routines. */
|
||||
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9,
|
||||
UCNV_JIS = 9,
|
||||
UCNV_EUC = 10,
|
||||
UCNV_GB = 11
|
||||
} UConverterType;
|
||||
|
||||
typedef enum {
|
||||
UCNV_UNKNOWN = -1,
|
||||
UCNV_IBM = 0
|
||||
} UConverterPlatform;
|
||||
|
||||
|
||||
/*Table Node Definitions */
|
||||
typedef struct
|
||||
{
|
||||
UChar *toUnicode; /* [256]; */
|
||||
CompactByteArray *fromUnicode;
|
||||
}
|
||||
UConverterSBCSTable;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
CompactShortArray *toUnicode;
|
||||
CompactShortArray *fromUnicode;
|
||||
}
|
||||
UConverterDBCSTable;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bool_t *starters; /* [256]; */
|
||||
CompactShortArray *toUnicode;
|
||||
CompactShortArray *fromUnicode;
|
||||
}
|
||||
UConverterMBCSTable;
|
||||
|
||||
typedef union
|
||||
{
|
||||
UConverterSBCSTable sbcs;
|
||||
UConverterDBCSTable dbcs;
|
||||
UConverterMBCSTable mbcs;
|
||||
}
|
||||
UConverterTable;
|
||||
|
||||
|
||||
/*Defines the struct of a UConverterSharedData the immutable, shared part of
|
||||
*UConverter
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint32_t structSize; /* Size of this structure */
|
||||
void *dataMemory;
|
||||
uint32_t referenceCounter; /*used to count number of clients */
|
||||
char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */
|
||||
UConverterPlatform platform; /*platform of the converter (only IBM now) */
|
||||
int32_t codepage; /*codepage # (now IBM-$codepage) */
|
||||
UConverterType conversionType; /*conversion type */
|
||||
int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */
|
||||
int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */
|
||||
struct
|
||||
{ /*initial values of some members of the mutable part of object */
|
||||
uint32_t toUnicodeStatus;
|
||||
int8_t subCharLen;
|
||||
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN];
|
||||
}
|
||||
defaultConverterValues;
|
||||
UConverterTable *table; /*Pointer to conversion data */
|
||||
}
|
||||
UConverterSharedData;
|
||||
|
||||
|
||||
/*Defines a UConverter, the lightweight mutable part the user sees */
|
||||
|
||||
U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
|
||||
itself is compiled under C++, the linkage of the funcptrs will
|
||||
work.
|
||||
*/
|
||||
|
||||
struct UConverter
|
||||
{
|
||||
int32_t toUnicodeStatus; /*Used to internalize stream status information */
|
||||
int32_t fromUnicodeStatus;
|
||||
int8_t invalidCharLength;
|
||||
int8_t invalidUCharLength;
|
||||
int8_t pad;
|
||||
int32_t mode;
|
||||
int8_t subCharLen; /*length of the codepage specific character sequence */
|
||||
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /*codepage specific character sequence */
|
||||
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store unicode data meant for
|
||||
*output stream by the Error function pointers
|
||||
*/
|
||||
unsigned char charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store codepage data meant for
|
||||
* output stream by the Error function pointers
|
||||
*/
|
||||
int8_t UCharErrorBufferLength; /*used to indicate the number of valid UChars
|
||||
*in UCharErrorBuffer
|
||||
*/
|
||||
int8_t charErrorBufferLength; /*used to indicate the number of valid bytes
|
||||
*in charErrorBuffer
|
||||
*/
|
||||
|
||||
UChar invalidUCharBuffer[3];
|
||||
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN];
|
||||
/*Error function pointer called when conversion issues
|
||||
*occur during a T_UConverter_fromUnicode call
|
||||
*/
|
||||
void (*fromUCharErrorBehaviour) (struct UConverter *,
|
||||
char **,
|
||||
const char *,
|
||||
const UChar **,
|
||||
const UChar *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
/*Error function pointer called when conversion issues
|
||||
*occur during a T_UConverter_toUnicode call
|
||||
*/
|
||||
void (*fromCharErrorBehaviour) (struct UConverter *,
|
||||
UChar **,
|
||||
const UChar *,
|
||||
const char **,
|
||||
const char *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
UConverterSharedData *sharedData; /*Pointer to the shared immutable part of the
|
||||
*converter object
|
||||
*/
|
||||
void *extraInfo; /*currently only used to point to a struct containing UConverter used by iso 2022
|
||||
Could be used by clients writing their own call back function to
|
||||
pass context to them
|
||||
*/
|
||||
};
|
||||
|
||||
U_CDECL_END /* end of UConverter */
|
||||
|
||||
typedef struct UConverter UConverter;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UConverter *currentConverter;
|
||||
unsigned char escSeq2022[10];
|
||||
int8_t escSeq2022Length;
|
||||
}
|
||||
UConverterDataISO2022;
|
||||
|
||||
#define CONVERTER_FILE_EXTENSION ".cnv"
|
||||
|
||||
/*case insensitive hash key*/
|
||||
U_CAPI int32_t U_EXPORT2 uhash_hashIString(const void* name);
|
||||
|
||||
#endif /* UCNV_BLD_H */
|
||||
#error Please include unicode/ucnv_bld.h instead
|
||||
|
|
|
@ -16,14 +16,14 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "uhash.h"
|
||||
#include "ucmp16.h"
|
||||
#include "ucmp8.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_err.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "ucnv.h"
|
||||
#include "unicode/ucnv.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
#ifdef Debug
|
||||
|
@ -1581,7 +1581,7 @@ void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverter* _this,
|
|||
{
|
||||
int32_t len = *target - targetStart;
|
||||
int32_t i;
|
||||
/* icu_memmove(offsets+3, offsets, len); MEMMOVE SEEMS BROKEN --srl */
|
||||
/* uprv_memmove(offsets+3, offsets, len); MEMMOVE SEEMS BROKEN --srl */
|
||||
|
||||
for(i=len-1;i>=0;i--) offsets[i] = offsets[i];
|
||||
|
||||
|
@ -1797,7 +1797,7 @@ void changeState_2022(UConverter* _this,
|
|||
/*Customize the converter with the attributes set on the 2022 converter*/
|
||||
myUConverter->fromUCharErrorBehaviour = _this->fromUCharErrorBehaviour;
|
||||
myUConverter->fromCharErrorBehaviour = _this->fromCharErrorBehaviour;
|
||||
icu_memcpy(myUConverter->subChar,
|
||||
uprv_memcpy(myUConverter->subChar,
|
||||
_this->subChar,
|
||||
myUConverter->subCharLen = _this->subCharLen);
|
||||
|
||||
|
@ -2855,7 +2855,7 @@ void flushInternalUnicodeBuffer (UConverter * _this,
|
|||
{
|
||||
/*we have enough space
|
||||
*So we just copy the whole Error Buffer in to the output stream*/
|
||||
icu_memcpy (myTarget,
|
||||
uprv_memcpy (myTarget,
|
||||
_this->UCharErrorBuffer,
|
||||
sizeof (UChar) * myUCharErrorBufferLength);
|
||||
if (offsets)
|
||||
|
@ -2872,14 +2872,14 @@ void flushInternalUnicodeBuffer (UConverter * _this,
|
|||
/* We don't have enough space so we copy as much as we can
|
||||
* on the output stream and update the object
|
||||
* by updating the internal buffer*/
|
||||
icu_memcpy (myTarget, _this->UCharErrorBuffer, sizeof (UChar) * targetLength);
|
||||
uprv_memcpy (myTarget, _this->UCharErrorBuffer, sizeof (UChar) * targetLength);
|
||||
if (offsets)
|
||||
{
|
||||
int32_t i=0;
|
||||
for (i=0; i< targetLength;i++) (*offsets)[i] = -1;
|
||||
*offsets += targetLength;
|
||||
}
|
||||
icu_memmove (_this->UCharErrorBuffer,
|
||||
uprv_memmove (_this->UCharErrorBuffer,
|
||||
_this->UCharErrorBuffer + targetLength,
|
||||
sizeof (UChar) * (myUCharErrorBufferLength - targetLength));
|
||||
_this->UCharErrorBufferLength -= (int8_t) targetLength;
|
||||
|
@ -2903,7 +2903,7 @@ void flushInternalCharBuffer (UConverter * _this,
|
|||
/*we have enough space */
|
||||
if (myCharErrorBufferLength <= targetLength)
|
||||
{
|
||||
icu_memcpy (myTarget, _this->charErrorBuffer, myCharErrorBufferLength);
|
||||
uprv_memcpy (myTarget, _this->charErrorBuffer, myCharErrorBufferLength);
|
||||
if (offsets)
|
||||
{
|
||||
int32_t i=0;
|
||||
|
@ -2918,14 +2918,14 @@ void flushInternalCharBuffer (UConverter * _this,
|
|||
/* We don't have enough space so we copy as much as we can
|
||||
* on the output stream and update the object*/
|
||||
{
|
||||
icu_memcpy (myTarget, _this->charErrorBuffer, targetLength);
|
||||
uprv_memcpy (myTarget, _this->charErrorBuffer, targetLength);
|
||||
if (offsets)
|
||||
{
|
||||
int32_t i=0;
|
||||
for (i=0; i< targetLength;i++) (*offsets)[i] = -1;
|
||||
*offsets += targetLength;
|
||||
}
|
||||
icu_memmove (_this->charErrorBuffer,
|
||||
uprv_memmove (_this->charErrorBuffer,
|
||||
_this->charErrorBuffer + targetLength,
|
||||
(myCharErrorBufferLength - targetLength));
|
||||
_this->charErrorBufferLength -= (int8_t) targetLength;
|
||||
|
|
|
@ -12,8 +12,8 @@
|
|||
#ifndef UCNV_CNV_H
|
||||
#define UCNV_CNV_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
|
||||
bool_t CONVERSION_U_SUCCESS (UErrorCode err);
|
||||
|
||||
|
|
|
@ -19,11 +19,11 @@
|
|||
#include "uhash.h"
|
||||
#include "ucmp8.h"
|
||||
#include "ucmp16.h"
|
||||
#include "ucnv_bld.h"
|
||||
#include "ucnv_err.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "ucnv_cnv.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucnv.h"
|
||||
#include "unicode/ucnv.h"
|
||||
|
||||
#define VALUE_STRING_LENGTH 32
|
||||
/*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */
|
||||
|
@ -135,7 +135,7 @@ void UCNV_FROM_U_CALLBACK_SUBSTITUTE (UConverter * _this,
|
|||
|
||||
/*In case we're dealing with a modal converter a la UCNV_EBCDIC_STATEFUL,
|
||||
we need to make sure that the substitution character is emitted in the right mode*/
|
||||
icu_memcpy(togo, _this->subChar, togoLen = _this->subCharLen);
|
||||
uprv_memcpy(togo, _this->subChar, togoLen = _this->subCharLen);
|
||||
if (ucnv_getType(_this) == UCNV_EBCDIC_STATEFUL)
|
||||
{
|
||||
if ((_this->fromUnicodeStatus)&&(togoLen != 2))
|
||||
|
@ -159,7 +159,7 @@ void UCNV_FROM_U_CALLBACK_SUBSTITUTE (UConverter * _this,
|
|||
the subchar there and update the pointer */
|
||||
if ((targetLimit - *target) >= togoLen)
|
||||
{
|
||||
icu_memcpy (*target, togo, togoLen);
|
||||
uprv_memcpy (*target, togo, togoLen);
|
||||
*target += togoLen;
|
||||
*err = U_ZERO_ERROR;
|
||||
if (offsets)
|
||||
|
@ -176,14 +176,14 @@ void UCNV_FROM_U_CALLBACK_SUBSTITUTE (UConverter * _this,
|
|||
*copy the rest in the internal buffer, and increase the
|
||||
*length marker
|
||||
*/
|
||||
icu_memcpy (*target, togo, (targetLimit - *target));
|
||||
uprv_memcpy (*target, togo, (targetLimit - *target));
|
||||
if (offsets)
|
||||
{
|
||||
int i=0;
|
||||
for (i=0;i<(targetLimit - *target);i++) offsets[i]=0;
|
||||
offsets += (targetLimit - *target);
|
||||
}
|
||||
icu_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
|
||||
uprv_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
|
||||
togo + (targetLimit - *target),
|
||||
togoLen - (targetLimit - *target));
|
||||
_this->charErrorBufferLength += togoLen - (targetLimit - *target);
|
||||
|
@ -244,7 +244,7 @@ void UCNV_FROM_U_CALLBACK_ESCAPE (UConverter * _this,
|
|||
while (i < _this->invalidUCharLength)
|
||||
{
|
||||
itou (codepoint + 2, _this->invalidUCharBuffer[i++], 16, 4);
|
||||
icu_memcpy (valueString + valueStringLength, codepoint, sizeof (UChar) * 6);
|
||||
uprv_memcpy (valueString + valueStringLength, codepoint, sizeof (UChar) * 6);
|
||||
valueStringLength += CODEPOINT_STRING_LENGTH - 1;
|
||||
}
|
||||
|
||||
|
@ -282,7 +282,7 @@ void UCNV_FROM_U_CALLBACK_ESCAPE (UConverter * _this,
|
|||
*/
|
||||
if ((targetLimit - *target) >= valueStringLength)
|
||||
{
|
||||
icu_memcpy (*target, myTarget, valueStringLength);
|
||||
uprv_memcpy (*target, myTarget, valueStringLength);
|
||||
*target += valueStringLength;
|
||||
*err = U_ZERO_ERROR;
|
||||
|
||||
|
@ -307,8 +307,8 @@ void UCNV_FROM_U_CALLBACK_ESCAPE (UConverter * _this,
|
|||
for (i=0;i<(targetLimit - *target);i++) offsets[i]=0;
|
||||
offsets += (targetLimit - *target);
|
||||
}
|
||||
icu_memcpy (*target, myTarget, (targetLimit - *target));
|
||||
icu_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
|
||||
uprv_memcpy (*target, myTarget, (targetLimit - *target));
|
||||
uprv_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
|
||||
myTarget + (targetLimit - *target),
|
||||
valueStringLength - (targetLimit - *target));
|
||||
_this->charErrorBufferLength += valueStringLength - (targetLimit - *target);
|
||||
|
@ -391,7 +391,7 @@ void UCNV_TO_U_CALLBACK_ESCAPE (UConverter * _this,
|
|||
while (i < _this->invalidCharLength)
|
||||
{
|
||||
itou (codepoint + 2, _this->invalidCharBuffer[i++], 16, 2);
|
||||
icu_memcpy (uniValueString + valueStringLength, codepoint, sizeof (UChar) * 4);
|
||||
uprv_memcpy (uniValueString + valueStringLength, codepoint, sizeof (UChar) * 4);
|
||||
valueStringLength += 4;
|
||||
}
|
||||
|
||||
|
@ -400,7 +400,7 @@ void UCNV_TO_U_CALLBACK_ESCAPE (UConverter * _this,
|
|||
/*if we have enough space on the output buffer we just copy
|
||||
* the subchar there and update the pointer
|
||||
*/
|
||||
icu_memcpy (*target, uniValueString, (sizeof (UChar)) * (valueStringLength));
|
||||
uprv_memcpy (*target, uniValueString, (sizeof (UChar)) * (valueStringLength));
|
||||
if (offsets)
|
||||
{
|
||||
for (i = 0; i < valueStringLength; i++) offsets[i] = 0;
|
||||
|
@ -416,14 +416,14 @@ void UCNV_TO_U_CALLBACK_ESCAPE (UConverter * _this,
|
|||
*copy the rest in the internal buffer, and increase the
|
||||
*length marker
|
||||
*/
|
||||
icu_memcpy (*target, uniValueString, (sizeof (UChar)) * (targetLimit - *target));
|
||||
uprv_memcpy (*target, uniValueString, (sizeof (UChar)) * (targetLimit - *target));
|
||||
if (offsets)
|
||||
{
|
||||
for (i = 0; i < (targetLimit - *target); i++) offsets[i] = 0;
|
||||
}
|
||||
|
||||
|
||||
icu_memcpy (_this->UCharErrorBuffer,
|
||||
uprv_memcpy (_this->UCharErrorBuffer,
|
||||
uniValueString + (targetLimit - *target),
|
||||
(sizeof (UChar)) * (valueStringLength - (targetLimit - *target)));
|
||||
_this->UCharErrorBufferLength += valueStringLength - (targetLimit - *target);
|
||||
|
|
|
@ -1,151 +1 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
*
|
||||
* ucnv_err.h:
|
||||
* defines error behaviour functions called by T_UConverter_{from,to}Unicode
|
||||
*
|
||||
* These Functions, although public, should NEVER be called directly, they should be used as parameters to
|
||||
* the T_UConverter_setMissing{Char,Unicode}Action API, to set the behaviour of a converter
|
||||
* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
|
||||
*
|
||||
* usage example:
|
||||
*
|
||||
* ...
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* UConverter* myConverter = T_UConverter_create("ibm-949", &err);
|
||||
*
|
||||
* if (U_SUCCESS(err))
|
||||
* {
|
||||
* T_UConverter_setMissingUnicodeAction(myConverter, (MissingUnicodeAction)UCNV_FROM_U_CALLBACK_STOP, &err);
|
||||
* T_UConverter_setMissingCharAction(myConverter, (MissingCharAction)UCNV_TO_U_CALLBACK_SUBSTITUTE, &err);
|
||||
* }
|
||||
* ...
|
||||
*
|
||||
* The code above tells "myConverter" to stop when it encounters an ILLEGAL/TRUNCATED/INVALID sequence when it is used to
|
||||
* convert from Unicode -> Codepage.
|
||||
* and to substitute with a codepage specific substitutions sequence when converting from Codepage -> Unicode
|
||||
*/
|
||||
|
||||
|
||||
#ifndef UCNV_ERR_H
|
||||
#define UCNV_ERR_H
|
||||
|
||||
#include "ucnv.h"
|
||||
#include "utypes.h"
|
||||
|
||||
|
||||
/*Functor STOPS at the ILLEGAL_SEQUENCE */
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/*Functor STOPS at the ILLEGAL_SEQUENCE */
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
|
||||
|
||||
/*Functor SKIPs the ILLEGAL_SEQUENCE */
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
/* Functor Substitute the ILLEGAL SEQUENCE with the current substitution string associated with _this,
|
||||
* in the event target buffer is too small, it will store the extra info in the UConverter, and err
|
||||
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
|
||||
* store the left over data in target, before transcoding the "source Stream"
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
/* Functor Substitute the ILLEGAL SEQUENCE with a sequence of escaped codepoints corresponding to the ILLEGAL
|
||||
* SEQUENCE (format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). In the Event the Converter doesn't support the
|
||||
* characters {u,%}[A-F][0-9], it will substitute the illegal sequence with the substitution characters
|
||||
* (it will behave like the above functor).
|
||||
* in the event target buffer is too small, it will store the extra info in the UConverter, and err
|
||||
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
|
||||
* store the left over data in target, before transcoding the "source Stream"
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/*Functor SKIPs the ILLEGAL_SEQUENCE */
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/* Functor Substitute the ILLEGAL SEQUENCE with the current substitution string associated with _this,
|
||||
* in the event target buffer is too small, it will store the extra info in the UConverter, and err
|
||||
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
|
||||
* store the left over data in target, before transcoding the "source Stream"
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
/* Functor Substitute the ILLEGAL SEQUENCE with a sequence of escaped codepoints corresponding to the
|
||||
* ILLEGAL SEQUENCE (format %XNN, e.g. "%XFF%X0A%XC8%X03").
|
||||
* in the event target buffer is too small, it will store the extra info in the UConverter, and err
|
||||
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
|
||||
* store the left over data in target, before transcoding the "source Stream"
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
#endif/*UCNV_ERR_H*/
|
||||
#error Please include unicode/ucnv_err.h instead
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#ifndef UCNV_IMP_H
|
||||
#define UCNV_IMP_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#ifndef UHASH_H
|
||||
typedef struct _UHashtable UHashtable;
|
||||
|
|
|
@ -24,12 +24,12 @@
|
|||
********************************************************************************
|
||||
*/
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "umutex.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
#include "ucnv_io.h"
|
||||
#include "udata.h"
|
||||
#include "unicode/udata.h"
|
||||
|
||||
/* Format of cnvalias.dat ------------------------------------------------------
|
||||
*
|
||||
|
@ -159,7 +159,7 @@ strHalfCaseCmp(const char *str1, const char *str2) {
|
|||
return 1;
|
||||
} else {
|
||||
/* compare non-zero characters with lowercase */
|
||||
rc=(int)c1-(int)(unsigned char)icu_tolower(c2);
|
||||
rc=(int)c1-(int)(unsigned char)uprv_tolower(c2);
|
||||
if(rc!=0) {
|
||||
return rc;
|
||||
}
|
||||
|
@ -187,7 +187,7 @@ findAlias(const char *alias) {
|
|||
|
||||
/* convert the alias name to lowercase to do case-insensitive comparisons */
|
||||
for(i=0; i<sizeof(name)-1 && *alias!=0; ++i) {
|
||||
name[i]=icu_tolower(*alias++);
|
||||
name[i]=uprv_tolower(*alias++);
|
||||
}
|
||||
name[i]=0;
|
||||
|
||||
|
@ -249,7 +249,7 @@ ucnv_io_getAlias(const char *alias, uint16_t index, UErrorCode *pErrorCode) {
|
|||
const char *aliases=(const char *)aliasTable+*p;
|
||||
while(index>0) {
|
||||
/* skip a name, first the canonical converter name */
|
||||
aliases+=icu_strlen(aliases)+1;
|
||||
aliases+=uprv_strlen(aliases)+1;
|
||||
--index;
|
||||
}
|
||||
return aliases;
|
||||
|
@ -338,7 +338,7 @@ ucnv_io_getDefaultConverterName() {
|
|||
/* local variable to be thread-safe */
|
||||
const char *name=defaultConverterName;
|
||||
if(name==NULL) {
|
||||
const char *codepage=icu_getDefaultCodepage();
|
||||
const char *codepage=uprv_getDefaultCodepage();
|
||||
if(codepage!=NULL) {
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
name=ucnv_io_getConverterName(codepage, &errorCode);
|
||||
|
@ -363,7 +363,7 @@ ucnv_io_setDefaultConverterName(const char *converterName) {
|
|||
defaultConverterName=name;
|
||||
} else {
|
||||
/* do not set the name if the alias lookup failed and it is too long */
|
||||
int32_t length=icu_strlen(converterName);
|
||||
int32_t length=uprv_strlen(converterName);
|
||||
if(length<sizeof(defaultConverterNameBuffer)) {
|
||||
/* it was not found as an alias, so copy it - accept an empty name */
|
||||
bool_t didLock;
|
||||
|
@ -373,7 +373,7 @@ ucnv_io_setDefaultConverterName(const char *converterName) {
|
|||
} else {
|
||||
didLock=FALSE;
|
||||
}
|
||||
icu_memcpy(defaultConverterNameBuffer, converterName, length);
|
||||
uprv_memcpy(defaultConverterNameBuffer, converterName, length);
|
||||
defaultConverterNameBuffer[length]=0;
|
||||
defaultConverterName=defaultConverterNameBuffer;
|
||||
if(didLock) {
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#ifndef UCNV_IO_H
|
||||
#define UCNV_IO_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* Map a converter alias name to a canonical converter name.
|
||||
|
|
|
@ -17,13 +17,13 @@
|
|||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "utypes.h"
|
||||
#include "putil.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "umutex.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "filestrm.h"
|
||||
#include "udata.h"
|
||||
#include "unicode/udata.h"
|
||||
|
||||
#if !defined(HAVE_DLOPEN)
|
||||
# define HAVE_DLOPEN 0
|
||||
|
@ -151,8 +151,8 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
|
|||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
|
||||
/* set up the mapping name and the filename */
|
||||
icu_strcpy(buffer, "icu ");
|
||||
icu_strcat(buffer, basename);
|
||||
uprv_strcpy(buffer, "icu ");
|
||||
uprv_strcat(buffer, basename);
|
||||
|
||||
/* open the mapping */
|
||||
map=OpenFileMapping(FILE_MAP_READ, FALSE, buffer);
|
||||
|
@ -184,7 +184,7 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
|
|||
}
|
||||
|
||||
/* allocate the data structure */
|
||||
pData=(UDataMemory *)icu_malloc(sizeof(UDataMemory));
|
||||
pData=(UDataMemory *)uprv_malloc(sizeof(UDataMemory));
|
||||
if(pData==NULL) {
|
||||
UnmapViewOfFile(pData->p);
|
||||
CloseHandle(map);
|
||||
|
@ -212,7 +212,7 @@ udata_close(UDataMemory *pData) {
|
|||
UnmapViewOfFile(pData->p);
|
||||
CloseHandle(pData->map);
|
||||
}
|
||||
icu_free(pData);
|
||||
uprv_free(pData);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -350,7 +350,7 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
|
|||
#endif
|
||||
|
||||
/* allocate the data structure */
|
||||
pData=(UDataMemory *)icu_malloc(sizeof(UDataMemory));
|
||||
pData=(UDataMemory *)uprv_malloc(sizeof(UDataMemory));
|
||||
if(pData==NULL) {
|
||||
munmap(data, length);
|
||||
return NULL;
|
||||
|
@ -376,7 +376,7 @@ udata_close(UDataMemory *pData) {
|
|||
if(pData->length!=0 && munmap(pData->p, pData->length)==-1) {
|
||||
perror("munmap");
|
||||
}
|
||||
icu_free(pData);
|
||||
uprv_free(pData);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
@ -409,7 +409,7 @@ getChoice(Library lib, const char *entry,
|
|||
|
||||
#define NO_LIBRARY NULL
|
||||
#define IS_LIBRARY(lib) ((lib)!=NULL)
|
||||
#define UNLOAD_LIBRARY(lib) icu_free(lib)
|
||||
#define UNLOAD_LIBRARY(lib) uprv_free(lib)
|
||||
|
||||
static Library
|
||||
LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
|
||||
|
@ -431,7 +431,7 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
|
|||
}
|
||||
|
||||
/* allocate the data structure */
|
||||
pData=(UDataMemory *)icu_malloc(fileLength);
|
||||
pData=(UDataMemory *)uprv_malloc(fileLength);
|
||||
if(pData==NULL) {
|
||||
T_FileStream_close(file);
|
||||
return NULL;
|
||||
|
@ -439,7 +439,7 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
|
|||
|
||||
/* read the file */
|
||||
if(fileLength!=T_FileStream_read(file, pData, fileLength)) {
|
||||
icu_free(pData);
|
||||
uprv_free(pData);
|
||||
T_FileStream_close(file);
|
||||
return NULL;
|
||||
}
|
||||
|
@ -452,7 +452,7 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
|
|||
U_CAPI void U_EXPORT2
|
||||
udata_close(UDataMemory *pData) {
|
||||
if(pData!=NULL) {
|
||||
icu_free(pData);
|
||||
uprv_free(pData);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -474,7 +474,7 @@ udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
|
|||
if(size>info->size) {
|
||||
pInfo->size=info->size;
|
||||
}
|
||||
icu_memcpy((uint16_t *)pInfo+1, (uint16_t *)info+1, size-2);
|
||||
uprv_memcpy((uint16_t *)pInfo+1, (uint16_t *)info+1, size-2);
|
||||
} else {
|
||||
pInfo->size=0;
|
||||
}
|
||||
|
@ -496,7 +496,7 @@ udata_close(UDataMemory *pData) {
|
|||
if(IS_LIBRARY(pData->lib)) {
|
||||
UNLOAD_LIBRARY(pData->lib);
|
||||
}
|
||||
icu_free(pData);
|
||||
uprv_free(pData);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -518,14 +518,14 @@ getCommonMapData(const UDataMemory *data, const char *dataName) {
|
|||
limit=*toc++; /* number of names in this table of contents */
|
||||
while(start<limit-1) {
|
||||
number=(start+limit)/2;
|
||||
if(icu_strcmp(dataName, (const char *)(base+toc[2*number]))<0) {
|
||||
if(uprv_strcmp(dataName, (const char *)(base+toc[2*number]))<0) {
|
||||
limit=number;
|
||||
} else {
|
||||
start=number;
|
||||
}
|
||||
}
|
||||
|
||||
if(icu_strcmp(dataName, (const char *)(base+toc[2*start]))==0) {
|
||||
if(uprv_strcmp(dataName, (const char *)(base+toc[2*start]))==0) {
|
||||
/* found it */
|
||||
return (MappedData *)(base+toc[2*start+1]);
|
||||
} else {
|
||||
|
@ -574,7 +574,7 @@ udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
|
|||
if(size>info->size) {
|
||||
pInfo->size=info->size;
|
||||
}
|
||||
icu_memcpy((uint16_t *)pInfo+1, (uint16_t *)info+1, size-2);
|
||||
uprv_memcpy((uint16_t *)pInfo+1, (uint16_t *)info+1, size-2);
|
||||
} else {
|
||||
pInfo->size=0;
|
||||
}
|
||||
|
@ -591,8 +591,8 @@ static const char *strcpy_dllentry(char *target, const char *src)
|
|||
{
|
||||
int i, length;
|
||||
|
||||
icu_strcpy(target,src);
|
||||
length = icu_strlen(target);
|
||||
uprv_strcpy(target,src);
|
||||
length = uprv_strlen(target);
|
||||
for(i=0;i<length;i++)
|
||||
{
|
||||
if(target[i] == '-')
|
||||
|
@ -607,11 +607,11 @@ static const char *strcat_dllentry(char *target, const char *src)
|
|||
{
|
||||
int i, length;
|
||||
|
||||
i = icu_strlen(target); /* original size */
|
||||
i = uprv_strlen(target); /* original size */
|
||||
|
||||
icu_strcat(target,src);
|
||||
uprv_strcat(target,src);
|
||||
|
||||
length = i + icu_strlen(src);
|
||||
length = i + uprv_strlen(src);
|
||||
|
||||
for(;i<length;i++)
|
||||
{
|
||||
|
@ -645,8 +645,8 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
/* copy the path to the path buffer */
|
||||
path=u_getDataDirectory();
|
||||
if(path!=NULL && *path!=0) {
|
||||
int length=icu_strlen(path);
|
||||
icu_memcpy(pathBuffer, path, length);
|
||||
int length=uprv_strlen(path);
|
||||
uprv_memcpy(pathBuffer, path, length);
|
||||
basename+=length;
|
||||
hasPath=TRUE;
|
||||
} else {
|
||||
|
@ -655,11 +655,11 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
|
||||
/* add (prefix and) basename */
|
||||
# ifndef LIB_PREFIX
|
||||
icu_strcpy(basename, COMMON_DATA_NAME);
|
||||
uprv_strcpy(basename, COMMON_DATA_NAME);
|
||||
suffix=basename+COMMON_DATA_NAME_LENGTH;
|
||||
# else
|
||||
icu_memcpy(basename, LIB_PREFIX, LIB_PREFIX_LENGTH);
|
||||
icu_strcpy(basename+LIB_PREFIX_LENGTH, COMMON_DATA_NAME);
|
||||
uprv_memcpy(basename, LIB_PREFIX, LIB_PREFIX_LENGTH);
|
||||
uprv_strcpy(basename+LIB_PREFIX_LENGTH, COMMON_DATA_NAME);
|
||||
suffix=basename+LIB_PREFIX_LENGTH+COMMON_DATA_NAME_LENGTH;
|
||||
# endif
|
||||
hasBasename=TRUE;
|
||||
|
@ -669,14 +669,14 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
isICUData=FALSE;
|
||||
|
||||
/* find the last file separator */
|
||||
basename=icu_strrchr(path, '/');
|
||||
basename=uprv_strrchr(path, '/');
|
||||
if(basename==NULL) {
|
||||
basename=(char *)path;
|
||||
} else {
|
||||
++basename;
|
||||
}
|
||||
|
||||
basename2=icu_strrchr(basename, '\\');
|
||||
basename2=uprv_strrchr(basename, '\\');
|
||||
if(basename2!=NULL) {
|
||||
basename=basename2+1;
|
||||
}
|
||||
|
@ -684,16 +684,16 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
if(path!=basename) {
|
||||
# ifndef LIB_PREFIX
|
||||
/* copy the path/basename to the path buffer */
|
||||
icu_strcpy(pathBuffer, path);
|
||||
uprv_strcpy(pathBuffer, path);
|
||||
basename=pathBuffer+(basename-path);
|
||||
# else
|
||||
/* copy the path to the path buffer */
|
||||
icu_memcpy(pathBuffer, path, basename-path);
|
||||
uprv_memcpy(pathBuffer, path, basename-path);
|
||||
|
||||
/* add prefix and basename */
|
||||
suffix=pathBuffer+(basename-path);
|
||||
icu_memcpy(suffix, LIB_PREFIX, LIB_PREFIX_LENGTH);
|
||||
icu_strcpy(suffix+LIB_PREFIX_LENGTH, basename);
|
||||
uprv_memcpy(suffix, LIB_PREFIX, LIB_PREFIX_LENGTH);
|
||||
uprv_strcpy(suffix+LIB_PREFIX_LENGTH, basename);
|
||||
basename=suffix;
|
||||
# endif
|
||||
hasPath=TRUE;
|
||||
|
@ -701,8 +701,8 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
/* copy the path to the path buffer */
|
||||
path=u_getDataDirectory();
|
||||
if(path!=NULL && *path!=0) {
|
||||
int length=icu_strlen(path);
|
||||
icu_memcpy(pathBuffer, path, length);
|
||||
int length=uprv_strlen(path);
|
||||
uprv_memcpy(pathBuffer, path, length);
|
||||
suffix=pathBuffer+length;
|
||||
hasPath=TRUE;
|
||||
} else {
|
||||
|
@ -712,16 +712,16 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
|
||||
/* add (prefix and) basename */
|
||||
# ifndef LIB_PREFIX
|
||||
icu_strcpy(suffix, basename);
|
||||
uprv_strcpy(suffix, basename);
|
||||
# else
|
||||
icu_memcpy(suffix, LIB_PREFIX, LIB_PREFIX_LENGTH);
|
||||
icu_strcpy(suffix+LIB_PREFIX_LENGTH, basename);
|
||||
uprv_memcpy(suffix, LIB_PREFIX, LIB_PREFIX_LENGTH);
|
||||
uprv_strcpy(suffix+LIB_PREFIX_LENGTH, basename);
|
||||
# endif
|
||||
basename=suffix;
|
||||
}
|
||||
hasBasename= *basename!=0;
|
||||
if(hasBasename) {
|
||||
suffix=basename+icu_strlen(basename);
|
||||
suffix=basename+uprv_strlen(basename);
|
||||
}
|
||||
}
|
||||
path=pathBuffer;
|
||||
|
@ -731,19 +731,19 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
#ifdef UDATA_DLL
|
||||
strcpy_dllentry(entryNameBuffer, name);
|
||||
#else
|
||||
icu_strcpy(entryNameBuffer, name);
|
||||
uprv_strcpy(entryNameBuffer, name);
|
||||
#endif
|
||||
|
||||
# ifdef UDATA_DLL
|
||||
icu_strcat(entryNameBuffer, "_");
|
||||
uprv_strcat(entryNameBuffer, "_");
|
||||
# else
|
||||
icu_strcat(entryNameBuffer, ".");
|
||||
uprv_strcat(entryNameBuffer, ".");
|
||||
# endif
|
||||
|
||||
#ifdef UDATA_DLL
|
||||
strcat_dllentry(entryNameBuffer, type);
|
||||
#else
|
||||
icu_strcat(entryNameBuffer, type);
|
||||
uprv_strcat(entryNameBuffer, type);
|
||||
#endif
|
||||
|
||||
entryName=entryNameBuffer;
|
||||
|
@ -772,7 +772,7 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
/* load the common data if necessary */
|
||||
if(!IS_LIBRARY(lib)) {
|
||||
/* try path/basename first */
|
||||
icu_strcpy(suffix, LIB_SUFFIX);
|
||||
uprv_strcpy(suffix, LIB_SUFFIX);
|
||||
lib=LOAD_LIBRARY(path, basename, TRUE);
|
||||
if(!IS_LIBRARY(lib)) {
|
||||
/* try basename only next */
|
||||
|
@ -820,9 +820,9 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
/* try basename+"_"+entryName[+LIB_SUFFIX] first */
|
||||
if(p==NULL && hasBasename) {
|
||||
*suffix='_';
|
||||
icu_strcpy(suffix+1, entryName);
|
||||
uprv_strcpy(suffix+1, entryName);
|
||||
# ifdef UDATA_DLL
|
||||
icu_strcat(suffix+1, LIB_SUFFIX);
|
||||
uprv_strcat(suffix+1, LIB_SUFFIX);
|
||||
# endif
|
||||
|
||||
/* try path/basename first */
|
||||
|
@ -849,12 +849,12 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
/* try entryName[+LIB_SUFFIX] next */
|
||||
if(p==NULL) {
|
||||
# ifndef LIB_PREFIX
|
||||
icu_strcpy(basename, entryName);
|
||||
uprv_strcpy(basename, entryName);
|
||||
# else
|
||||
icu_strcpy(basename+LIB_PREFIX_LENGTH, entryName);
|
||||
uprv_strcpy(basename+LIB_PREFIX_LENGTH, entryName);
|
||||
# endif
|
||||
# ifdef UDATA_DLL
|
||||
icu_strcat(basename, LIB_SUFFIX);
|
||||
uprv_strcat(basename, LIB_SUFFIX);
|
||||
# endif
|
||||
|
||||
/* try path/basename first */
|
||||
|
@ -896,7 +896,7 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
# endif
|
||||
|
||||
/* allocate the data structure */
|
||||
pData=(UDataMemory *)icu_malloc(sizeof(UDataMemory));
|
||||
pData=(UDataMemory *)uprv_malloc(sizeof(UDataMemory));
|
||||
if(pData==NULL) {
|
||||
if(IS_LIBRARY(lib)) {
|
||||
UNLOAD_LIBRARY(lib);
|
||||
|
@ -909,7 +909,7 @@ doOpenChoice(const char *path, const char *type, const char *name,
|
|||
pData->lib=lib;
|
||||
# else
|
||||
/* defined(UDATA_MAP) && !IS_LIBRARY(lib) */
|
||||
icu_memset(pData, 0, sizeof(pData));
|
||||
uprv_memset(pData, 0, sizeof(pData));
|
||||
# endif
|
||||
|
||||
pData->p=p;
|
||||
|
|
|
@ -1,215 +1 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* *
|
||||
* COPYRIGHT: *
|
||||
* (C) Copyright International Business Machines Corporation, 1999 *
|
||||
* Licensed Material - Program-Property of IBM - All Rights Reserved. *
|
||||
* US Government Users Restricted Rights - Use, duplication, or disclosure *
|
||||
* restricted by GSA ADP Schedule Contract with IBM Corp. *
|
||||
* *
|
||||
*******************************************************************************
|
||||
* file name: udata.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999oct25
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UDATA_H__
|
||||
#define __UDATA_H__
|
||||
|
||||
#include "utypes.h"
|
||||
|
||||
/**
|
||||
* Information about data memory.
|
||||
* This structure may grow in the future, indicated by the
|
||||
* <code>size</code> field.
|
||||
*
|
||||
* <p>The platform data property fields help determine if a data
|
||||
* file can be efficiently used on a given machine.
|
||||
* The particular fields are of importance only if the data
|
||||
* is affected by the properties - if there is integer data
|
||||
* with word sizes > 1 byte, char* text, or UChar* text.</p>
|
||||
*
|
||||
* <p>The implementation for the <code>udata_open[Choice]()</code>
|
||||
* functions may reject data based on the value in <code>isBigEndian</code>.
|
||||
* No other field is used by the <code>udata</code> API implementation.</p>
|
||||
*
|
||||
* <p>The <code>dataFormat</code> may be used to identify
|
||||
* the kind of data, e.g. a converter table.</p>
|
||||
*
|
||||
* <p>The <code>formatVersion</code> field should be used to
|
||||
* make sure that the format can be interpreted.
|
||||
* It may be a good idea to check only for the one or two highest
|
||||
* of the version elements to allow the data memory to
|
||||
* get more or somewhat rearranged contents, for as long
|
||||
* as the using code can still interpret the older contents.</p>
|
||||
*
|
||||
* <p>The <code>dataVersion</code> field is intended to be a
|
||||
* common place to store the source version of the data;
|
||||
* for data from the Unicode character database, this could
|
||||
* reflect the Unicode version.</p>
|
||||
*/
|
||||
typedef struct {
|
||||
/** @memo sizeof(UDataInfo) */
|
||||
uint16_t size;
|
||||
|
||||
/** @memo unused, set to 0 */
|
||||
uint16_t reservedWord;
|
||||
|
||||
/* platform data properties */
|
||||
/** @memo 0 for little-endian machine, 1 for big-endian */
|
||||
uint8_t isBigEndian;
|
||||
|
||||
/** @memo see U_CHARSET_FAMILY values in utypes.h */
|
||||
uint8_t charsetFamily;
|
||||
|
||||
/** @memo sizeof(UChar), one of { 1, 2, 4 } */
|
||||
uint8_t sizeofUChar;
|
||||
|
||||
/** @memo unused, set to 0 */
|
||||
uint8_t reservedByte;
|
||||
|
||||
/** @memo data format identifier */
|
||||
uint8_t dataFormat[4];
|
||||
|
||||
/** @memo versions: [0] major [1] minor [2] milli [3] micro */
|
||||
uint8_t formatVersion[4];
|
||||
uint8_t dataVersion[4];
|
||||
} UDataInfo;
|
||||
|
||||
/* API for reading data -----------------------------------------------------*/
|
||||
|
||||
/**
|
||||
* Forward declaration of the data memory type.
|
||||
*/
|
||||
typedef struct UDataMemory UDataMemory;
|
||||
|
||||
/**
|
||||
* Callback function for udata_openChoice().
|
||||
* @param context parameter passed into <code>udata_openChoice()</code>.
|
||||
* @param type The type of the data as passed into <code>udata_openChoice()</code>.
|
||||
* It may be <code>NULL</code>.
|
||||
* @param name The name of the data as passed into <code>udata_openChoice()</code>.
|
||||
* @param pInfo A pointer to the <code>UDataInfo</code> structure
|
||||
* of data that has been loaded and will be returned
|
||||
* by <code>udata_openChoice()</code> if this function
|
||||
* returns <code>TRUE</code>.
|
||||
* @return TRUE if the current data memory is acceptable
|
||||
*/
|
||||
typedef bool_t
|
||||
UDataMemoryIsAcceptable(void *context,
|
||||
const char *type, const char *name,
|
||||
UDataInfo *pInfo);
|
||||
|
||||
|
||||
/**
|
||||
* Convenience function.
|
||||
* This function works the same as <code>udata_openChoice</code>
|
||||
* except that any data that matches the type and name
|
||||
* is assumed to be acceptable.
|
||||
*/
|
||||
U_CAPI UDataMemory * U_EXPORT2
|
||||
udata_open(const char *path, const char *type, const char *name,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Data loading function.
|
||||
* This function is used to find and load efficiently data for
|
||||
* ICU and applications using ICU.
|
||||
* It provides an abstract interface that allows to specify a data
|
||||
* type and name to find and load the data.
|
||||
*
|
||||
* <p>The implementation depends on platform properties and user preferences
|
||||
* and may involve loading shared libraries (DLLs), mapping
|
||||
* files into memory, or fopen()/fread() files.
|
||||
* It may also involve using static memory or database queries etc.
|
||||
* Several or all data items may be combined into one entity
|
||||
* (DLL, memory-mappable file).</p>
|
||||
*
|
||||
* <p>The data is always preceded by a header that includes
|
||||
* a <code>UDataInfo</code> structure.
|
||||
* The caller's <code>isAcceptable()</code> function is called to make
|
||||
* sure that the data is useful. It may be called several times if it
|
||||
* rejects the data and there is more than one location with data
|
||||
* matching the type and name.</p>
|
||||
*
|
||||
* <p>If <code>path==NULL</code>, then ICU data is loaded.
|
||||
* Otherwise, it is separated into a basename and a basename-less path string.
|
||||
* If the path string is empty, then <code>u_getDataDirectory()</code>
|
||||
* is set in its place.
|
||||
* When data is loaded from files or DLLs (shared libraries) and
|
||||
* may be stored in common files, then the data finding is roughly as follows:
|
||||
* <ul>
|
||||
* <li>common file at path/basename has entry name_type?</li>
|
||||
* <li>common file at basename has entry name_type?</li>
|
||||
* <li>separate file at path/basename_name_type?</li>
|
||||
* <li>separate file at basename_name_type?</li>
|
||||
* <li>separate file at path/name_type?</li>
|
||||
* <li>separate file at name_type?</li>
|
||||
* </ul>
|
||||
* If the basename is empty, then only the last two options are attempted.
|
||||
* Otherwise, it serves as a name for a common data file or as a basename
|
||||
* (collection name) prefix for individual files.</p>
|
||||
*
|
||||
* @param path Specifies an absolute path and/or a basename for the
|
||||
* finding of the data in the file system.
|
||||
* <code>NULL</code> for ICU data.
|
||||
* @param type A string that specifies the type of data to be loaded.
|
||||
* For example, resource bundles are loaded with type "res",
|
||||
* conversion tables with type "cnv".
|
||||
* This may be <code>NULL</code> or empty.
|
||||
* @param name A string that specifies the name of the data.
|
||||
* @param isAcceptable This function is called to verify that loaded data
|
||||
* is useful for the client code. If it returns FALSE
|
||||
* for all data items, then <code>udata_openChoice()</code>
|
||||
* will return with an error.
|
||||
* @param context Arbitrary parameter to be passed into isAcceptable.
|
||||
* @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
|
||||
* @return A pointer (handle) to a data memory object, or <code>NULL</code>
|
||||
* if an error occurs. Call <code>udata_getMemory()</code>
|
||||
* to get a pointer to the actual data.
|
||||
*/
|
||||
U_CAPI UDataMemory * U_EXPORT2
|
||||
udata_openChoice(const char *path, const char *type, const char *name,
|
||||
UDataMemoryIsAcceptable *isAcceptable, void *context,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Close the data memory.
|
||||
* This function must be called to allow the system to
|
||||
* release resources associated with this data memory.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
udata_close(UDataMemory *pData);
|
||||
|
||||
/**
|
||||
* Get the pointer to the actual data inside the data memory.
|
||||
* The data is read-only.
|
||||
*/
|
||||
U_CAPI const void * U_EXPORT2
|
||||
udata_getMemory(UDataMemory *pData);
|
||||
|
||||
/**
|
||||
* Get the information from the data memory header.
|
||||
* This allows to get access to the header containing
|
||||
* platform data properties etc. which is not part of
|
||||
* the data itself and can therefore not be accessed
|
||||
* via the pointer that <code>udata_getMemory()</code> returns.
|
||||
*
|
||||
* @param pData pointer to the data memory object
|
||||
* @param pInfo pointer to a UDataInfo object;
|
||||
* its <code>size</code> field must be set correctly,
|
||||
* typically to <code>sizeof(UDataInfo)</code>.
|
||||
*
|
||||
* <code>*pInfo</code> will be filled with the UDataInfo structure
|
||||
* in the data memory object. If this structure is smaller than
|
||||
* <code>pInfo->size</code>, then the <code>size</code> will be
|
||||
* adjusted and only part of the structure will be filled.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
|
||||
|
||||
#endif
|
||||
#error Please include unicode/udata.h instead
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
*/
|
||||
|
||||
#include "uhash.h"
|
||||
#include "ustring.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "cstring.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
|
@ -90,7 +90,7 @@ uhash_openSize(UHashFunction func,
|
|||
|
||||
if(U_FAILURE(*status)) return NULL;
|
||||
|
||||
result = (UHashtable*) icu_malloc(sizeof(UHashtable));
|
||||
result = (UHashtable*) uprv_malloc(sizeof(UHashtable));
|
||||
if(result == 0) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
|
@ -107,7 +107,7 @@ uhash_openSize(UHashFunction func,
|
|||
uhash_initialize(result, uhash_leastGreaterPrimeIndex(size), status);
|
||||
|
||||
if(U_FAILURE(*status)) {
|
||||
icu_free(result);
|
||||
uprv_free(result);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -135,9 +135,9 @@ uhash_close(UHashtable *hash)
|
|||
while (toBeDeletedCount--) my_free(toBeDeleted[toBeDeletedCount]);
|
||||
|
||||
}
|
||||
icu_free(hash->values);
|
||||
icu_free(hash->hashes);
|
||||
icu_free(hash->toBeDeleted);
|
||||
uprv_free(hash->values);
|
||||
uprv_free(hash->hashes);
|
||||
uprv_free(hash->toBeDeleted);
|
||||
}
|
||||
|
||||
U_CAPI int32_t
|
||||
|
@ -182,7 +182,7 @@ uhash_putKey(UHashtable *hash,
|
|||
void * result = hash->values[index];
|
||||
if (result != value) /*Make sure the same object isn't scheduled for a double deletion*/
|
||||
{
|
||||
hash->toBeDeleted = (void**) icu_realloc(hash->toBeDeleted, sizeof(void*)*(++(hash->toBeDeletedCount)));
|
||||
hash->toBeDeleted = (void**) uprv_realloc(hash->toBeDeleted, sizeof(void*)*(++(hash->toBeDeletedCount)));
|
||||
hash->toBeDeleted[(hash->toBeDeletedCount)-1] = result;
|
||||
}
|
||||
hash->values[index] = 0;
|
||||
|
@ -232,7 +232,7 @@ uhash_put(UHashtable *hash,
|
|||
void* result = hash->values[index];
|
||||
if (result != value) /*Make sure the same object isn't scheduled for a double deletion*/
|
||||
{
|
||||
hash->toBeDeleted = (void**) icu_realloc(hash->toBeDeleted,
|
||||
hash->toBeDeleted = (void**) uprv_realloc(hash->toBeDeleted,
|
||||
sizeof(void*)*(++(hash->toBeDeletedCount)));
|
||||
hash->toBeDeleted[(hash->toBeDeletedCount)-1] = result;
|
||||
}
|
||||
|
@ -344,16 +344,16 @@ uhash_initialize(UHashtable *hash,
|
|||
hash->primeIndex = primeIndex;
|
||||
hash->length = UHASH_PRIMES[primeIndex];
|
||||
|
||||
hash->values = (void**) icu_malloc(sizeof(void*) * hash->length);
|
||||
hash->values = (void**) uprv_malloc(sizeof(void*) * hash->length);
|
||||
if(hash->values == 0) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
|
||||
hash->hashes = (int32_t*) icu_malloc(sizeof(int32_t) * hash->length);
|
||||
hash->hashes = (int32_t*) uprv_malloc(sizeof(int32_t) * hash->length);
|
||||
if(hash->values == 0) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
icu_free(hash->values);
|
||||
uprv_free(hash->values);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -413,8 +413,8 @@ uhash_rehash(UHashtable *hash,
|
|||
}
|
||||
}
|
||||
|
||||
icu_free(oldValues);
|
||||
icu_free(oldHashList);
|
||||
uprv_free(oldValues);
|
||||
uprv_free(oldHashList);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -536,7 +536,7 @@ uhash_hashString(const void *parm)
|
|||
{
|
||||
if(parm != NULL) {
|
||||
const char *key = (const char*) parm;
|
||||
int32_t len = icu_strlen(key);
|
||||
int32_t len = uprv_strlen(key);
|
||||
int32_t hash = UHASH_INVALID;
|
||||
const char *limit = key + len;
|
||||
int32_t inc = (len >= 128 ? len/64 : 1);
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#ifndef UHASH_H
|
||||
#define UHASH_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/*
|
||||
* Hashtable stores key-value pairs and does efficient lookup based on keys.
|
||||
|
|
|
@ -19,14 +19,14 @@
|
|||
******************************************************************************/
|
||||
|
||||
|
||||
#include "uloc.h"
|
||||
#include "unicode/uloc.h"
|
||||
|
||||
#include "utypes.h"
|
||||
#include "ures.h"
|
||||
#include "uchar.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "umutex.h"
|
||||
#include "cstring.h"
|
||||
#include "ustring.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "cmemory.h"
|
||||
|
||||
/****************************************************************************
|
||||
|
@ -188,11 +188,11 @@ int16_t _findIndex(const char* list, int32_t listLength, const char* key)
|
|||
const char* listEnd = anchor + listLength;
|
||||
bool_t found = FALSE;
|
||||
int index = 0;
|
||||
int tokenSize = icu_strlen(list)+1; /*gets the size of the tokens*/
|
||||
int tokenSize = uprv_strlen(list)+1; /*gets the size of the tokens*/
|
||||
|
||||
while (!found && list<listEnd)
|
||||
{
|
||||
if (icu_strcmp(key, list) == 0)
|
||||
if (uprv_strcmp(key, list) == 0)
|
||||
{
|
||||
found = TRUE;
|
||||
break;
|
||||
|
@ -227,16 +227,16 @@ void uloc_setDefault(const char* newDefaultLocale,
|
|||
|
||||
if (newDefaultLocale == NULL)
|
||||
{
|
||||
newDefaultLocale = icu_getDefaultLocaleID();
|
||||
newDefaultLocale = uprv_getDefaultLocaleID();
|
||||
}
|
||||
|
||||
umtx_lock(NULL);
|
||||
if(_defaultLocale == NULL)
|
||||
_defaultLocale = (char*)icu_malloc(sizeof(char) * (icu_strlen(newDefaultLocale) + 1));
|
||||
_defaultLocale = (char*)uprv_malloc(sizeof(char) * (uprv_strlen(newDefaultLocale) + 1));
|
||||
else
|
||||
_defaultLocale = (char*)icu_realloc(_defaultLocale,
|
||||
sizeof(char) * (icu_strlen(newDefaultLocale) + 1));
|
||||
icu_strcpy(_defaultLocale, newDefaultLocale);
|
||||
_defaultLocale = (char*)uprv_realloc(_defaultLocale,
|
||||
sizeof(char) * (uprv_strlen(newDefaultLocale) + 1));
|
||||
uprv_strcpy(_defaultLocale, newDefaultLocale);
|
||||
umtx_unlock(NULL);
|
||||
|
||||
/* propagate change to C++ */
|
||||
|
@ -281,7 +281,7 @@ int32_t uloc_getParent(const char* localeID,
|
|||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
|
||||
if (parentCapacity>0) parent[icu_min(i,parentCapacity-1)] = '\0';
|
||||
if (parentCapacity>0) parent[uprv_min(i,parentCapacity-1)] = '\0';
|
||||
|
||||
|
||||
return i+1;
|
||||
|
@ -316,7 +316,8 @@ uloc_getLanguage(const char* localeID,
|
|||
|
||||
if (languageCapacity > 0)
|
||||
{
|
||||
language[icu_min(i,languageCapacity-1)] = '\0';
|
||||
language[uprv_min(i,languageCapacity-1)] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
return i+1;
|
||||
|
@ -353,7 +354,7 @@ int32_t uloc_getCountry(const char* localeID,
|
|||
*err = U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
|
||||
if (countryCapacity > 0) {country[icu_min(i,countryCapacity-1)] = '\0';}
|
||||
if (countryCapacity > 0) {country[uprv_min(i,countryCapacity-1)] = '\0';}
|
||||
return i+1;
|
||||
}
|
||||
|
||||
|
@ -390,7 +391,7 @@ int32_t uloc_getVariant(const char* localeID,
|
|||
}
|
||||
|
||||
|
||||
if (variantCapacity>0) {variant[icu_min(i,variantCapacity-1)] = '\0';}
|
||||
if (variantCapacity>0) {variant[uprv_min(i,variantCapacity-1)] = '\0';}
|
||||
return i+1;
|
||||
}
|
||||
|
||||
|
@ -441,20 +442,20 @@ int32_t uloc_getName(const char* localeID,
|
|||
/*We fill in the users buffer*/
|
||||
if ((nameCapacity>0) && cntSze)
|
||||
{
|
||||
if (U_SUCCESS(int_err)) icu_strcat(name, "_");
|
||||
if (U_SUCCESS(int_err)) uprv_strcat(name, "_");
|
||||
|
||||
uloc_getCountry(localeID,
|
||||
name + icu_strlen(name),
|
||||
nameCapacity - icu_strlen(name),
|
||||
name + uprv_strlen(name),
|
||||
nameCapacity - uprv_strlen(name),
|
||||
&int_err);
|
||||
|
||||
if (varSze)
|
||||
{
|
||||
if (U_SUCCESS(int_err)) icu_strcat(name, "_");
|
||||
if (U_SUCCESS(int_err)) uprv_strcat(name, "_");
|
||||
|
||||
uloc_getVariant(localeID,
|
||||
name + icu_strlen(name),
|
||||
nameCapacity - icu_strlen(name),
|
||||
name + uprv_strlen(name),
|
||||
nameCapacity - uprv_strlen(name),
|
||||
&int_err);
|
||||
}
|
||||
|
||||
|
@ -542,7 +543,7 @@ int32_t uloc_getDisplayLanguage(const char* locale,
|
|||
inLocale = uloc_getDefault();
|
||||
isDefaultLocale = TRUE;
|
||||
}
|
||||
else if (icu_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
|
||||
else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
|
||||
/*truncates the fallback mechanism if we start out with a defaultLocale*/
|
||||
|
||||
if (locale == NULL) locale = uloc_getDefault();
|
||||
|
@ -669,7 +670,7 @@ int32_t uloc_getDisplayCountry(const char* locale,
|
|||
inLocale = uloc_getDefault();
|
||||
isDefaultLocale = TRUE;
|
||||
}
|
||||
else if (icu_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
|
||||
else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
|
||||
/*truncates the fallback mechanism if we start out with a defaultLocale*/
|
||||
|
||||
if (locale == NULL) locale = uloc_getDefault();
|
||||
|
@ -793,7 +794,7 @@ int32_t uloc_getDisplayVariant(const char* locale,
|
|||
inLocale = uloc_getDefault();
|
||||
isDefaultLocale = TRUE;
|
||||
}
|
||||
else if (icu_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
|
||||
else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
|
||||
/*truncates the fallback mechanism if we start out with a defaultLocale*/
|
||||
|
||||
if (locale == NULL) locale = uloc_getDefault();
|
||||
|
@ -806,20 +807,20 @@ int32_t uloc_getDisplayVariant(const char* locale,
|
|||
/*In case the variant is longer than our stack buffers*/
|
||||
if (err == U_BUFFER_OVERFLOW_ERROR)
|
||||
{
|
||||
inVariant = (char*)icu_malloc(varBufSize*sizeof(char)+1);
|
||||
inVariant = (char*)uprv_malloc(varBufSize*sizeof(char)+1);
|
||||
if (inVariant == NULL) goto NO_MEMORY;
|
||||
inVariantTag = (char*)icu_malloc(varBufSize*sizeof(char)+icu_strlen("%%")+1);
|
||||
inVariantTag = (char*)uprv_malloc(varBufSize*sizeof(char)+uprv_strlen("%%")+1);
|
||||
if (inVariantTag == NULL)
|
||||
{
|
||||
icu_free(inVariant);
|
||||
uprv_free(inVariant);
|
||||
goto NO_MEMORY;
|
||||
}
|
||||
err = U_ZERO_ERROR;
|
||||
uloc_getVariant(locale, inVariant, varBufSize, &err);
|
||||
}
|
||||
|
||||
icu_strcpy(inVariantTag,"%%");
|
||||
icu_strcat(inVariantTag, inVariant);
|
||||
uprv_strcpy(inVariantTag,"%%");
|
||||
uprv_strcat(inVariantTag, inVariant);
|
||||
|
||||
/*We need to implement a fallback mechanism here because we are getting keys out of a
|
||||
tagged array, there is no capability of doing this with fallback through the resource
|
||||
|
@ -902,8 +903,8 @@ int32_t uloc_getDisplayVariant(const char* locale,
|
|||
/*Clean up memory*/
|
||||
if (inVariant != inVariantBuffer)
|
||||
{
|
||||
icu_free(inVariant);
|
||||
icu_free(inVariantTag);
|
||||
uprv_free(inVariant);
|
||||
uprv_free(inVariantTag);
|
||||
}
|
||||
return i;
|
||||
|
||||
|
@ -1047,13 +1048,13 @@ void _lazyEvaluate_installedLocales()
|
|||
{
|
||||
temp = T_ResourceBundle_listInstalledLocales(u_getDataDirectory(),
|
||||
&_installedLocalesCount);
|
||||
temp2 = (char **) icu_malloc(sizeof(char*) * (_installedLocalesCount+1));
|
||||
temp2 = (char **) uprv_malloc(sizeof(char*) * (_installedLocalesCount+1));
|
||||
|
||||
for (i = 0; i < _installedLocalesCount; i++)
|
||||
{
|
||||
strSize = u_strlen(T_UnicodeString_getUChars(temp[i]));
|
||||
|
||||
temp2[i] = (char*) icu_malloc(sizeof(char) *
|
||||
temp2[i] = (char*) uprv_malloc(sizeof(char) *
|
||||
(strSize + 1));
|
||||
|
||||
T_UnicodeString_extract(temp[i], temp2[i]);
|
||||
|
@ -1067,8 +1068,8 @@ void _lazyEvaluate_installedLocales()
|
|||
temp2 = NULL;
|
||||
}
|
||||
else {
|
||||
for (i = 0; i < _installedLocalesCount; i++) icu_free(temp2[i]);
|
||||
icu_free(temp2);
|
||||
for (i = 0; i < _installedLocalesCount; i++) uprv_free(temp2[i]);
|
||||
uprv_free(temp2);
|
||||
}
|
||||
umtx_unlock(NULL);
|
||||
|
||||
|
@ -1095,7 +1096,7 @@ const char* const* uloc_getISOLanguages()
|
|||
|
||||
if (_isoLanguages == NULL)
|
||||
{
|
||||
_isoLanguages = (char**) icu_malloc(sizeof(char*)*(1+(sizeof(_languages) / 3)));
|
||||
_isoLanguages = (char**) uprv_malloc(sizeof(char*)*(1+(sizeof(_languages) / 3)));
|
||||
|
||||
end = _languages + (sizeof(_languages));
|
||||
from = _languages;
|
||||
|
@ -1132,7 +1133,7 @@ const char* const* uloc_getISOCountries()
|
|||
|
||||
if (_isoCountries == NULL)
|
||||
{
|
||||
_isoCountries = (char**) icu_malloc(sizeof(char*)*(1+(sizeof(_countries) / 3)));
|
||||
_isoCountries = (char**) uprv_malloc(sizeof(char*)*(1+(sizeof(_countries) / 3)));
|
||||
|
||||
end = _countries + (sizeof(_countries));
|
||||
from = _countries;
|
||||
|
|
|
@ -1,469 +1 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* File ULOC.H
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 04/01/97 aliu Creation.
|
||||
* 08/22/98 stephen JDK 1.2 sync.
|
||||
* 12/08/98 rtg New C API for Locale
|
||||
* 03/30/99 damiba overhaul
|
||||
* 03/31/99 helena Javadoc for uloc functions.
|
||||
* 04/15/99 Madhu Updated Javadoc
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ULOC_H
|
||||
#define ULOC_H
|
||||
|
||||
#include "utypes.h"
|
||||
|
||||
/**
|
||||
*
|
||||
* A <code>Locale</code> represents a specific geographical, political,
|
||||
* or cultural region. An operation that requires a <code>Locale</code> to perform
|
||||
* its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
|
||||
* to tailor information for the user. For example, displaying a number
|
||||
* is a locale-sensitive operation--the number should be formatted
|
||||
* according to the customs/conventions of the user's native country,
|
||||
* region, or culture. In the C APIs, a locale is simply a const char string.
|
||||
*
|
||||
* <P>
|
||||
* You create a <code>Locale</code> with one of the three options listed below.
|
||||
* Each of the components is separated by '_' in the locale string.
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . newLanguage
|
||||
* .
|
||||
* . newLanguage + newCountry
|
||||
* .
|
||||
* . newLanguage + newCountry + newVariant
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* The first option is a valid <STRONG>ISO
|
||||
* Language Code.</STRONG> These codes are the lower-case two-letter
|
||||
* codes as defined by ISO-639.
|
||||
* You can find a full list of these codes at a number of sites, such as:
|
||||
* <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
|
||||
* <code>http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</code></a>
|
||||
*
|
||||
* <P>
|
||||
* The second option includes an additional <STRONG>ISO Country
|
||||
* Code.</STRONG> These codes are the upper-case two-letter codes
|
||||
* as defined by ISO-3166.
|
||||
* You can find a full list of these codes at a number of sites, such as:
|
||||
* <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
|
||||
* <code>http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</code></a>
|
||||
*
|
||||
* <P>
|
||||
* The third option requires one more piece of information--the
|
||||
* <STRONG>Variant.</STRONG>
|
||||
* The Variant codes are vendor and browser-specific.
|
||||
* For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
|
||||
* Where there are two variants, separate them with an underscore, and
|
||||
* put the most important one first. For
|
||||
* example, a Traditional Spanish collation might be referenced, with
|
||||
* "ES", "ES", "Traditional_WIN".
|
||||
*
|
||||
* <P>
|
||||
* Because a <code>Locale</code> is just an identifier for a region,
|
||||
* no validity check is performed when you specify a <code>Locale</code>.
|
||||
* If you want to see whether particular resources are available for the
|
||||
* <code>Locale</code> you asked for, you must query those resources. For
|
||||
* example, ask the <code>UNumberFormat</code> for the locales it supports
|
||||
* using its <code>getAvailable</code> method.
|
||||
* <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
|
||||
* locale, you get back the best available match, not necessarily
|
||||
* precisely what you asked for. For more information, look at
|
||||
* <a href="ures.html"><code>UResourceBundle</code></a>.
|
||||
*
|
||||
* <P>
|
||||
* The <code>Locale</code> provides a number of convenient constants
|
||||
* that you can use to specify the commonly used
|
||||
* locales. For example, the following refers to a locale
|
||||
* for the United States:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . ULOC_US
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*
|
||||
* <P>
|
||||
* Once you've specified a locale you can query it for information about
|
||||
* itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and
|
||||
* <code>uloc_getLanguage</code> to get the ISO Language Code. You can
|
||||
* use <code>uloc_getDisplayCountry</code> to get the
|
||||
* name of the country suitable for displaying to the user. Similarly,
|
||||
* you can use <code>uloc_getDisplayLanguage</code> to get the name of
|
||||
* the language suitable for displaying to the user. Interestingly,
|
||||
* the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive
|
||||
* and have two versions: one that uses the default locale and one
|
||||
* that takes a locale as an argument and displays the name or country in
|
||||
* a language appropriate to that locale.
|
||||
*
|
||||
* <P>
|
||||
* The ICU provides a number of services that perform locale-sensitive
|
||||
* operations. For example, the <code>unum_xxx</code> functions format
|
||||
* numbers, currency, or percentages in a locale-sensitive manner.
|
||||
* </P>
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . UErrorCode success = U_ZERO_ERROR;
|
||||
* . UNumberFormat *nf;
|
||||
* . const char* myLocale = "fr_FR";
|
||||
* .
|
||||
* . nf = unum_open( UNUM_DEFAULT, NULL, success );
|
||||
* . unum_close(nf);
|
||||
* . nf = unum_open( UNUM_CURRENCY, NULL, success );
|
||||
* . unum_close(nf);
|
||||
* . nf = unum_open( UNUM_PERCENT, NULL, success );
|
||||
* . unum_close(nf);
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* Each of these methods has two variants; one with an explicit locale
|
||||
* and one without; the latter using the default locale.
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* .
|
||||
* . nf = unum_open( UNUM_DEFAULT, myLocale, success );
|
||||
* . unum_close(nf);
|
||||
* . nf = unum_open( UNUM_CURRENCY, myLocale, success );
|
||||
* . unum_close(nf);
|
||||
* . nf = unum_open( UNUM_PERCENT, myLocale, success );
|
||||
* . unum_close(nf);
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* A <code>Locale</code> is the mechanism for identifying the kind of services
|
||||
* (<code>UNumberFormat</code>) that you would like to get. The locale is
|
||||
* <STRONG>just</STRONG> a mechanism for identifying these services.
|
||||
*
|
||||
* <P>
|
||||
* Each international service that performs locale-sensitive operations
|
||||
* allows you
|
||||
* to get all the available objects of that type. You can sift
|
||||
* through these objects by language, country, or variant,
|
||||
* and use the display names to present a menu to the user.
|
||||
* For example, you can create a menu of all the collation objects
|
||||
* suitable for a given language. Such classes implement these
|
||||
* three class methods:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . const char* uloc_getAvailable(int32_t index);
|
||||
* . int32_t uloc_countAvailable();
|
||||
* . int32_t
|
||||
* . uloc_getDisplayName(const char* localeID,
|
||||
* . const char* inLocaleID,
|
||||
* . UChar* result,
|
||||
* . int32_t maxResultSize,
|
||||
* . UErrorCode* err);
|
||||
* .
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Useful constants for language.
|
||||
*/
|
||||
#define ULOC_ENGLISH "en"
|
||||
#define ULOC_FRENCH "fr"
|
||||
#define ULOC_GERMAN "de"
|
||||
#define ULOC_ITALIAN "it"
|
||||
#define ULOC_JAPANESE "ja"
|
||||
#define ULOC_KOREAN "ko"
|
||||
#define ULOC_CHINESE "zh"
|
||||
#define ULOC_SIMPLIFIED_CHINESE "zh_CN"
|
||||
#define ULOC_TRADITIONAL_CHINESE "zh_TW"
|
||||
|
||||
/*
|
||||
*
|
||||
* Useful constants for country.
|
||||
*/
|
||||
#define ULOC_FRANCE "fr_FR"
|
||||
#define ULOC_GERMANY "de_DE"
|
||||
#define ULOC_ITALY "it_IT"
|
||||
#define ULOC_JAPAN "ja_JP"
|
||||
#define ULOC_KOREA "ko_KR"
|
||||
#define ULOC_CHINA "zh_CN"
|
||||
#define ULOC_PRC "zh_CN"
|
||||
#define ULOC_TAIWAN "zh_TW"
|
||||
#define ULOC_UK "en_GB"
|
||||
#define ULOC_US "en_US"
|
||||
#define ULOC_CANADA "en_CA"
|
||||
#define ULOC_CANADA_FRENCH "fr_CA"
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Gets the system's default locale.
|
||||
*
|
||||
* @return the system default locale
|
||||
*/
|
||||
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uloc_getDefault(void);
|
||||
|
||||
/**
|
||||
* Sets the system's default locale.
|
||||
*
|
||||
* @param localeID the new system default locale
|
||||
* @param status the error information if the setting of default locale fails
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uloc_setDefault(const char* localeID,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Gets the language code for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the ISO language code with
|
||||
* @param language the language code for localeID
|
||||
* @param languageCapacity the size of the language buffer to store the
|
||||
* language code with
|
||||
* @param err error information if retrieving the language code failed
|
||||
* @return the actual buffer size needed for the language code. If it's greater
|
||||
* than languageCapacity, the returned language code will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getLanguage(const char* localeID,
|
||||
char* language,
|
||||
int32_t languageCapacity,
|
||||
UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Gets the country code for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the country code with
|
||||
* @param country the country code for localeID
|
||||
* @param countryCapacity the size of the country buffer to store the
|
||||
* country code with
|
||||
* @param err error information if retrieving the country code failed
|
||||
* @return the actual buffer size needed for the country code. If it's greater
|
||||
* than countryCapacity, the returned country code will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getCountry(const char* localeID,
|
||||
char* country,
|
||||
int32_t countryCapacity,
|
||||
UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Gets the variant code for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the variant code with
|
||||
* @param variant the variant code for localeID
|
||||
* @param variantCapacity the size of the variant buffer to store the
|
||||
* variant code with
|
||||
* @param err error information if retrieving the variant code failed
|
||||
* @return the actual buffer size needed for the variant code. If it's greater
|
||||
* than variantCapacity, the returned variant code will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getVariant(const char* localeID,
|
||||
char* variant,
|
||||
int32_t variantCapacity,
|
||||
UErrorCode* err);
|
||||
/**
|
||||
* Gets the full name for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the full name with
|
||||
* @param name the full name for localeID
|
||||
* @param nameCapacity the size of the name buffer to store the
|
||||
* full name with
|
||||
* @param err error information if retrieving the full name failed
|
||||
* @return the actual buffer size needed for the full name. If it's greater
|
||||
* than nameCapacity, the returned full name will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getName(const char* localeID,
|
||||
char* name,
|
||||
int32_t nameCapacity,
|
||||
UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Gets the ISO language code for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the ISO language code with
|
||||
* @return language the ISO language code for localeID
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uloc_getISO3Language(const char* localeID);
|
||||
|
||||
|
||||
/**
|
||||
* Gets the ISO country code for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the ISO country code with
|
||||
* @return country the ISO country code for localeID
|
||||
*/
|
||||
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uloc_getISO3Country(const char* localeID);
|
||||
|
||||
/**
|
||||
* Gets the Win32 LCID value for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the Win32 LCID value with
|
||||
* @return country the Win32 LCID for localeID
|
||||
*/
|
||||
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
uloc_getLCID(const char* localeID);
|
||||
|
||||
/**
|
||||
* Gets the language name suitable for display for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the ISO langauge code with
|
||||
* @param language the displayable language code for localeID
|
||||
* @param languageCapacity the size of the language buffer to store the
|
||||
* displayable language code with
|
||||
* @param err error information if retrieving the displayable language code failed
|
||||
* @return the actual buffer size needed for the displayable language code. If it's greater
|
||||
* than languageCapacity, the returned language code will be truncated.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getDisplayLanguage(const char* locale,
|
||||
const char* inLocale,
|
||||
UChar* language,
|
||||
int32_t languageCapacity,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Gets the country name suitable for display for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the displayable country code with
|
||||
* @param country the displayable country code for localeID
|
||||
* @param countryCapacity the size of the country buffer to store the
|
||||
* displayable country code with
|
||||
* @param err error information if retrieving the displayable country code failed
|
||||
* @return the actual buffer size needed for the displayable country code. If it's greater
|
||||
* than countryCapacity, the returned displayable country code will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getDisplayCountry(const char* locale,
|
||||
const char* inLocale,
|
||||
UChar* country,
|
||||
int32_t countryCapacity,
|
||||
UErrorCode* status); /* NULL may be used to specify the default */
|
||||
|
||||
|
||||
/**
|
||||
* Gets the variant code suitable for display for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the displayable variant code with
|
||||
* @param variant the displayable variant code for localeID
|
||||
* @param variantCapacity the size of the variant buffer to store the
|
||||
* displayable variant code with
|
||||
* @param err error information if retrieving the displayable variant code failed
|
||||
* @return the actual buffer size needed for the displayable variant code. If it's greater
|
||||
* than variantCapacity, the returned displayable variant code will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getDisplayVariant(const char* locale,
|
||||
const char* inLocale,
|
||||
UChar* variant,
|
||||
int32_t variantCapacity,
|
||||
UErrorCode* status); /* NULL may be used to specify the default */
|
||||
|
||||
/**
|
||||
* Gets the full name suitable for display for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the displayable name with
|
||||
* @param result the displayable name for localeID
|
||||
* @param maxResultSize the size of the name buffer to store the
|
||||
* displayable full name with
|
||||
* @param err error information if retrieving the displayable name failed
|
||||
* @return the actual buffer size needed for the displayable name. If it's greater
|
||||
* than maxResultSize, the returned displayable name will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getDisplayName(const char* localeID,
|
||||
const char* inLocaleID, /* NULL may be used to specify the default */
|
||||
UChar* result,
|
||||
int32_t maxResultSize,
|
||||
UErrorCode* err);
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* Gets the specified locale from a list of all available locales.
|
||||
* The return value is a pointer to an item of
|
||||
* a locale name array. Both this array and the pointers
|
||||
* it contains are owned by ICU and should not be deleted or written through
|
||||
* by the caller. The locale name is terminated by a null pointer.
|
||||
* @param index the specific locale name index of the available locale list
|
||||
* @return a specified locale name of all available locales
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uloc_getAvailable(int32_t index);
|
||||
|
||||
/**
|
||||
* Gets the size of the all available locale list.
|
||||
*
|
||||
* @return the size of the locale list
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2 uloc_countAvailable(void);
|
||||
|
||||
/**
|
||||
*
|
||||
* Gets a list of all available language codes defined in ISO 639. This is a pointer
|
||||
* to an array of pointers to arrays of char. All of these pointers are owned
|
||||
* by ICU-- do not delete them, and do not write through them. The array is
|
||||
* terminated with a null pointer.
|
||||
* @return a list of all available language codes
|
||||
*/
|
||||
U_CAPI const char* const* U_EXPORT2
|
||||
uloc_getISOLanguages(void);
|
||||
|
||||
/**
|
||||
*
|
||||
* Gets a list of all available 2-letter country codes defined in ISO 3166. This is a
|
||||
* pointer to an array of pointers to arrays of char. All of these pointers are
|
||||
* owned by ICU-- do not delete them, and do not write through them. The array is
|
||||
* terminated with a null pointer.
|
||||
* @return a list of all available country codes
|
||||
*/
|
||||
U_CAPI const char* const* U_EXPORT2
|
||||
uloc_getISOCountries(void);
|
||||
|
||||
/**
|
||||
* Deprecated 1999dec14 - Gets the directory containing the locale data files.
|
||||
*
|
||||
* @return the locale data file directory
|
||||
*/
|
||||
#define uloc_getDataDirectory u_getDataDirectory
|
||||
|
||||
/**
|
||||
* Deprecated 1999dec14 - Sets the directory containing the locale data files.
|
||||
*
|
||||
* @return the new directory to fetch locale data from
|
||||
*/
|
||||
#define uloc_setDataDirectory u_setDataDirectory
|
||||
|
||||
/*Internal function */
|
||||
int32_t U_EXPORT2
|
||||
uloc_getParent(const char* localeID,
|
||||
char* parent,
|
||||
int32_t parentCapacity,
|
||||
UErrorCode* err);
|
||||
|
||||
/*eof*/
|
||||
|
||||
|
||||
#endif /*_ULOC*/
|
||||
|
||||
|
||||
|
||||
#error Please include unicode/uloc.h instead
|
||||
|
|
|
@ -1,33 +1 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: umisc.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999oct15
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef UMISC_H
|
||||
#define UMISC_H
|
||||
|
||||
#include "utypes.h"
|
||||
|
||||
/* This file contains miscellaneous definitions for the C APIs. */
|
||||
|
||||
/** A struct representing a range of text containing a specific field */
|
||||
struct UFieldPosition {
|
||||
/** The field */
|
||||
int32_t field;
|
||||
/** The start of the text range containing field */
|
||||
int32_t beginIndex;
|
||||
/** The limit of the text range containing field */
|
||||
int32_t endIndex;
|
||||
};
|
||||
typedef struct UFieldPosition UFieldPosition;
|
||||
|
||||
#endif
|
||||
#error Please include unicode/umisc.h instead
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
|
||||
|
||||
/* Check our settings... */
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/* APP_NO_THREADS is an old symbol. We'll honour it if present. */
|
||||
#ifdef APP_NO_THREADS
|
||||
|
@ -154,12 +154,12 @@ if( mutex == NULL ) /* initialize the global mutex */
|
|||
return;
|
||||
|
||||
#if defined( _WIN32 )
|
||||
*mutex = icu_malloc(sizeof(CRITICAL_SECTION));
|
||||
*mutex = uprv_malloc(sizeof(CRITICAL_SECTION));
|
||||
InitializeCriticalSection((CRITICAL_SECTION*)*mutex);
|
||||
|
||||
#elif defined( POSIX )
|
||||
|
||||
*mutex = icu_malloc(sizeof(pthread_mutex_t));
|
||||
*mutex = uprv_malloc(sizeof(pthread_mutex_t));
|
||||
|
||||
#if defined(HPUX)
|
||||
pthread_mutex_init((pthread_mutex_t*)*mutex, pthread_mutexattr_default);
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
#ifndef UMUTEX_H
|
||||
#define UMUTEX_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#ifndef XP_CPLUSPLUS
|
||||
typedef void * Mutex;
|
||||
|
|
|
@ -22,11 +22,11 @@
|
|||
# define U_COMMON_IMPLEMENTATION
|
||||
#endif
|
||||
|
||||
#include "utypes.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "umutex.h"
|
||||
#include "cmemory.h"
|
||||
#include "uchar.h"
|
||||
#include "udata.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/udata.h"
|
||||
|
||||
/* prototypes --------------------------------------------------------------- */
|
||||
|
||||
|
|
|
@ -33,9 +33,9 @@
|
|||
// 11/22/99 aliu Added MIN_RADIX, MAX_RADIX, digit, forDigit
|
||||
//********************************************************************************************
|
||||
|
||||
#include "unicode.h"
|
||||
#include "unicode/unicode.h"
|
||||
|
||||
#include "uchar.h"
|
||||
#include "unicode/uchar.h"
|
||||
|
||||
|
||||
const UChar Unicode::MIN_VALUE = 0x0000;
|
||||
|
|
|
@ -1,779 +1 @@
|
|||
/*
|
||||
*****************************************************************************************
|
||||
* Copyright (C) 1996-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*****************************************************************************************
|
||||
*/
|
||||
// FILE NAME : unicode.h
|
||||
//
|
||||
// CREATED
|
||||
// Wednesday, December 11, 1996
|
||||
//
|
||||
// CREATED BY
|
||||
// Helena Shih
|
||||
//
|
||||
// CHANGES
|
||||
// Thursday, April 15, 1999
|
||||
// Modified the definitions of all the functions
|
||||
// C++ Wrappers for Unicode
|
||||
// CHANGES BY
|
||||
// Madhu Katragadda
|
||||
// 5/20/99 Madhu Added the function getVersion()
|
||||
// 11/22/99 aliu Added MIN_RADIX, MAX_RADIX, digit, forDigit
|
||||
//********************************************************************************************
|
||||
|
||||
|
||||
|
||||
#ifndef UNICODE_H
|
||||
#define UNICODE_H
|
||||
|
||||
#include "utypes.h"
|
||||
#include "uchar.h"
|
||||
|
||||
/**
|
||||
* The Unicode class allows you to query the properties associated with individual
|
||||
* Unicode character values.
|
||||
* <p>
|
||||
* The Unicode character information, provided implicitly by the
|
||||
* Unicode character encoding standard, includes information about the script
|
||||
* (for example, symbols or control characters) to which the character belongs,
|
||||
* as well as semantic information such as whether a character is a digit or
|
||||
* uppercase, lowercase, or uncased.
|
||||
* <P>
|
||||
* @subclassing Do not subclass.
|
||||
*/
|
||||
class U_COMMON_API Unicode
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* The minimum value a UChar can have. The lowest value a
|
||||
* UChar can have is 0x0000.
|
||||
*/
|
||||
static const UChar MIN_VALUE;
|
||||
|
||||
/**
|
||||
* The maximum value a UChar can have. The greatest value a
|
||||
* UChar can have is 0xffff.
|
||||
*/
|
||||
static const UChar MAX_VALUE;
|
||||
|
||||
/**
|
||||
* Public data for enumerated Unicode general category types
|
||||
*/
|
||||
|
||||
enum EUnicodeGeneralTypes
|
||||
{
|
||||
UNASSIGNED = 0,
|
||||
UPPERCASE_LETTER = 1,
|
||||
LOWERCASE_LETTER = 2,
|
||||
TITLECASE_LETTER = 3,
|
||||
MODIFIER_LETTER = 4,
|
||||
OTHER_LETTER = 5,
|
||||
NON_SPACING_MARK = 6,
|
||||
ENCLOSING_MARK = 7,
|
||||
COMBINING_SPACING_MARK = 8,
|
||||
DECIMAL_DIGIT_NUMBER = 9,
|
||||
LETTER_NUMBER = 10,
|
||||
OTHER_NUMBER = 11,
|
||||
SPACE_SEPARATOR = 12,
|
||||
LINE_SEPARATOR = 13,
|
||||
PARAGRAPH_SEPARATOR = 14,
|
||||
CONTROL = 15,
|
||||
FORMAT = 16,
|
||||
PRIVATE_USE = 17,
|
||||
SURROGATE = 18,
|
||||
DASH_PUNCTUATION = 19,
|
||||
START_PUNCTUATION = 20,
|
||||
END_PUNCTUATION = 21,
|
||||
CONNECTOR_PUNCTUATION = 22,
|
||||
OTHER_PUNCTUATION = 23,
|
||||
MATH_SYMBOL = 24,
|
||||
CURRENCY_SYMBOL = 25,
|
||||
MODIFIER_SYMBOL = 26,
|
||||
OTHER_SYMBOL = 27,
|
||||
INITIAL_PUNCTUATION = 28,
|
||||
FINAL_PUNCTUATION = 29,
|
||||
GENERAL_TYPES_COUNT = 30
|
||||
};
|
||||
|
||||
enum EUnicodeScript
|
||||
{
|
||||
kBasicLatin,
|
||||
kLatin1Supplement,
|
||||
kLatinExtendedA,
|
||||
kLatinExtendedB,
|
||||
kIPAExtension,
|
||||
kSpacingModifier,
|
||||
kCombiningDiacritical,
|
||||
kGreek,
|
||||
kCyrillic,
|
||||
kArmenian,
|
||||
kHebrew,
|
||||
kArabic,
|
||||
kDevanagari,
|
||||
kBengali,
|
||||
kGurmukhi,
|
||||
kGujarati,
|
||||
kOriya,
|
||||
kTamil,
|
||||
kTelugu,
|
||||
kKannada,
|
||||
kMalayalam,
|
||||
kThai,
|
||||
kLao,
|
||||
kTibetan,
|
||||
kGeorgian,
|
||||
kHangulJamo,
|
||||
kLatinExtendedAdditional,
|
||||
kGreekExtended,
|
||||
kGeneralPunctuation,
|
||||
kSuperSubScript,
|
||||
kCurrencySymbolScript,
|
||||
kSymbolCombiningMark,
|
||||
kLetterlikeSymbol,
|
||||
kNumberForm,
|
||||
kArrow,
|
||||
kMathOperator,
|
||||
kMiscTechnical,
|
||||
kControlPicture,
|
||||
kOpticalCharacter,
|
||||
kEnclosedAlphanumeric,
|
||||
kBoxDrawing,
|
||||
kBlockElement,
|
||||
kGeometricShape,
|
||||
kMiscSymbol,
|
||||
kDingbat,
|
||||
kCJKSymbolPunctuation,
|
||||
kHiragana,
|
||||
kKatakana,
|
||||
kBopomofo,
|
||||
kHangulCompatibilityJamo,
|
||||
kKanbun,
|
||||
kEnclosedCJKLetterMonth,
|
||||
kCJKCompatibility,
|
||||
kCJKUnifiedIdeograph,
|
||||
kHangulSyllable,
|
||||
kHighSurrogate,
|
||||
kHighPrivateUseSurrogate,
|
||||
kLowSurrogate,
|
||||
kPrivateUse,
|
||||
kCJKCompatibilityIdeograph,
|
||||
kAlphabeticPresentation,
|
||||
kArabicPresentationA,
|
||||
kCombiningHalfMark,
|
||||
kCJKCompatibilityForm,
|
||||
kSmallFormVariant,
|
||||
kArabicPresentationB,
|
||||
kNoScript,
|
||||
kHalfwidthFullwidthForm,
|
||||
kScriptCount
|
||||
};
|
||||
|
||||
/**
|
||||
* This specifies the language directional property of a character set.
|
||||
*/
|
||||
enum EDirectionProperty {
|
||||
LEFT_TO_RIGHT = 0,
|
||||
RIGHT_TO_LEFT = 1,
|
||||
EUROPEAN_NUMBER = 2,
|
||||
EUROPEAN_NUMBER_SEPARATOR = 3,
|
||||
EUROPEAN_NUMBER_TERMINATOR = 4,
|
||||
ARABIC_NUMBER = 5,
|
||||
COMMON_NUMBER_SEPARATOR = 6,
|
||||
BLOCK_SEPARATOR = 7,
|
||||
SEGMENT_SEPARATOR = 8,
|
||||
WHITE_SPACE_NEUTRAL = 9,
|
||||
OTHER_NEUTRAL = 10,
|
||||
LEFT_TO_RIGHT_EMBEDDING = 11,
|
||||
LEFT_TO_RIGHT_OVERRIDE = 12,
|
||||
RIGHT_TO_LEFT_ARABIC = 13,
|
||||
RIGHT_TO_LEFT_EMBEDDING = 14,
|
||||
RIGHT_TO_LEFT_OVERRIDE = 15,
|
||||
POP_DIRECTIONAL_FORMAT = 16,
|
||||
DIR_NON_SPACING_MARK = 17,
|
||||
BOUNDARY_NEUTRAL = 18
|
||||
};
|
||||
|
||||
/**
|
||||
* Values returned by the getCellWidth() function.
|
||||
* @see Unicode#getCellWidth
|
||||
*/
|
||||
enum ECellWidths
|
||||
{
|
||||
ZERO_WIDTH = 0,
|
||||
HALF_WIDTH = 1,
|
||||
FULL_WIDTH = 2,
|
||||
NEUTRAL = 3
|
||||
};
|
||||
|
||||
/**
|
||||
* The minimum radix available for conversion to and from Strings.
|
||||
* The constant value of this field is the smallest value permitted
|
||||
* for the radix argument in radix-conversion methods such as the
|
||||
* <code>digit</code> method and the <code>forDigit</code>
|
||||
* method.
|
||||
*
|
||||
* @see Unicode#digit
|
||||
* @see Unicode#forDigit
|
||||
*/
|
||||
static const int8_t MIN_RADIX;
|
||||
|
||||
/**
|
||||
* The maximum radix available for conversion to and from Strings.
|
||||
* The constant value of this field is the largest value permitted
|
||||
* for the radix argument in radix-conversion methods such as the
|
||||
* <code>digit</code> method and the <code>forDigit</code>
|
||||
* method.
|
||||
*
|
||||
* @see Unicode#digit
|
||||
* @see Unicode#forDigit
|
||||
*/
|
||||
static const int8_t MAX_RADIX;
|
||||
|
||||
/**
|
||||
* Determines whether the specified UChar is a lowercase character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param ch the character to be tested
|
||||
* @return true if the character is lowercase; false otherwise.
|
||||
*
|
||||
* @see Unicode#isUpperCase
|
||||
* @see Unicode#isTitleCase
|
||||
* @see Unicode#toLowerCase
|
||||
*/
|
||||
static bool_t isLowerCase(UChar ch);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is an uppercase character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param ch the character to be tested
|
||||
* @return true if the character is uppercase; false otherwise.
|
||||
* @see Unicode#isLowerCase
|
||||
* @see Unicode#isTitleCase
|
||||
* @see Unicode#toUpperCase
|
||||
*/
|
||||
static bool_t isUpperCase(UChar ch);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a titlecase character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param ch the character to be tested
|
||||
* @return true if the character is titlecase; false otherwise.
|
||||
* @see Unicode#isUpperCase
|
||||
* @see Unicode#isLowerCase
|
||||
* @see Unicode#toTitleCase
|
||||
*/
|
||||
static bool_t isTitleCase(UChar ch);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a digit according to Unicode
|
||||
* 2.1.2.
|
||||
*
|
||||
* @param ch the character to be tested
|
||||
* @return true if the character is a digit; false otherwise.
|
||||
* @see Unicode#digit
|
||||
* @see Unicode#forDigit
|
||||
* @see Unicode#digitValue
|
||||
*/
|
||||
static bool_t isDigit(UChar ch);
|
||||
|
||||
/**
|
||||
* Determines whether the specified numeric value is actually a defined character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param ch the character to be tested
|
||||
* @return true if the character has a defined Unicode meaning; false otherwise.
|
||||
*
|
||||
* @see Unicode#isDigit
|
||||
* @see Unicode#isLetter
|
||||
* @see Unicode#isLetterOrDigit
|
||||
* @see Unicode#isUpperCase
|
||||
* @see Unicode#isLowerCase
|
||||
* @see Unicode#isTitleCase
|
||||
*/
|
||||
static bool_t isDefined(UChar ch);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a control character according
|
||||
* to Unicode 2.1.2.
|
||||
*
|
||||
* @param ch the character to be tested
|
||||
* @return true if the Unicode character is a control character; false otherwise.
|
||||
*
|
||||
* @see Unicode#isPrintable
|
||||
*/
|
||||
static bool_t isControl(UChar ch);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a printable character according
|
||||
* to Unicode 2.1.2.
|
||||
*
|
||||
* @param ch the character to be tested
|
||||
* @return true if the Unicode character is a printable character; false otherwise.
|
||||
*
|
||||
* @see Unicode#isControl
|
||||
*/
|
||||
static bool_t isPrintable(UChar ch);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is of the base form according
|
||||
* to Unicode 2.1.2.
|
||||
*
|
||||
* @param ch the character to be tested
|
||||
* @return true if the Unicode character is of the base form; false otherwise.
|
||||
*
|
||||
* @see Unicode#isLetter
|
||||
* @see Unicode#isDigit
|
||||
*/
|
||||
|
||||
static bool_t isBaseForm(UChar ch);
|
||||
/**
|
||||
* Determines whether the specified character is a letter
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param ch the character to be tested
|
||||
* @return true if the character is a letter; false otherwise.
|
||||
*
|
||||
*
|
||||
* @see Unicode#isDigit
|
||||
* @see Unicode#isLetterOrDigit
|
||||
* @see Unicode#isUpperCase
|
||||
* @see Unicode#isLowerCase
|
||||
* @see Unicode#isTitleCase
|
||||
*/
|
||||
static bool_t isLetter(UChar ch);
|
||||
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* is allowed as the first character in a Java identifier.
|
||||
* <P>
|
||||
* A character may start a Java identifier if and only if
|
||||
* it is one of the following:
|
||||
* <ul>
|
||||
* <li> a letter
|
||||
* <li> a currency symbol (such as "$")
|
||||
* <li> a connecting punctuation symbol (such as "_").
|
||||
* </ul>
|
||||
*
|
||||
* @param ch the Unicode character.
|
||||
* @return TRUE if the character may start a Java identifier;
|
||||
* FALSE otherwise.
|
||||
* @see isJavaIdentifierPart
|
||||
* @see isLetter
|
||||
* @see isUnicodeIdentifierStart
|
||||
*/
|
||||
static bool_t isJavaIdentifierStart(UChar ch);
|
||||
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* may be part of a Java identifier other than the starting
|
||||
* character.
|
||||
* <P>
|
||||
* A character may be part of a Java identifier if and only if
|
||||
* it is one of the following:
|
||||
* <ul>
|
||||
* <li> a letter
|
||||
* <li> a currency symbol (such as "$")
|
||||
* <li> a connecting punctuation character (such as "_").
|
||||
* <li> a digit
|
||||
* <li> a numeric letter (such as a Roman numeral character)
|
||||
* <li> a combining mark
|
||||
* <li> a non-spacing mark
|
||||
* <li> an ignorable control character
|
||||
* </ul>
|
||||
*
|
||||
* @param ch the Unicode character.
|
||||
* @return TRUE if the character may be part of a Java identifier;
|
||||
* FALSE otherwise.
|
||||
* @see isIdentifierIgnorable
|
||||
* @see isJavaIdentifierStart
|
||||
* @see isLetter
|
||||
* @see isDigit
|
||||
* @see isUnicodeIdentifierPart
|
||||
*/
|
||||
static bool_t isJavaIdentifierPart(UChar ch);
|
||||
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* is allowed to start in a Unicode identifier.
|
||||
* A character may start a Unicode identifier if and only if
|
||||
* it is a letter.
|
||||
*
|
||||
* @param ch the Unicode character.
|
||||
* @return TRUE if the character may start a Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see isJavaIdentifierStart
|
||||
* @see isLetter
|
||||
* @see isUnicodeIdentifierPart
|
||||
*/
|
||||
static bool_t isUnicodeIdentifierStart(UChar ch);
|
||||
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* may be part of a Unicode identifier other than the starting
|
||||
* character.
|
||||
* <P>
|
||||
* A character may be part of a Unicode identifier if and only if
|
||||
* it is one of the following:
|
||||
* <ul>
|
||||
* <li> a letter
|
||||
* <li> a connecting punctuation character (such as "_").
|
||||
* <li> a digit
|
||||
* <li> a numeric letter (such as a Roman numeral character)
|
||||
* <li> a combining mark
|
||||
* <li> a non-spacing mark
|
||||
* <li> an ignorable control character
|
||||
* </ul>
|
||||
*
|
||||
* @param ch the Unicode character.
|
||||
* @return TRUE if the character may be part of a Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see isIdentifierIgnorable
|
||||
* @see isJavaIdentifierPart
|
||||
* @see isLetterOrDigit
|
||||
* @see isUnicodeIdentifierStart
|
||||
*/
|
||||
static bool_t isUnicodeIdentifierPart(UChar ch);
|
||||
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* should be regarded as an ignorable character in a Java
|
||||
* identifier or a Unicode identifier.
|
||||
* <P>
|
||||
* The following Unicode characters are ignorable in a Java identifier
|
||||
* or a Unicode identifier:
|
||||
* <table>
|
||||
* <tr><td>0x0000 through 0x0008,</td>
|
||||
* <td>ISO control characters that</td></tr>
|
||||
* <tr><td>0x000E through 0x001B,</td> <td>are not whitespace</td></tr>
|
||||
* <tr><td>and 0x007F through 0x009F</td></tr>
|
||||
* <tr><td>0x200C through 0x200F</td> <td>join controls</td></tr>
|
||||
* <tr><td>0x202A through 0x202E</td> <td>bidirectional controls</td></tr>
|
||||
* <tr><td>0x206A through 0x206F</td> <td>format controls</td></tr>
|
||||
* <tr><td>0xFEFF</td> <td>zero-width no-break space</td></tr>
|
||||
* </table>
|
||||
*
|
||||
* @param ch the Unicode character.
|
||||
* @return TRUE if the character is ignorable in a Java or Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see isJavaIdentifierPart
|
||||
* @see isUnicodeIdentifierPart
|
||||
*/
|
||||
static bool_t isIdentifierIgnorable(UChar ch);
|
||||
|
||||
/**
|
||||
* The given character is mapped to its lowercase equivalent according to
|
||||
* Unicode 2.1.2; if the character has no lowercase equivalent, the character
|
||||
* itself is returned.
|
||||
* <P>
|
||||
* A character has a lowercase equivalent if and only if a lowercase mapping
|
||||
* is specified for the character in the Unicode 2.0 attribute table.
|
||||
* <P>
|
||||
* Unicode::toLowerCase() only deals with the general letter case conversion.
|
||||
* For language specific case conversion behavior, use UnicodeString::toLower().
|
||||
* For example, the case conversion for dot-less i and dotted I in Turkish,
|
||||
* or for final sigma in Greek.
|
||||
*
|
||||
* @param ch the character to be converted
|
||||
* @return the lowercase equivalent of the character, if any;
|
||||
* otherwise the character itself.
|
||||
*
|
||||
* @see UnicodeString#toLower
|
||||
* @see Unicode#isLowerCase
|
||||
* @see Unicode#isUpperCase
|
||||
* @see Unicode#toUpperCase
|
||||
* @see Unicode#toTitleCase
|
||||
*/
|
||||
static UChar toLowerCase(UChar ch);
|
||||
|
||||
/**
|
||||
* The given character is mapped to its uppercase equivalent according to Unicode
|
||||
* 2.1.2; if the character has no uppercase equivalent, the character itself is
|
||||
* returned.
|
||||
* <P>
|
||||
* Unicode::toUpperCase() only deals with the general letter case conversion.
|
||||
* For language specific case conversion behavior, use UnicodeString::toUpper().
|
||||
* For example, the case conversion for dot-less i and dotted I in Turkish,
|
||||
* or ess-zed (i.e., "sharp S") in German.
|
||||
*
|
||||
* @param ch the character to be converted
|
||||
* @return the uppercase equivalent of the character, if any;
|
||||
* otherwise the character itself.
|
||||
*
|
||||
* @see UnicodeString#toUpper
|
||||
* @see Unicode#isUpperCase
|
||||
* @see Unicode#isLowerCase
|
||||
* @see Unicode#toLowerCase
|
||||
* @see Unicode#toTitleCase
|
||||
*/
|
||||
static UChar toUpperCase(UChar ch);
|
||||
|
||||
/**
|
||||
* The given character is mapped to its titlecase equivalent according to Unicode
|
||||
* 2.1.2. There are only four Unicode characters that are truly titlecase forms
|
||||
* that are distinct from uppercase forms. As a rule, if a character has no
|
||||
* true titlecase equivalent, its uppercase equivalent is returned.
|
||||
* <P>
|
||||
* A character has a titlecase equivalent if and only if a titlecase mapping
|
||||
* is specified for the character in the Unicode 2.1.2 data.
|
||||
*
|
||||
* @param ch the character to be converted
|
||||
* @return the titlecase equivalent of the character, if any;
|
||||
* otherwise the character itself.
|
||||
* @see Unicode#isTitleCase
|
||||
* @see Unicode#toUpperCase
|
||||
* @see Unicode#toLowerCase
|
||||
*/
|
||||
static UChar toTitleCase(UChar ch);
|
||||
|
||||
/**
|
||||
* Determines if the specified character is a Unicode space character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param ch the character to be tested
|
||||
* @return true if the character is a space character; false otherwise.
|
||||
*/
|
||||
static bool_t isSpaceChar(UChar ch);
|
||||
|
||||
/**
|
||||
* Returns a value indicating a character category according to Unicode
|
||||
* 2.1.2.
|
||||
* @param ch the character to be tested
|
||||
* @return a value of type int, the character category.
|
||||
* @see Unicode#UNASSIGNED
|
||||
* @see Unicode#UPPERCASE_LETTER
|
||||
* @see Unicode#LOWERCASE_LETTER
|
||||
* @see Unicode#TITLECASE_LETTER
|
||||
* @see Unicode#MODIFIER_LETTER
|
||||
* @see Unicode#OTHER_LETTER
|
||||
* @see Unicode#NON_SPACING_MARK
|
||||
* @see Unicode#ENCLOSING_MARK
|
||||
* @see Unicode#COMBINING_SPACING_MARK
|
||||
* @see Unicode#DECIMAL_DIGIT_NUMBER
|
||||
* @see Unicode#OTHER_NUMBER
|
||||
* @see Unicode#SPACE_SEPARATOR
|
||||
* @see Unicode#LINE_SEPARATOR
|
||||
* @see Unicode#PARAGRAPH_SEPARATOR
|
||||
* @see Unicode#CONTROL
|
||||
* @see Unicode#PRIVATE_USE
|
||||
* @see Unicode#SURROGATE
|
||||
* @see Unicode#DASH_PUNCTUATION
|
||||
* @see Unicode#START_PUNCTUATION
|
||||
* @see Unicode#END_PUNCTUATION
|
||||
* @see Unicode#CONNECTOR_PUNCTUATION
|
||||
* @see Unicode#OTHER_PUNCTUATION
|
||||
* @see Unicode#LETTER_NUMBER
|
||||
* @see Unicode#MATH_SYMBOL
|
||||
* @see Unicode#CURRENCY_SYMBOL
|
||||
* @see Unicode#MODIFIER_SYMBOL
|
||||
* @see Unicode#OTHER_SYMBOL
|
||||
*/
|
||||
static int8_t getType(UChar ch);
|
||||
|
||||
/**
|
||||
* Returns the linguistic direction property of a character.
|
||||
* <P>
|
||||
* Returns the linguistic direction property of a character.
|
||||
* For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
|
||||
* property.
|
||||
* @see #EDirectionProperty
|
||||
*/
|
||||
static EDirectionProperty characterDirection(UChar ch);
|
||||
|
||||
/**
|
||||
* Returns the script associated with a character.
|
||||
* @see #EUnicodeScript
|
||||
*/
|
||||
static EUnicodeScript getScript(UChar ch);
|
||||
|
||||
/**
|
||||
* Returns a value indicating the display-cell width of the character
|
||||
* when used in Asian text, according to the Unicode standard (see p. 6-130
|
||||
* of The Unicode Standard, Version 2.0). The results for various characters
|
||||
* are as follows:
|
||||
* <P>
|
||||
* ZERO_WIDTH: Characters which are considered to take up no display-cell space:
|
||||
* control characters
|
||||
* format characters
|
||||
* line and paragraph separators
|
||||
* non-spacing marks
|
||||
* combining Hangul jungseong
|
||||
* combining Hangul jongseong
|
||||
* unassigned Unicode values
|
||||
* <P>
|
||||
* HALF_WIDTH: Characters which take up half a cell in standard Asian text:
|
||||
* all characters in the General Scripts Area except combining Hangul choseong
|
||||
* and the characters called out specifically above as ZERO_WIDTH
|
||||
* alphabetic and Arabic presentation forms
|
||||
* halfwidth CJK punctuation
|
||||
* halfwidth Katakana
|
||||
* halfwidth Hangul Jamo
|
||||
* halfwidth forms, arrows, and shapes
|
||||
* <P>
|
||||
* FULL_WIDTH: Characters which take up a full cell in standard Asian text:
|
||||
* combining Hangul choseong
|
||||
* all characters in the CJK Phonetics and Symbols Area
|
||||
* all characters in the CJK Ideographs Area
|
||||
* all characters in the Hangul Syllables Area
|
||||
* CJK compatibility ideographs
|
||||
* CJK compatibility forms
|
||||
* small form variants
|
||||
* fullwidth ASCII
|
||||
* fullwidth punctuation and currency signs
|
||||
* <P>
|
||||
* NEUTRAL: Characters whose cell width is context-dependent:
|
||||
* all characters in the Symbols Area, except those specifically called out above
|
||||
* all characters in the Surrogates Area
|
||||
* all characters in the Private Use Area
|
||||
* <P>
|
||||
* For Korean text, this algorithm should work properly with properly normalized Korean
|
||||
* text. Precomposed Hangul syllables and non-combining jamo are all considered full-
|
||||
* width characters. For combining jamo, we treat choseong (initial consonants)
|
||||
* as double-width characters and jungseong (vowels) and jongseong (final consonants)
|
||||
* as non-spacing marks. This will work right in text that uses the precomposed
|
||||
* choseong characters instead of two choseong characters in a row, and which uses the
|
||||
* choseong filler character at the beginning of syllables that don't have an initial
|
||||
* consonant. The results may be slightly off with Korean text following different
|
||||
* conventions.
|
||||
*/
|
||||
static uint16_t getCellWidth(UChar ch);
|
||||
|
||||
/**
|
||||
* Retrieve the name of a Unicode character.
|
||||
* Depending on <code>nameChoice</code>, the character name written
|
||||
* into the buffer is the "modern" name or the name that was defined
|
||||
* in Unicode version 1.0.
|
||||
* The name contains only "invariant" characters
|
||||
* like A-Z, 0-9, space, and '-'.
|
||||
*
|
||||
* @param code The character (code point) for which to get the name.
|
||||
* It must be <code>0<=code<=0x10ffff</code>.
|
||||
* @param buffer Destination address for copying the name.
|
||||
* @param bufferLength <code>==sizeof(buffer)</code>
|
||||
* @param nameChoice Selector for which name to get.
|
||||
*
|
||||
* @see UCharNameChoice
|
||||
*
|
||||
* Example:
|
||||
* <pre>
|
||||
*   char buffer[100];
|
||||
*   UTextOffset length=Unicode::getCharName(
|
||||
*   0x284, buffer, sizeof(buffer));
|
||||
*  
|
||||
*   // use invariant-character conversion to Unicode
|
||||
*   UnicodeString name(buffer, length, "");
|
||||
* </pre>
|
||||
*/
|
||||
static inline UTextOffset
|
||||
getCharName(uint32_t code,
|
||||
char *buffer, UTextOffset bufferLength,
|
||||
UCharNameChoice nameChoice=U_UNICODE_CHAR_NAME);
|
||||
|
||||
/**
|
||||
* Retrieves the decimal numeric value of a digit character.
|
||||
* @param ch the digit character for which to get the numeric value
|
||||
* @return the numeric value of ch in decimal radix. This method returns
|
||||
* -1 if ch is not a valid digit character.
|
||||
* @see Unicode#digit
|
||||
* @see Unicode#forDigit
|
||||
* @see Unicode#isDigit
|
||||
*/
|
||||
static int32_t digitValue(UChar ch);
|
||||
|
||||
/**
|
||||
* Returns the numeric value of the character <code>ch</code> in the
|
||||
* specified radix.
|
||||
* <p>
|
||||
* If the radix is not in the range <code>MIN_RADIX</code> <=
|
||||
* <code>radix</code> <= <code>MAX_RADIX</code> or if the
|
||||
* value of <code>ch</code> is not a valid digit in the specified
|
||||
* radix, <code>-1</code> is returned. A character is a valid digit
|
||||
* if at least one of the following is true:
|
||||
* <ul>
|
||||
* <li>The method <code>isDigit</code> is true of the character
|
||||
* and the Unicode decimal digit value of the character (or its
|
||||
* single-character decomposition) is less than the specified radix.
|
||||
* In this case the decimal digit value is returned.
|
||||
* <li>The character is one of the uppercase Latin letters
|
||||
* <code>'A'</code> through <code>'Z'</code> and its code is less than
|
||||
* <code>radix + 'A' - 10</code>.
|
||||
* In this case, <code>ch - 'A' + 10</code>
|
||||
* is returned.
|
||||
* <li>The character is one of the lowercase Latin letters
|
||||
* <code>'a'</code> through <code>'z'</code> and its code is less than
|
||||
* <code>radix + 'a' - 10</code>.
|
||||
* In this case, <code>ch - 'a' + 10</code>
|
||||
* is returned.
|
||||
* </ul>
|
||||
*
|
||||
* @param ch the character to be converted.
|
||||
* @param radix the radix.
|
||||
* @return the numeric value represented by the character in the
|
||||
* specified radix.
|
||||
* @see Unicode#MIN_RADIX
|
||||
* @see Unicode#MAX_RADIX
|
||||
* @see Unicode#forDigit
|
||||
* @see Unicode#digitValue
|
||||
* @see Unicode#isDigit
|
||||
*/
|
||||
static int8_t digit(UChar ch, int8_t radix);
|
||||
|
||||
/**
|
||||
* Determines the character representation for a specific digit in
|
||||
* the specified radix. If the value of <code>radix</code> is not a
|
||||
* valid radix, or the value of <code>digit</code> is not a valid
|
||||
* digit in the specified radix, the null character
|
||||
* (<code>U+0000</code>) is returned.
|
||||
* <p>
|
||||
* The <code>radix</code> argument is valid if it is greater than or
|
||||
* equal to <code>MIN_RADIX</code> and less than or equal to
|
||||
* <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
|
||||
* <code>0 <= digit < radix</code>.
|
||||
* <p>
|
||||
* If the digit is less than 10, then
|
||||
* <code>'0' + digit</code> is returned. Otherwise, the value
|
||||
* <code>'a' + digit - 10</code> is returned.
|
||||
*
|
||||
* @param digit the number to convert to a character.
|
||||
* @param radix the radix.
|
||||
* @return the <code>char</code> representation of the specified digit
|
||||
* in the specified radix.
|
||||
* @see Unicode#MIN_RADIX
|
||||
* @see Unicode#MAX_RADIX
|
||||
* @see Unicode#digit
|
||||
* @see Unicode#digitValue
|
||||
* @see Unicode#isDigit
|
||||
*/
|
||||
static UChar forDigit(int32_t digit, int8_t radix);
|
||||
|
||||
/**
|
||||
* Retrieves the Unicode Standard Version number that is used
|
||||
* @return the Unicode Standard Version Number.
|
||||
*/
|
||||
static const char* getVersion(void);
|
||||
|
||||
protected:
|
||||
// These constructors, destructor, and assignment operator must
|
||||
// be protected (not private, as they semantically are) to make
|
||||
// various UNIX compilers happy. [LIU]
|
||||
Unicode();
|
||||
Unicode( const Unicode& other);
|
||||
~Unicode();
|
||||
const Unicode& operator=( const Unicode& other);
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
/*
 * Inline wrapper around u_charName(): starts from a fresh U_ZERO_ERROR,
 * forwards all arguments, and returns the length reported by u_charName()
 * only if the call succeeded; on any failure it returns 0.
 */
inline UTextOffset
Unicode::getCharName(uint32_t code,
                     char *buffer, UTextOffset bufferLength,
                     UCharNameChoice nameChoice) {
    UErrorCode status=U_ZERO_ERROR;
    const UTextOffset nameLength=u_charName(code, nameChoice, buffer, bufferLength, &status);
    if(U_SUCCESS(status)) {
        return nameLength;
    }
    /* the error code itself is not exposed by this API, only a zero length */
    return 0;
}
|
||||
|
||||
#endif
|
||||
#error Please include unicode/unicode.h instead
|
||||
|
|
255
icu4c/source/common/unicode/bidi.h
Normal file
255
icu4c/source/common/unicode/bidi.h
Normal file
|
@ -0,0 +1,255 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: bidi.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999sep15
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef BIDI_H
|
||||
#define BIDI_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ubidi.h"
|
||||
|
||||
#ifndef XP_CPLUSPLUS
|
||||
# error This is a C++ header file.
|
||||
#endif
|
||||
|
||||
/**
|
||||
* BiDi is a C++ wrapper class for UBiDi.
|
||||
* You need one BiDi object in place of one UBiDi object.
|
||||
* For details on the API and implementation of the
|
||||
* Unicode BiDi algorithm, see ubidi.h.
|
||||
*
|
||||
* @see UBiDi
|
||||
*/
|
||||
class U_COMMON_API BiDi {
|
||||
public:
|
||||
/** @memo Default constructor, calls ubidi_open(). */
|
||||
BiDi();
|
||||
|
||||
/** @memo Constructor, calls ubidi_open(). */
|
||||
BiDi(UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Preallocating constructor, calls ubidi_openSized(). */
|
||||
BiDi(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Destructor, calls ubidi_close(). */
|
||||
~BiDi();
|
||||
|
||||
/** @memo Set this object for one paragraph's text. */
|
||||
BiDi &
|
||||
setPara(const UChar *text, UTextOffset length,
|
||||
UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
|
||||
UErrorCode &rErrorCode);
|
||||
|
||||
|
||||
/** @memo Set this object for one line of the paragraph object's text. */
|
||||
BiDi &
|
||||
setLine(const BiDi &rParaBiDi,
|
||||
UTextOffset start, UTextOffset limit,
|
||||
UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Get the directionality of the text. */
|
||||
UBiDiDirection
|
||||
getDirection() const;
|
||||
|
||||
/** @memo Get the length of the text. */
|
||||
UTextOffset
|
||||
getLength() const;
|
||||
|
||||
/** @memo Get the paragraph level of the text. */
|
||||
UBiDiLevel
|
||||
getParaLevel() const;
|
||||
|
||||
/** @memo Get the level for one character. */
|
||||
UBiDiLevel
|
||||
getLevelAt(UTextOffset charIndex) const;
|
||||
|
||||
/** @memo Get an array of levels for each character. */
|
||||
const UBiDiLevel *
|
||||
getLevels(UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Get a logical run. */
|
||||
void
|
||||
getLogicalRun(UTextOffset logicalStart,
|
||||
UTextOffset &rLogicalLimit, UBiDiLevel &rLevel) const;
|
||||
|
||||
/** @memo Get the number of runs. */
|
||||
UTextOffset
|
||||
countRuns(UErrorCode &rErrorCode);
|
||||
|
||||
/**
|
||||
* @memo Get one run's logical start, length, and directionality,
|
||||
* which can be 0 for LTR or 1 for RTL.
|
||||
*/
|
||||
UBiDiDirection
|
||||
getVisualRun(UTextOffset runIndex, UTextOffset &rLogicalStart, UTextOffset &rLength);
|
||||
|
||||
/** @memo Get the visual position from a logical text position. */
|
||||
UTextOffset
|
||||
getVisualIndex(UTextOffset logicalIndex, UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Get the logical text position from a visual position. */
|
||||
UTextOffset
|
||||
getLogicalIndex(UTextOffset visualIndex, UErrorCode &rErrorCode);
|
||||
|
||||
/**
|
||||
* @memo Get a logical-to-visual index map (array) for the characters in the UBiDi
|
||||
* (paragraph or line) object.
|
||||
*/
|
||||
void
|
||||
getLogicalMap(UTextOffset *indexMap, UErrorCode &rErrorCode);
|
||||
|
||||
/**
|
||||
* @memo Get a visual-to-logical index map (array) for the characters in the UBiDi
|
||||
* (paragraph or line) object.
|
||||
*/
|
||||
void
|
||||
getVisualMap(UTextOffset *indexMap, UErrorCode &rErrorCode);
|
||||
|
||||
/** @memo Same as ubidi_reorderLogical(). */
|
||||
static void
|
||||
reorderLogical(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
|
||||
|
||||
/** @memo Same as ubidi_reorderVisual(). */
|
||||
static void
|
||||
reorderVisual(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
|
||||
|
||||
/** @memo Same as ubidi_invertMap(). */
|
||||
static void
|
||||
invertMap(const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length);
|
||||
|
||||
protected:
|
||||
UBiDi *pBiDi;
|
||||
};
|
||||
|
||||
/* Inline implementations. -------------------------------------------------- */
|
||||
|
||||
/* Default constructor: the wrapped UBiDi handle is whatever ubidi_open() returns. */
inline BiDi::BiDi() : pBiDi(ubidi_open()) {
}
|
||||
|
||||
/*
 * Constructor with error reporting.
 * If rErrorCode already indicates a failure, no UBiDi object is opened and
 * pBiDi stays 0. Otherwise ubidi_open() is called, and a 0 result is
 * reported as U_MEMORY_ALLOCATION_ERROR.
 */
inline BiDi::BiDi(UErrorCode &rErrorCode) : pBiDi(0) {
    if(!U_SUCCESS(rErrorCode)) {
        return;
    }

    pBiDi=ubidi_open();
    if(pBiDi==0) {
        rErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}
|
||||
|
||||
/*
 * Preallocating constructor: hands maxLength, maxRunCount and the address of
 * rErrorCode to ubidi_openSized() and keeps the handle it returns.
 */
inline BiDi::BiDi(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode &rErrorCode)
    : pBiDi(ubidi_openSized(maxLength, maxRunCount, &rErrorCode)) {
}
|
||||
|
||||
/* Destructor: closes the wrapped UBiDi handle, then clears the member. */
inline BiDi::~BiDi() {
    UBiDi *owned=pBiDi;
    ubidi_close(owned);
    pBiDi=0;
}
|
||||
|
||||
/*
 * Forwards all arguments to ubidi_setPara() for the wrapped UBiDi handle.
 * Status is reported through rErrorCode; returns *this.
 */
inline BiDi &
BiDi::setPara(const UChar *text, UTextOffset length,
              UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
              UErrorCode &rErrorCode) {
    UErrorCode *pErrorCode=&rErrorCode;
    ubidi_setPara(pBiDi, text, length, paraLevel, embeddingLevels, pErrorCode);
    return *this;
}
|
||||
|
||||
|
||||
/*
 * Forwards to ubidi_setLine(), passing rParaBiDi's UBiDi handle, the
 * [start, limit) arguments and this object's own handle.
 * Status is reported through rErrorCode; returns *this.
 */
inline BiDi &
BiDi::setLine(const BiDi &rParaBiDi,
              UTextOffset start, UTextOffset limit,
              UErrorCode &rErrorCode) {
    UErrorCode *pErrorCode=&rErrorCode;
    ubidi_setLine(rParaBiDi.pBiDi, start, limit, pBiDi, pErrorCode);
    return *this;
}
|
||||
|
||||
/* Returns the result of ubidi_getDirection() for the wrapped UBiDi handle. */
inline UBiDiDirection
BiDi::getDirection() const {
    const UBiDiDirection direction=ubidi_getDirection(pBiDi);
    return direction;
}
|
||||
|
||||
/* Returns the result of ubidi_getLength() for the wrapped UBiDi handle. */
inline UTextOffset
BiDi::getLength() const {
    const UTextOffset textLength=ubidi_getLength(pBiDi);
    return textLength;
}
|
||||
|
||||
/* Returns the result of ubidi_getParaLevel() for the wrapped UBiDi handle. */
inline UBiDiLevel
BiDi::getParaLevel() const {
    const UBiDiLevel level=ubidi_getParaLevel(pBiDi);
    return level;
}
|
||||
|
||||
/* Returns the result of ubidi_getLevelAt() for charIndex on the wrapped UBiDi handle. */
inline UBiDiLevel
BiDi::getLevelAt(UTextOffset charIndex) const {
    const UBiDiLevel level=ubidi_getLevelAt(pBiDi, charIndex);
    return level;
}
|
||||
|
||||
/*
 * Returns the pointer produced by ubidi_getLevels() for the wrapped UBiDi
 * handle; status is reported through rErrorCode.
 */
inline const UBiDiLevel *
BiDi::getLevels(UErrorCode &rErrorCode) {
    const UBiDiLevel *levels=ubidi_getLevels(pBiDi, &rErrorCode);
    return levels;
}
|
||||
|
||||
inline void
|
||||
BiDi::getLogicalRun(UTextOffset logicalStart,
|
||||
UTextOffset &rLogicalLimit, UBiDiLevel &rLevel) const {
|
||||
ubidi_getLogicalRun(pBiDi, logicalStart, &rLogicalLimit, &rLevel);
|
||||
}
|
||||
|
||||
/*
 * Returns the result of ubidi_countRuns() for the wrapped UBiDi handle;
 * status is reported through rErrorCode.
 */
inline UTextOffset
BiDi::countRuns(UErrorCode &rErrorCode) {
    const UTextOffset runCount=ubidi_countRuns(pBiDi, &rErrorCode);
    return runCount;
}
|
||||
|
||||
/*
 * Forwards to ubidi_getVisualRun() for run number runIndex. The addresses of
 * rLogicalStart and rLength are passed so the C function can fill them in;
 * its return value is handed back unchanged.
 */
inline UBiDiDirection
BiDi::getVisualRun(UTextOffset runIndex, UTextOffset &rLogicalStart, UTextOffset &rLength) {
    UTextOffset *pLogicalStart=&rLogicalStart;
    UTextOffset *pLength=&rLength;
    const UBiDiDirection direction=ubidi_getVisualRun(pBiDi, runIndex, pLogicalStart, pLength);
    return direction;
}
|
||||
|
||||
/*
 * Returns the result of ubidi_getVisualIndex() for logicalIndex;
 * status is reported through rErrorCode.
 */
inline UTextOffset
BiDi::getVisualIndex(UTextOffset logicalIndex, UErrorCode &rErrorCode) {
    const UTextOffset visualIndex=ubidi_getVisualIndex(pBiDi, logicalIndex, &rErrorCode);
    return visualIndex;
}
|
||||
|
||||
/*
 * Returns the result of ubidi_getLogicalIndex() for visualIndex;
 * status is reported through rErrorCode.
 */
inline UTextOffset
BiDi::getLogicalIndex(UTextOffset visualIndex, UErrorCode &rErrorCode) {
    const UTextOffset logicalIndex=ubidi_getLogicalIndex(pBiDi, visualIndex, &rErrorCode);
    return logicalIndex;
}
|
||||
|
||||
inline void
|
||||
BiDi::getLogicalMap(UTextOffset *indexMap, UErrorCode &rErrorCode) {
|
||||
ubidi_getLogicalMap(pBiDi, indexMap, &rErrorCode);
|
||||
}
|
||||
|
||||
inline void
|
||||
BiDi::getVisualMap(UTextOffset *indexMap, UErrorCode &rErrorCode) {
|
||||
ubidi_getVisualMap(pBiDi, indexMap, &rErrorCode);
|
||||
}
|
||||
|
||||
inline void
|
||||
BiDi::reorderLogical(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap) {
|
||||
ubidi_reorderLogical(levels, length, indexMap);
|
||||
}
|
||||
|
||||
inline void
|
||||
BiDi::reorderVisual(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap) {
|
||||
ubidi_reorderVisual(levels, length, indexMap);
|
||||
}
|
||||
|
||||
inline void
|
||||
BiDi::invertMap(const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length) {
|
||||
ubidi_invertMap(srcMap, destMap, length);
|
||||
}
|
||||
|
||||
#endif
|
194
icu4c/source/common/unicode/chariter.h
Normal file
194
icu4c/source/common/unicode/chariter.h
Normal file
|
@ -0,0 +1,194 @@
|
|||
|
||||
/*
|
||||
********************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef CHARITER_H
|
||||
#define CHARITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
|
||||
/**
|
||||
* Abstract class defining a protocol for accessing characters in a text-storage object.
|
||||
<P>Examples:<P>
|
||||
|
||||
Function processing characters, in this example simple output
|
||||
<pre>
|
||||
. void processChar( UChar c )
|
||||
. {
|
||||
. cout << " " << c;
|
||||
. }
|
||||
</pre>
|
||||
Traverse the text from start to finish
|
||||
<pre>
|
||||
. void traverseForward(CharacterIterator& iter)
|
||||
. {
|
||||
. for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
|
||||
. processChar(c);
|
||||
. }
|
||||
. }
|
||||
</pre>
|
||||
Traverse the text backwards, from end to start
|
||||
<pre>
|
||||
. void traverseBackward(CharacterIterator& iter)
|
||||
. {
|
||||
. for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
|
||||
. processChar(c);
|
||||
. }
|
||||
. }
|
||||
</pre>
|
||||
Traverse both forward and backward from a given position in the text.
|
||||
Calls to notBoundary() in this example represents some additional stopping criteria.
|
||||
<pre>
|
||||
. void traverseOut(CharacterIterator& iter, UTextOffset pos)
|
||||
. {
|
||||
. UChar c;
|
||||
. for (c = iter.setIndex(pos);
|
||||
. c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
||||
. c = iter.next()) {}
|
||||
. UTextOffset end = iter.getIndex();
|
||||
. for (c = iter.setIndex(pos);
|
||||
. c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
|
||||
. c = iter.previous()) {}
|
||||
. UTextOffset start = iter.getIndex() + 1;
|
||||
.
|
||||
. cout << "start: " << start << " end: " << end << endl;
|
||||
. for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
|
||||
. processChar(c);
|
||||
. }
|
||||
. }
|
||||
</pre>
|
||||
Creating a StringCharacterIterator and calling the test functions
|
||||
<pre>
|
||||
. void CharacterIterator_Example( void )
|
||||
. {
|
||||
. cout << endl << "===== CharacterIterator_Example: =====" << endl;
|
||||
. UnicodeString text("Ein kleiner Satz.");
|
||||
. StringCharacterIterator iterator(text);
|
||||
. cout << "----- traverseForward: -----------" << endl;
|
||||
. traverseForward( iterator );
|
||||
. cout << endl << endl << "----- traverseBackward: ----------" << endl;
|
||||
. traverseBackward( iterator );
|
||||
. cout << endl << endl << "----- traverseOut: ---------------" << endl;
|
||||
. traverseOut( iterator, 7 );
|
||||
. cout << endl << endl << "-----" << endl;
|
||||
. }
|
||||
</pre>
|
||||
*/
|
||||
class U_COMMON_API CharacterIterator
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Value returned by most of CharacterIterator's functions
|
||||
* when the iterator has reached the limits of its iteration. */
|
||||
static const UChar DONE;
|
||||
|
||||
/**
|
||||
* Destructor. */
|
||||
virtual ~CharacterIterator();
|
||||
|
||||
/**
|
||||
* Returns true when both iterators refer to the same
|
||||
* character in the same character-storage object. */
|
||||
virtual bool_t operator==(const CharacterIterator& that) const = 0;
|
||||
|
||||
/**
|
||||
* Returns true when the iterators refer to different
|
||||
* text-storage objects, or to different characters in the
|
||||
* same text-storage object. */
|
||||
bool_t operator!=(const CharacterIterator& that) const { return !operator==(that); }
|
||||
|
||||
/**
|
||||
* Returns a pointer to a new CharacterIterator of the same
|
||||
* concrete class as this one, and referring to the same
|
||||
* character in the same text-storage object as this one. The
|
||||
* caller is responsible for deleting the new clone. */
|
||||
virtual CharacterIterator*
|
||||
clone(void) const = 0;
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator. */
|
||||
virtual int32_t hashCode(void) const = 0;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first character in its
|
||||
* iteration range, and returns that character, */
|
||||
virtual UChar first(void) = 0;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last character in its
|
||||
* iteration range, and returns that character. */
|
||||
virtual UChar last(void) = 0;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th character
|
||||
* in the text-storage object the iterator refers to, and
|
||||
* returns that character. */
|
||||
virtual UChar setIndex(UTextOffset position) = 0;
|
||||
|
||||
/**
|
||||
* Returns the character the iterator currently refers to. */
|
||||
virtual UChar current(void) const = 0;
|
||||
|
||||
/**
|
||||
* Advances to the next character in the iteration range
|
||||
* (toward last()), and returns that character. If there are
|
||||
* no more characters to return, returns DONE. */
|
||||
virtual UChar next(void) = 0;
|
||||
|
||||
/**
|
||||
* Advances to the previous character in the iteration rance
|
||||
* (toward first()), and returns that character. If there are
|
||||
* no more characters to return, returns DONE. */
|
||||
virtual UChar previous(void) = 0;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying text-storage
|
||||
* object of the character returned by first(). Since it's
|
||||
* possible to create an iterator that iterates across only
|
||||
* part of a text-storage object, this number isn't
|
||||
* necessarily 0. */
|
||||
virtual UTextOffset startIndex(void) const = 0;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying text-storage
|
||||
* object of the position immediately BEYOND the character
|
||||
* returned by last(). */
|
||||
virtual UTextOffset endIndex(void) const = 0;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying text-storage
|
||||
* object of the character the iterator currently refers to
|
||||
* (i.e., the character returned by current()). */
|
||||
virtual UTextOffset getIndex(void) const = 0;
|
||||
|
||||
/**
|
||||
* Copies the text under iteration into the UnicodeString
|
||||
* referred to by "result". @param result Receives a copy of
|
||||
* the text under iteration. */
|
||||
virtual void getText(UnicodeString& result) = 0;
|
||||
|
||||
/**
|
||||
* Returns a UClassID for this CharacterIterator ("poor man's
|
||||
* RTTI").<P> Despite the fact that this function is public,
|
||||
* DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! */
|
||||
virtual UClassID getDynamicClassID(void) const = 0;
|
||||
|
||||
protected:
|
||||
CharacterIterator() {}
|
||||
CharacterIterator(const CharacterIterator&) {}
|
||||
CharacterIterator& operator=(const CharacterIterator&) { return *this; }
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
325
icu4c/source/common/unicode/convert.h
Normal file
325
icu4c/source/common/unicode/convert.h
Normal file
|
@ -0,0 +1,325 @@
|
|||
/*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef CONVERT_H
|
||||
#define CONVERT_H
|
||||
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/ucnv.h"
|
||||
|
||||
class U_COMMON_API UnicodeConverterCPP
|
||||
{
|
||||
private:
|
||||
/*Internal Data representation of the Converter*/
|
||||
UConverter* myUnicodeConverter;
|
||||
/*Debug method*/
|
||||
void printRef(void) const;
|
||||
|
||||
/* list of converter and alias names */
|
||||
static const char **availableConverterNames;
|
||||
static int32_t availableConverterNamesCount;
|
||||
|
||||
public:
|
||||
|
||||
//Constructors and a destructor
|
||||
|
||||
/**
|
||||
* Creates Unicode Conversion Object will default to LATIN1 <-> encoding
|
||||
* @return An object Handle if successful or a NULL if the creation failed
|
||||
*/
|
||||
UnicodeConverterCPP();
|
||||
|
||||
/**
|
||||
* Creates Unicode Conversion Object by specifying the codepage name. The name
|
||||
* string is in ASCII format.
|
||||
* @param code_set the pointer to a char[] object containing a codepage name. (I)
|
||||
* @param UErrorCode Error status (I/O) IILLEGAL_ARGUMENT_ERROR will be returned if the string is empty.
|
||||
* If the internal program does not work correctly, for example, if there's no such codepage,
|
||||
* U_INTERNAL_PROGRAM_ERROR will be returned.
|
||||
* @return An object Handle if successful or a NULL if the creation failed
|
||||
*/
|
||||
UnicodeConverterCPP(const char* name,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
*Creates a UnicodeConverter object with the names specified as unicode strings. The name should be limited to
|
||||
*the ASCII-7 alphanumerics. Dash and underscore characters are allowed for readability, but are ignored in the
|
||||
*search.
|
||||
*@param code_set name of the uconv table in Unicode string (I)
|
||||
*@param err error status (I/O) IILLEGAL_ARGUMENT_ERROR will be returned if the string is empty. If the internal
|
||||
*program does not work correctly, for example, if there's no such codepage, U_INTERNAL_PROGRAM_ERROR will be
|
||||
*returned.
|
||||
*@return the created Unicode converter object
|
||||
*/
|
||||
UnicodeConverterCPP(const UnicodeString& name,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
* Creates Unicode Conversion Object using the codepage ID number.
|
||||
* @param code_set a codepage # (I)
|
||||
* @UErrorCode Error status (I/O) IILLEGAL_ARGUMENT_ERROR will be returned if the string is empty.
|
||||
* If the internal program does not work correctly, for example, if there's no such codepage,
|
||||
* U_INTERNAL_PROGRAM_ERROR will be returned.
|
||||
* @return An object Handle if successful or a NULL if failed
|
||||
*
|
||||
*/
|
||||
UnicodeConverterCPP(int32_t codepageNumber,
|
||||
UConverterPlatform platform,
|
||||
UErrorCode& err);
|
||||
|
||||
~UnicodeConverterCPP();
|
||||
|
||||
|
||||
/**
|
||||
* Transcodes the source UnicodeString to the target string in a codepage encoding
|
||||
* with the specified Unicode converter. For example, if a Unicode to/from JIS
|
||||
* converter is specified, the source string in Unicode will be transcoded to JIS
|
||||
* encoding. The result will be stored in JIS encoding.
|
||||
*
|
||||
* @param source the source Unicode string
|
||||
* @param target the target string in codepage encoding
|
||||
* @param targetSize Input the number of bytes available in the "target" buffer, Output the number of bytes copied to it
|
||||
* @param err the error status code. U_MEMORY_ALLOCATION_ERROR will be returned if the
|
||||
* the internal process buffer cannot be allocated for transcoding. U_ILLEGAL_ARGUMENT_ERROR
|
||||
* is returned if the converter is null or the source or target string is empty.
|
||||
*/
|
||||
void fromUnicodeString(char* target,
|
||||
int32_t& targetSize,
|
||||
const UnicodeString& source,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Transcode the source string in codepage encoding to the target string in
|
||||
* Unicode encoding. For example, if a Unicode to/from JIS
|
||||
* converter is specified, the source string in JIS encoding will be transcoded
|
||||
* to Unicode encoding. The result will be stored in Unicode encoding.
|
||||
* @param source the source string in codepage encoding
|
||||
* @param target the target string in Unicode encoding
|
||||
* @param targetSize : I/O parameter, Input size buffer, Output # of bytes copied to it
|
||||
* @param err the error status code U_MEMORY_ALLOCATION_ERROR will be returned if the
|
||||
* the internal process buffer cannot be allocated for transcoding. U_ILLEGAL_ARGUMENT_ERROR
|
||||
* is returned if the converter is null or the source or target string is empty.
|
||||
*/
|
||||
void toUnicodeString(UnicodeString& target,
|
||||
const char* source,
|
||||
int32_t sourceSize,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Transcodes an array of unicode characters to an array of codepage characters.
|
||||
* The source pointer is an I/O parameter, it starts out pointing at the place
|
||||
* to begin translating, and ends up pointing after the first sequence of the bytes
|
||||
* that it encounters that are semantically invalid.
|
||||
* if T_UnicodeConverter_setMissingCharAction is called with an action other than STOP
|
||||
* before a call is made to this API, consumed and source should point to the same place
|
||||
* (unless target ends with an imcomplete sequence of bytes and flush is FALSE).
|
||||
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
|
||||
* codepage characters to. Output : points to after the last codepage character copied
|
||||
* to target.
|
||||
* @param targetLimit the pointer to the end of the target array
|
||||
* @param source the source Unicode character array
|
||||
* @param sourceLimit the pointer to the end of the source array
|
||||
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
|
||||
* in this call, FALSE otherwise. (future feature pending)
|
||||
* @param UErrorCode the error status. U_ILLEGAL_ARGUMENT_ERROR will be returned if the
|
||||
* converter is null.
|
||||
*/
|
||||
void fromUnicode(char*& target,
|
||||
const char* targetLimit,
|
||||
const UChar*& source,
|
||||
const UChar* sourceLimit,
|
||||
int32_t * offsets,
|
||||
bool_t flush,
|
||||
UErrorCode& err);
|
||||
|
||||
|
||||
/**
|
||||
* Converts an array of codepage characters into an array of unicode characters.
|
||||
* The source pointer is an I/O parameter, it starts out pointing at the place
|
||||
* to begin translating, and ends up pointing after the first sequence of the bytes
|
||||
* that it encounters that are semantically invalid.
|
||||
* if T_UnicodeConverter_setMissingUnicodeAction is called with an action other than STOP
|
||||
* before a call is made to this API, consumed and source should point to the same place
|
||||
* (unless target ends with an imcomplete sequence of bytes and flush is FALSE).
|
||||
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
|
||||
* Unicode characters to. Output : points to after the last UChar copied to target.
|
||||
* @param targetLimit the pointer to the end of the target array
|
||||
* @param source the source codepage character array
|
||||
* @param sourceLimit the pointer to the end of the source array
|
||||
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
|
||||
* in this call, FALSE otherwise. (future feature pending)
|
||||
* @param err the error code status U_ILLEGAL_ARGUMENT_ERROR will be returned if the
|
||||
* converter is null, targetLimit < target, sourceLimit < source
|
||||
*/
|
||||
void toUnicode(UChar*& target,
|
||||
const UChar* targetLimit,
|
||||
const char*& source,
|
||||
const char* sourceLimit,
|
||||
int32_t * offsets,
|
||||
bool_t flush,
|
||||
UErrorCode& err);
|
||||
|
||||
|
||||
/*
|
||||
* Returns the maximum length of bytes used by a character. This varies between 1 and 4
|
||||
* @return the max number of bytes per codepage character * converter is null, targetLimit < target, sourceLimit < source
|
||||
*/
|
||||
int8_t getMaxBytesPerChar(void) const;
|
||||
|
||||
/**
|
||||
* Returns the minimum byte length for characters in this codepage. This is either
|
||||
* 1 or 2 for all supported codepages.
|
||||
* @return the minimum number of byte per codepage character
|
||||
*/
|
||||
int8_t getMinBytesPerChar(void) const;
|
||||
|
||||
/**
|
||||
*Gets the type of conversion associated with the converter
|
||||
* e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, EBCDIC_STATEFUL, LATIN_1
|
||||
* @return the type of the converter
|
||||
*/
|
||||
UConverterType getType(void) const;
|
||||
|
||||
/**
|
||||
*Gets the "starter" bytes for the converters of type MBCS
|
||||
*will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
|
||||
*is not MBCS.
|
||||
*fills in an array of boolean, with the value of the byte as offset to the array.
|
||||
*At return, if TRUE is found in at offset 0x20, it means that the byte 0x20 is a starter byte
|
||||
*in this converter.
|
||||
* @param starters: an array of size 256 to be filled in
|
||||
* @param err: an array of size 256 to be filled in
|
||||
* @see ucnv_getType
|
||||
*/
|
||||
void getStarters(bool_t starters[256],
|
||||
UErrorCode& err) const;
|
||||
/**
|
||||
* Fills in the output parameter, subChars, with the substitution characters
|
||||
* as multiple bytes.
|
||||
* @param subChars the subsitution characters
|
||||
* @param len the number of bytes of the substitution character array
|
||||
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR will be returned if
|
||||
* the converter is null. If the substitution character array is too small, an
|
||||
* U_INDEX_OUTOFBOUNDS_ERROR will be returned.
|
||||
*/
|
||||
void getSubstitutionChars(char* subChars,
|
||||
int8_t& len,
|
||||
UErrorCode& err) const;
|
||||
/**
|
||||
* Sets the substitution chars when converting from unicode to a codepage. The
|
||||
* substitution is specified as a string of 1-4 bytes, and may contain null byte.
|
||||
* The fill-in parameter err will get the error status on return.
|
||||
* @param cstr the substitution character array to be set with
|
||||
* @param len the number of bytes of the substitution character array and upon return will contain the
|
||||
* number of bytes copied to that buffer
|
||||
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR if the converter is
|
||||
* null. or if the number of bytes provided are not in the codepage's range (e.g length 1 for ucs-2)
|
||||
*/
|
||||
void setSubstitutionChars(const char* subChars,
|
||||
int8_t len,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
* Resets the state of stateful conversion to the default state. This is used
|
||||
* in the case of error to restart a conversion from a known default state.
|
||||
*/
|
||||
void resetState(void);
|
||||
|
||||
/**
|
||||
* Gets the name of the converter (zero-terminated).
|
||||
* the name will be the internal name of the converter
|
||||
* @param converter the Unicode converter
|
||||
* @param err the error status code. U_INDEX_OUTOFBOUNDS_ERROR in the converterNameLen is too
|
||||
* small to contain the name.
|
||||
*/
|
||||
const char* getName( UErrorCode& err) const;
|
||||
|
||||
|
||||
/**
|
||||
* Gets a codepage number associated with the converter. This is not guaranteed
|
||||
* to be the one used to create the converter. Some converters do not represent
|
||||
* IBM registered codepages and return zero for the codepage number.
|
||||
* The error code fill-in parameter indicates if the codepage number is available.
|
||||
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR will returned if
|
||||
* the converter is null or if converter's data table is null.
|
||||
* @return If any error occurrs, null will be returned.
|
||||
*/
|
||||
int32_t getCodepage(UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns the current setting action taken when a character from a codepage
|
||||
* is missing. (Currently STOP or SUBSTITUTE).
|
||||
* @return the action constant when a Unicode character cannot be converted to a
|
||||
* codepage equivalent
|
||||
*/
|
||||
UConverterToUCallback getMissingCharAction(void) const;
|
||||
|
||||
/**
|
||||
* Return the current setting action taken when a unicode character is missing.
|
||||
* (Currently STOP or SUBSTITUTE).
|
||||
* @return the action constant when a codepage character cannot be converted to a
|
||||
* Unicode eqivalent
|
||||
*/
|
||||
UConverterFromUCallback getMissingUnicodeAction(void) const;
|
||||
|
||||
/**
|
||||
* Sets the current setting action taken when a character from a codepage is
|
||||
* missing. (Currently STOP or SUBSTITUTE).
|
||||
* @param action the action constant if an equivalent codepage character is missing
|
||||
*/
|
||||
void setMissingCharAction(UConverterToUCallback action,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
* Sets the current setting action taken when a unicode character is missing.
|
||||
* (currently T_UnicodeConverter_MissingUnicodeAction is either STOP or SUBSTITUTE,
|
||||
* SKIP, CLOSEST_MATCH, ESCAPE_SEQ may be added in the future).
|
||||
* @param action the action constant if an equivalent Unicode character is missing
|
||||
* @param err the error status code
|
||||
*/
|
||||
void setMissingUnicodeAction(UConverterFromUCallback action,
|
||||
UErrorCode& err);
|
||||
/**
|
||||
* Returns the localized name of the UnicodeConverter, if for any reason it is
|
||||
* available, the internal name will be returned instead.
|
||||
* @param displayLocale the valid Locale, from which we want to localize
|
||||
* @param displayString a UnicodeString that is going to be filled in.
|
||||
*/
|
||||
void getDisplayName(const Locale& displayLocale,
|
||||
UnicodeString& displayName) const;
|
||||
|
||||
/**
|
||||
* Returns the T_UnicodeConverter_platform (ICU defined enum) of a UnicodeConverter
|
||||
* available, the internal name will be returned instead.
|
||||
* @param err the error code status
|
||||
* @return the codepages platform
|
||||
*/
|
||||
UConverterPlatform getCodepagePlatform(UErrorCode& err) const;
|
||||
|
||||
|
||||
UnicodeConverterCPP& operator=(const UnicodeConverterCPP& that);
|
||||
bool_t operator==(const UnicodeConverterCPP& that) const;
|
||||
bool_t operator!=(const UnicodeConverterCPP& that) const;
|
||||
UnicodeConverterCPP(const UnicodeConverterCPP& that);
|
||||
|
||||
/**
|
||||
* Returns the available names. Lazy evaluated, Library owns the storage
|
||||
* @param num the number of available converters
|
||||
* @param err the error code status
|
||||
* @return the name array
|
||||
*/
|
||||
static const char* const* getAvailableNames(int32_t& num,
|
||||
UErrorCode& err);
|
||||
|
||||
/**
|
||||
* Iterates through every cached converter and frees all the unused ones
|
||||
* @return the number of cached converters successfully deleted
|
||||
*/
|
||||
static int32_t flushCache(void);
|
||||
};
|
||||
#endif
|
570
icu4c/source/common/unicode/locid.h
Normal file
570
icu4c/source/common/unicode/locid.h
Normal file
|
@ -0,0 +1,570 @@
|
|||
/*
|
||||
*****************************************************************************************
|
||||
*
|
||||
* Copyright (C) 1996-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*****************************************************************************************
|
||||
*
|
||||
* File locid.h
|
||||
*
|
||||
* Created by: Helena Shih
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 02/11/97 aliu Changed gLocPath to fgLocPath and added methods to
|
||||
* get and set it.
|
||||
* 04/02/97 aliu Made operator!= inline; fixed return value of getName().
|
||||
* 04/15/97 aliu Cleanup for AIX/Win32.
|
||||
* 04/24/97 aliu Numerous changes per code review.
|
||||
* 08/18/98 stephen Added tokenizeString(),changed getDisplayName()
|
||||
* 09/08/98 stephen Moved definition of kEmptyString for Mac Port
|
||||
* 11/09/99 weiv Added const char * getName() const;
|
||||
*****************************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef LOCID_H
|
||||
#define LOCID_H
|
||||
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
typedef struct ULocale ULocale;
|
||||
typedef struct UHashtable UHashtable;
|
||||
|
||||
#define ULOC_LANG_CAPACITY 3
|
||||
#define ULOC_COUNTRY_CAPACITY 3
|
||||
#define ULOC_FULLNAME_CAPACITY 50
|
||||
|
||||
/**
|
||||
*
|
||||
* A <code>Locale</code> object represents a specific geographical, political,
|
||||
* or cultural region. An operation that requires a <code>Locale</code> to perform
|
||||
* its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
|
||||
* to tailor information for the user. For example, displaying a number
|
||||
* is a locale-sensitive operation--the number should be formatted
|
||||
* according to the customs/conventions of the user's native country,
|
||||
* region, or culture.
|
||||
*
|
||||
* <P>
|
||||
* You create a <code>Locale</code> object using one of the three constructors in
|
||||
* this class:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . Locale( const UnicodeString& newLanguage);
|
||||
* .
|
||||
* . Locale( const UnicodeString& language,
|
||||
* . const UnicodeString& country);
|
||||
* .
|
||||
* . Locale( const UnicodeString& language,
|
||||
* . const UnicodeString& country,
|
||||
* . const UnicodeString& variant);
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* The first argument to the constructors is a valid <STRONG>ISO
|
||||
* Language Code.</STRONG> These codes are the lower-case two-letter
|
||||
* codes as defined by ISO-639.
|
||||
* You can find a full list of these codes at a number of sites, such as:
|
||||
* <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
|
||||
* <code>http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</code></a>
|
||||
*
|
||||
* <P>
|
||||
* The second argument to the constructors is a valid <STRONG>ISO Country
|
||||
* Code.</STRONG> These codes are the upper-case two-letter codes
|
||||
* as defined by ISO-3166.
|
||||
* You can find a full list of these codes at a number of sites, such as:
|
||||
* <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
|
||||
* <code>http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</code></a>
|
||||
*
|
||||
* <P>
|
||||
* The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
|
||||
* The Variant codes are vendor and browser-specific.
|
||||
* For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
|
||||
* Where there are two variants, separate them with an underscore, and
|
||||
* put the most important one first. For
|
||||
* example, a Traditional Spanish collation might be referenced, with
|
||||
* "ES", "ES", "Traditional_WIN".
|
||||
*
|
||||
* <P>
|
||||
* Because a <code>Locale</code> object is just an identifier for a region,
|
||||
* no validity check is performed when you construct a <code>Locale</code>.
|
||||
* If you want to see whether particular resources are available for the
|
||||
* <code>Locale</code> you construct, you must query those resources. For
|
||||
* example, ask the <code>NumberFormat</code> for the locales it supports
|
||||
* using its <code>getAvailableLocales</code> method.
|
||||
* <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
|
||||
* locale, you get back the best available match, not necessarily
|
||||
* precisely what you asked for. For more information, look at
|
||||
* <a href="java.util.ResourceBundle.html"><code>ResourceBundle</code></a>.
|
||||
*
|
||||
* <P>
|
||||
* The <code>Locale</code> class provides a number of convenient constants
|
||||
* that you can use to create <code>Locale</code> objects for commonly used
|
||||
* locales. For example, the following refers to a <code>Locale</code> object
|
||||
* for the United States:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . Locale::US
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*
|
||||
* <P>
|
||||
* Once you've created a <code>Locale</code> you can query it for information about
|
||||
* itself. Use <code>getCountry</code> to get the ISO Country Code and
|
||||
* <code>getLanguage</code> to get the ISO Language Code. You can
|
||||
* use <code>getDisplayCountry</code> to get the
|
||||
* name of the country suitable for displaying to the user. Similarly,
|
||||
* you can use <code>getDisplayLanguage</code> to get the name of
|
||||
* the language suitable for displaying to the user. Interestingly,
|
||||
* the <code>getDisplayXXX</code> methods are themselves locale-sensitive
|
||||
* and have two versions: one that uses the default locale and one
|
||||
* that takes a locale as an argument and displays the name or country in
|
||||
* a language appropriate to that locale.
|
||||
*
|
||||
* <P>
|
||||
* The TIFC provides a number of classes that perform locale-sensitive
|
||||
* operations. For example, the <code>NumberFormat</code> class formats
|
||||
* numbers, currency, or percentages in a locale-sensitive manner. Classes
|
||||
* such as <code>NumberFormat</code> have a number of convenience methods
|
||||
* for creating a default object of that type. For example, the
|
||||
* <code>NumberFormat</code> class provides these three convenience methods
|
||||
* for creating a default <code>NumberFormat</code> object:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . UErrorCode success = U_ZERO_ERROR;
|
||||
* . Locale myLocale;
|
||||
* . NumberFormat *nf;
|
||||
* .
|
||||
* . nf = NumberFormat::createInstance( success ); delete nf;
|
||||
* . nf = NumberFormat::createCurrencyInstance( success ); delete nf;
|
||||
* . nf = NumberFormat::createPercentInstance( success ); delete nf;
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* Each of these methods has two variants; one with an explicit locale
|
||||
* and one without; the latter using the default locale.
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . nf = NumberFormat::createInstance( myLocale, success ); delete nf;
|
||||
* . nf = NumberFormat::createCurrencyInstance( myLocale, success ); delete nf;
|
||||
* . nf = NumberFormat::createPercentInstance( myLocale, success ); delete nf;
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* A <code>Locale</code> is the mechanism for identifying the kind of object
|
||||
* (<code>NumberFormat</code>) that you would like to get. The locale is
|
||||
* <STRONG>just</STRONG> a mechanism for identifying objects,
|
||||
* <STRONG>not</STRONG> a container for the objects themselves.
|
||||
*
|
||||
* <P>
|
||||
* Each class that performs locale-sensitive operations allows you
|
||||
* to get all the available objects of that type. You can sift
|
||||
* through these objects by language, country, or variant,
|
||||
* and use the display names to present a menu to the user.
|
||||
* For example, you can create a menu of all the collation objects
|
||||
* suitable for a given language. Such classes implement these
|
||||
* three class methods:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . static Locale* getAvailableLocales(int32_t& numLocales)
|
||||
* . static UnicodeString& getDisplayName(const Locale& objectLocale,
|
||||
* . const Locale& displayLocale,
|
||||
* . UnicodeString& displayName)
|
||||
* . static UnicodeString& getDisplayName(const Locale& objectLocale,
|
||||
* . UnicodeString& displayName)
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*/
|
||||
class U_COMMON_API Locale
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Useful constants for language.
|
||||
*/
|
||||
static const Locale ENGLISH;
|
||||
static const Locale FRENCH;
|
||||
static const Locale GERMAN;
|
||||
static const Locale ITALIAN;
|
||||
static const Locale JAPANESE;
|
||||
static const Locale KOREAN;
|
||||
static const Locale CHINESE;
|
||||
static const Locale SIMPLIFIED_CHINESE;
|
||||
static const Locale TRADITIONAL_CHINESE;
|
||||
|
||||
/**
|
||||
* Useful constants for country.
|
||||
*/
|
||||
static const Locale FRANCE;
|
||||
static const Locale GERMANY;
|
||||
static const Locale ITALY;
|
||||
static const Locale JAPAN;
|
||||
static const Locale KOREA;
|
||||
static const Locale CHINA; // Alias for PRC
|
||||
static const Locale PRC; // Peoples Republic of China
|
||||
static const Locale TAIWAN; // Republic of China
|
||||
static const Locale UK;
|
||||
static const Locale US;
|
||||
static const Locale CANADA;
|
||||
static const Locale CANADA_FRENCH;
|
||||
|
||||
/**
|
||||
* Construct an empty locale. It's only used when a fill-in parameter is
|
||||
* needed.
|
||||
*/
|
||||
Locale();
|
||||
|
||||
/**
|
||||
* Construct a locale from language, country, variant.
|
||||
*
|
||||
* @param language Lowercase two-letter ISO-639 code.
|
||||
* @param country Uppercase two-letter ISO-3166 code. (optional)
|
||||
* @param variant Uppercase vendor and browser specific code. See class
|
||||
* description. (optional)
|
||||
*/
|
||||
Locale( const UnicodeString& language,
|
||||
const UnicodeString& country ,
|
||||
const UnicodeString& variant );
|
||||
|
||||
Locale( const UnicodeString& language,
|
||||
const UnicodeString& country );
|
||||
|
||||
Locale( const UnicodeString& language);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Initializes a Locale object from another Locale object.
|
||||
*
|
||||
* @param other The Locale object being copied in.
|
||||
*/
|
||||
Locale(const Locale& other);
|
||||
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
*/
|
||||
~Locale() ;
|
||||
|
||||
/**
|
||||
* Replaces the entire contents of *this with the specified value.
|
||||
*
|
||||
* @param other The Locale object being copied in.
|
||||
* @return *this
|
||||
*/
|
||||
Locale& operator=(const Locale& other);
|
||||
|
||||
/**
|
||||
* Checks if two locale keys are the same.
|
||||
*
|
||||
* @param other The locale key object to be compared with this.
|
||||
* @return True if the two locale keys are the same, false otherwise.
|
||||
*/
|
||||
bool_t operator==(const Locale& other) const;
|
||||
|
||||
/**
|
||||
* Checks if two locale keys are not the same.
|
||||
*
|
||||
* @param other The locale key object to be compared with this.
|
||||
* @return True if the two locale keys are not the same, false
|
||||
* otherwise.
|
||||
*/
|
||||
bool_t operator!=(const Locale& other) const;
|
||||
|
||||
/**
|
||||
* Common methods of getting the current default Locale. Used for the
|
||||
* presentation: menus, dialogs, etc. Generally set once when your applet or
|
||||
* application is initialized, then never reset. (If you do reset the
|
||||
* default locale, you probably want to reload your GUI, so that the change
|
||||
* is reflected in your interface.)
|
||||
*
|
||||
* More advanced programs will allow users to use different locales for
|
||||
* different fields, e.g. in a spreadsheet.
|
||||
*
|
||||
* Note that the initial setting will match the host system.
|
||||
*/
|
||||
static Locale& getDefault(void);
|
||||
|
||||
/**
|
||||
* Sets the default. Normally set once at the beginning of applet or
|
||||
* application, then never reset. setDefault does NOT reset the host locale.
|
||||
*
|
||||
* @param newLocale Locale to set to.
|
||||
*/
|
||||
static void setDefault(const Locale& newLocale,
|
||||
UErrorCode& success);
|
||||
|
||||
/**
|
||||
* Fills in "lang" with the locale's two-letter ISO-639 language code.
|
||||
* @param lang Receives the language code.
|
||||
* @return A reference to "lang".
|
||||
*/
|
||||
UnicodeString& getLanguage( UnicodeString& lang) const;
|
||||
/**
|
||||
* Fills in "cntry" with the locale's two-letter ISO-3166 country code.
|
||||
* @param cntry Receives the country code.
|
||||
* @return A reference to "cntry".
|
||||
*/
|
||||
UnicodeString& getCountry( UnicodeString& cntry) const;
|
||||
/**
|
||||
* Fills in "var" with the locale's variant code.
|
||||
* @param var Receives the variant code.
|
||||
* @return A reference to "var".
|
||||
*/
|
||||
UnicodeString& getVariant( UnicodeString& var) const;
|
||||
|
||||
/**
|
||||
* Fills in "name" the programmatic name of the entire locale, with the language,
|
||||
* country and variant separated by underbars. If a field is missing, at
|
||||
* most one underbar will occur. Example: "en", "de_DE", "en_US_WIN",
|
||||
* "de_POSIX", "fr_MAC"
|
||||
* @param name Receives the programmatic locale name.
|
||||
* @return A reference to "name".
|
||||
*/
|
||||
UnicodeString& getName( UnicodeString& name) const;
|
||||
|
||||
/**
|
||||
* Returns the programmatic name of the entire locale, with the language,
|
||||
* country and variant separated by underbars. If a field is missing, at
|
||||
* most one underbar will occur. Example: "en", "de_DE", "en_US_WIN",
|
||||
* "de_POSIX", "fr_MAC"
|
||||
* @return A pointer to "name".
|
||||
*/
|
||||
const char * getName() const;
|
||||
|
||||
/**
|
||||
* Fills in "name" with the locale's three-letter language code, as specified
|
||||
* in ISO draft standard ISO-639-2.
|
||||
* @param name Receives the three-letter language code.
|
||||
* @param status An UErrorCode to receive any MISSING_RESOURCE_ERRORs
|
||||
* @return A reference to "name".
|
||||
*/
|
||||
UnicodeString& getISO3Language(UnicodeString& name, UErrorCode& status) const;
|
||||
|
||||
// this version is deprecated, use getISO3Language(UnicodeString&, UErrorCode&)
|
||||
UnicodeString& getISO3Language(UnicodeString& name) const;
|
||||
|
||||
/**
|
||||
* Fills in "name" with the locale's three-letter ISO-3166 country code.
|
||||
* @param name Receives the three-letter country code.
|
||||
* @param status An UErrorCode to receive any MISSING_RESOURCE_ERRORs
|
||||
* @return A reference to "name".
|
||||
*/
|
||||
UnicodeString& getISO3Country( UnicodeString& name, UErrorCode& status) const;
|
||||
|
||||
// this version is deprecated, use getISO3Country(UnicodeString&, UErrorCode&);
|
||||
UnicodeString& getISO3Country( UnicodeString& name) const;
|
||||
|
||||
/**
|
||||
* Returns the Windows LCID value corresponding to this locale.
|
||||
* This value is stored in the resource data for the locale as a one-to-four-digit
|
||||
* hexadecimal number. If the resource is missing, in the wrong format, or
|
||||
* there is no Windows LCID value that corresponds to this locale, returns 0.
|
||||
*/
|
||||
uint32_t getLCID(void) const;
|
||||
|
||||
/**
|
||||
* Fills in "dispLang" with the name of this locale's language in a format suitable for
|
||||
* user display in the default locale. For example, if the locale's language code is
|
||||
* "fr" and the default locale's language code is "en", this function would set
|
||||
* dispLang to "French".
|
||||
* @param dispLang Receives the language's display name.
|
||||
* @return A reference to "dispLang".
|
||||
*/
|
||||
UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const;
|
||||
|
||||
/**
|
||||
* Fills in "dispLang" with the name of this locale's language in a format suitable for
|
||||
* user display in the locale specified by "inLocale". For example, if the locale's
|
||||
* language code is "en" and inLocale's language code is "fr", this function would set
|
||||
* dispLang to "Anglais".
|
||||
* @param inLocale Specifies the locale to be used to display the name. In other words,
|
||||
* if the locale's language code is "en", passing Locale::FRENCH for
|
||||
* inLocale would result in "Anglais", while passing Locale::GERMAN
|
||||
* for inLocale would result in "Englisch".
|
||||
* @param dispLang Receives the language's display name.
|
||||
* @return A reference to "dispLang".
|
||||
*/
|
||||
UnicodeString& getDisplayLanguage( const Locale& inLocale,
|
||||
UnicodeString& dispLang) const;
|
||||
/**
|
||||
* Fills in "dispCountry" with the name of this locale's country in a format suitable
|
||||
* for user display in the default locale. For example, if the locale's country code
|
||||
* is "FR" and the default locale's language code is "en", this function would set
|
||||
* dispCountry to "France".
|
||||
* @param dispCountry Receives the country's display name.
|
||||
* @return A reference to "dispCountry".
|
||||
*/
|
||||
UnicodeString& getDisplayCountry( UnicodeString& dispCountry) const;
|
||||
/**
|
||||
* Fills in "dispCountry" with the name of this locale's country in a format suitable
|
||||
* for user display in the locale specified by "inLocale". For example, if the locale's
|
||||
* country code is "US" and inLocale's language code is "fr", this function would set
|
||||
* dispCountry to "États-Unis".
|
||||
* @param inLocale Specifies the locale to be used to display the name. In other
|
||||
* words, if the locale's country code is "US", passing
|
||||
* Locale::FRENCH for inLocale would result in "États-Unis", while
|
||||
* passing Locale::GERMAN for inLocale would result in
|
||||
* "Vereinigte Staaten".
|
||||
* @param dispCountry Receives the country's display name.
|
||||
* @return A reference to "dispCountry".
|
||||
*/
|
||||
UnicodeString& getDisplayCountry( const Locale& inLocale,
|
||||
UnicodeString& dispCountry) const;
|
||||
|
||||
/**
|
||||
* Fills in "dispVar" with the name of this locale's variant code in a format suitable
|
||||
* for user display in the default locale.
|
||||
* @param dispVar Receives the variant's name.
|
||||
* @return A reference to "dispVar".
|
||||
*/
|
||||
UnicodeString& getDisplayVariant( UnicodeString& dispVar) const;
|
||||
/**
|
||||
* Fills in "dispVar" with the name of this locale's variant code in a format
|
||||
* suitable for user display in the locale specified by "inLocale".
|
||||
* @param inLocale Specifies the locale to be used to display the name.
|
||||
* @param dispVar Receives the variant's display name.
|
||||
* @return A reference to "dispVar".
|
||||
*/
|
||||
UnicodeString& getDisplayVariant( const Locale& inLocale,
|
||||
UnicodeString& dispVar) const;
|
||||
/**
|
||||
* Fills in "name" with the name of this locale in a format suitable for user display
|
||||
* in the default locale. This function uses getDisplayLanguage(), getDisplayCountry(),
|
||||
* and getDisplayVariant() to do its work, and outputs the display name in the format
|
||||
* "language (country[,variant])". For example, if the default locale is en_US, then
|
||||
* fr_FR's display name would be "French (France)", and es_MX_Traditional's display name
|
||||
* would be "Spanish (Mexico,Traditional)".
|
||||
* @param name Receives the locale's display name.
|
||||
* @return A reference to "name".
|
||||
*/
|
||||
UnicodeString& getDisplayName( UnicodeString& name) const;
|
||||
/**
|
||||
* Fills in "name" with the name of this locale in a format suitable for user display
|
||||
* in the locale specified by "inLocale". This function uses getDisplayLanguage(),
|
||||
* getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
|
||||
* name in the format "language (country[,variant])". For example, if inLocale is
|
||||
* fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's
|
||||
* display name would be "norvégien (Norvège,NY)".
|
||||
* @param inLocale Specifies the locale to be used to display the name.
|
||||
* @param name Receives the locale's display name.
|
||||
* @return A reference to "name".
|
||||
*/
|
||||
UnicodeString& getDisplayName( const Locale& inLocale,
|
||||
UnicodeString& name) const;
|
||||
/**
|
||||
* Generates a hash code for the locale. Since Locales are often used in hashtables,
|
||||
* caches the value for speed.
|
||||
*/
|
||||
int32_t hashCode(void) const;
|
||||
|
||||
/**
|
||||
* Returns a list of all installed locales.
|
||||
* @param count Receives the number of locales in the list.
|
||||
* @return A pointer to an array of Locale objects. This array is the list
|
||||
* of all locales with installed resource files. The caller does NOT
|
||||
* get ownership of this list, and must NOT delete it.
|
||||
*/
|
||||
static const Locale* getAvailableLocales(int32_t& count);
|
||||
|
||||
/**
|
||||
* Returns a list of all 2-letter country codes defined in ISO 3166.
|
||||
* Can be used to create Locales.
|
||||
* @param count Receives the number of countries in the list.
|
||||
* @return A pointer to an array of UnicodeString objects. The caller does NOT
|
||||
* get ownership of this list, and must NOT delete it.
|
||||
*/
|
||||
static const UnicodeString* getISOCountries(int32_t& count);
|
||||
|
||||
/**
|
||||
* Returns a list of all 2-letter language codes defined in ISO 639.
|
||||
* Can be used to create Locales.
|
||||
* [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed.
|
||||
* The list this function returns includes both the new and the old codes for the
|
||||
* languages whose codes have changed.]
|
||||
* @param count Receives the number of languages in the list.
|
||||
* @return A pointer to an array of UnicodeString objects. The caller does NOT
|
||||
* get ownership of this list, and must NOT delete it.
|
||||
*/
|
||||
static const UnicodeString* getISOLanguages(int32_t& count);
|
||||
|
||||
/**
|
||||
* Deprecated 1999dec14 - Get the path to the ResourceBundle locale files. This path will be a
|
||||
* platform-specific path name ending in a directory separator, so that file
|
||||
* names may be concatenated to it. This path may be changed by calling
|
||||
* setDataDirectory(). If setDataDirectory() has not been called yet,
|
||||
* getDataDirectory() will return a platform-dependent default path as
|
||||
* specified by TPlatformUtilities::getDefaultDataDirectory().
|
||||
*
|
||||
* @return Current data path.
|
||||
*/
|
||||
static const char* getDataDirectory(void);
|
||||
|
||||
/**
|
||||
* Deprecated 1999dec14 - Set the path to the ResourceBundle locale files. After making this call,
|
||||
* all objects in the Unicode Analytics package will read ResourceBundle
|
||||
* data files in the specified directory in order to obtain locale data.
|
||||
*
|
||||
* @param path The new data path to be set to.
|
||||
*/
|
||||
static void setDataDirectory(const char* path);
|
||||
|
||||
Locale& init(const char* cLocaleID);
|
||||
|
||||
protected: // only protected for testing purposes. DO NOT USE.
|
||||
void setFromPOSIXID(const UnicodeString& posixID); // set it from a single string.
|
||||
void setFromPOSIXID(const char *posixID); // set it from a single string.
|
||||
|
||||
/**
|
||||
* Given an ISO country code, returns an array of Strings containing the ISO
|
||||
* codes of the languages spoken in that country. Official languages are listed
|
||||
* in the returned table before unofficial languages, but other than that, the
|
||||
* order of the returned list is indeterminate. If the value the user passes in
|
||||
* for "country" is not a valid ISO 3166 country code, or if we don't have language
|
||||
* information for the specified country, this function returns an empty array.
|
||||
*
|
||||
* [This function is not currently part of Locale's API, but is needed in the
|
||||
* implementation. We hope to add it to the API in a future release.]
|
||||
* @param country The ISO 2-letter country code of the desired country
|
||||
* @param count Receives the number of languages in the list.
|
||||
* @return A pointer to an array of UnicodeString objects. The caller does NOT
|
||||
* get ownership of this list, and must NOT delete it.
|
||||
*/
|
||||
static const UnicodeString* getLanguagesForCountry( const UnicodeString& country,
|
||||
int32_t& count);
|
||||
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* Initializes a Locale object from a ULocale struct, which is the C locale object,
|
||||
* and where the actual implementation is.
|
||||
*/
|
||||
|
||||
void setHashCode(void);
|
||||
char language[ULOC_LANG_CAPACITY];
|
||||
char country[ULOC_COUNTRY_CAPACITY];
|
||||
char* variant;
|
||||
char* fullName;
|
||||
char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
|
||||
int32_t khashCode;
|
||||
|
||||
static Locale *localeList;
|
||||
static int32_t localeListCount;
|
||||
static UnicodeString *isoLanguages;
|
||||
static int32_t isoLanguagesCount;
|
||||
static UnicodeString *isoCountries;
|
||||
static int32_t isoCountriesCount;
|
||||
static UHashtable *ctry2LangMapping;
|
||||
static const UnicodeString compressedCtry2LangMapping;
|
||||
|
||||
static Locale fgDefaultLocale;
|
||||
};
|
||||
|
||||
/**
 * Inequality test, defined as the logical negation of operator==.
 *
 * @param other The locale to compare with this one.
 * @return      True when operator== reports the two locales as different,
 *              false otherwise.
 */
inline bool_t Locale::operator!=(const Locale& other) const
{
    return !(*this == other);
}
|
||||
|
||||
#endif
|
||||
|
||||
|
717
icu4c/source/common/unicode/normlzr.h
Normal file
717
icu4c/source/common/unicode/normlzr.h
Normal file
|
@ -0,0 +1,717 @@
|
|||
/*
|
||||
********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1996-1999, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef NORMLZR_H
|
||||
#define NORMLZR_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/chariter.h"
|
||||
|
||||
/**
|
||||
* <tt>Normalizer</tt> transforms Unicode text into an equivalent composed or
|
||||
* decomposed form, allowing for easier sorting and searching of text.
|
||||
* <tt>Normalizer</tt> supports the standard normalization forms described in
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
|
||||
* Unicode Technical Report #15</a>.
|
||||
* <p>
|
||||
* Characters with accents or other adornments can be encoded in
|
||||
* several different ways in Unicode. For example, take the character "Á"
|
||||
* (A-acute). In Unicode, this can be encoded as a single character (the
|
||||
* "composed" form):
|
||||
* <pre>
|
||||
* 00C1 LATIN CAPITAL LETTER A WITH ACUTE</pre>
|
||||
* or as two separate characters (the "decomposed" form):
|
||||
* <pre>
|
||||
* 0041 LATIN CAPITAL LETTER A
|
||||
* 0301 COMBINING ACUTE ACCENT</pre>
|
||||
* <p>
|
||||
* To a user of your program, however, both of these sequences should be
|
||||
* treated as the same "user-level" character "Á". When you are searching or
|
||||
* comparing text, you must ensure that these two sequences are treated
|
||||
* equivalently. In addition, you must handle characters with more than one
|
||||
* accent. Sometimes the order of a character's combining accents is
|
||||
* significant, while in other cases accent sequences in different orders are
|
||||
* really equivalent.
|
||||
* <p>
|
||||
* Similarly, the string "ffi" can be encoded as three separate letters:
|
||||
* <pre>
|
||||
* 0066 LATIN SMALL LETTER F
|
||||
* 0066 LATIN SMALL LETTER F
|
||||
* 0069 LATIN SMALL LETTER I</pre>
|
||||
* or as the single character
|
||||
* <pre>
|
||||
* FB03 LATIN SMALL LIGATURE FFI</pre>
|
||||
* <p>
|
||||
* The ffi ligature is not a distinct semantic character, and strictly speaking
|
||||
* it shouldn't be in Unicode at all, but it was included for compatibility
|
||||
* with existing character sets that already provided it. The Unicode standard
|
||||
* identifies such characters by giving them "compatibility" decompositions
|
||||
* into the corresponding semantic characters. When sorting and searching, you
|
||||
* will often want to use these mappings.
|
||||
* <p>
|
||||
* <tt>Normalizer</tt> helps solve these problems by transforming text into the
|
||||
* canonical composed and decomposed forms as shown in the first example above.
|
||||
* In addition, you can have it perform compatibility decompositions so that
|
||||
* you can treat compatibility characters the same as their equivalents.
|
||||
* Finally, <tt>Normalizer</tt> rearranges accents into the proper canonical
|
||||
* order, so that you do not have to worry about accent rearrangement on your
|
||||
* own.
|
||||
* <p>
|
||||
* <tt>Normalizer</tt> adds one optional behavior, {@link #IGNORE_HANGUL},
|
||||
* that differs from
|
||||
* the standard Unicode Normalization Forms. This option can be passed
|
||||
* to the {@link #Normalizer constructors} and to the static
|
||||
* {@link #compose compose} and {@link #decompose decompose} methods. This
|
||||
* option, and any that are added in the future, will be turned off by default.
|
||||
* <p>
|
||||
* There are three common usage models for <tt>Normalizer</tt>. In the first,
|
||||
* the static {@link #normalize normalize()} method is used to process an
|
||||
* entire input string at once. Second, you can create a <tt>Normalizer</tt>
|
||||
* object and use it to iterate through the normalized form of a string by
|
||||
* calling {@link #first} and {@link #next}. Finally, you can use the
|
||||
* {@link #setIndex setIndex()} and {@link #getIndex} methods to perform
|
||||
* random-access iteration, which is very useful for searching.
|
||||
* <p>
|
||||
* <b>Note:</b> <tt>Normalizer</tt> objects behave like iterators and have
|
||||
* methods such as <tt>setIndex</tt>, <tt>next</tt>, <tt>previous</tt>, etc.
|
||||
* You should note that while the <tt>setIndex</tt> and <tt>getIndex</tt> refer
|
||||
* to indices in the underlying <em>input</em> text being processed, the
|
||||
* <tt>next</tt> and <tt>previous</tt> methods iterate through characters
|
||||
* in the normalized <em>output</em>. This means that there is not
|
||||
* necessarily a one-to-one correspondence between characters returned
|
||||
* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
|
||||
* returned from <tt>setIndex</tt> and <tt>getIndex</tt>. It is for this
|
||||
* reason that <tt>Normalizer</tt> does not implement the
|
||||
* {@link CharacterIterator} interface.
|
||||
* <p>
|
||||
* <b>Note:</b> <tt>Normalizer</tt> is currently based on version 2.1.8
|
||||
* of the <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.
|
||||
* It will be updated as later versions of Unicode are released. If you are
|
||||
* using this class on a JDK that supports an earlier version of Unicode, it
|
||||
* is possible that <tt>Normalizer</tt> may generate composed or decomposed
|
||||
* characters for which your JDK's {@link java.lang.Character} class does not
|
||||
* have any data.
|
||||
* <p>
|
||||
* @author Laura Werner, Mark Davis
|
||||
*/
|
||||
class U_COMMON_API Normalizer
|
||||
{
|
||||
|
||||
public:
|
||||
// This tells us what the bits in the "mode" mean.
|
||||
enum {
|
||||
COMPAT_BIT = 1,
|
||||
DECOMP_BIT = 2,
|
||||
COMPOSE_BIT = 4
|
||||
};
|
||||
|
||||
|
||||
|
||||
/** */
|
||||
static const UChar DONE;
|
||||
|
||||
/** The mode of a Normalizer object */
|
||||
enum EMode {
|
||||
|
||||
/**
|
||||
* Null operation for use with the {@link #Normalizer constructors}
|
||||
* and the static {@link #normalize normalize} method. This value tells
|
||||
* the <tt>Normalizer</tt> to do nothing but return unprocessed characters
|
||||
* from the underlying String or CharacterIterator. If you have code which
|
||||
* requires raw text at some times and normalized text at others, you can
|
||||
* use <tt>NO_OP</tt> for the cases where you want raw text, rather
|
||||
* than having a separate code path that bypasses <tt>Normalizer</tt>
|
||||
* altogether.
|
||||
* <p>
|
||||
* @see #setMode
|
||||
*/
|
||||
NO_OP = 0,
|
||||
|
||||
/**
|
||||
* Canonical decomposition followed by canonical composition. Used with
|
||||
* the {@link #Normalizer constructors} and the static
|
||||
* {@link #normalize normalize}
|
||||
* method to determine the operation to be performed.
|
||||
* <p>
|
||||
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
|
||||
* off, this operation produces output that is in
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical
|
||||
* Form</a>
|
||||
* <b>C</b>.
|
||||
* <p>
|
||||
* @see #setMode
|
||||
*/
|
||||
COMPOSE = COMPOSE_BIT,
|
||||
|
||||
/**
|
||||
* Compatibility decomposition followed by canonical composition.
|
||||
* Used with the {@link #Normalizer constructors} and the static
|
||||
* {@link #normalize normalize} method to determine the operation to be
|
||||
* performed.
|
||||
* <p>
|
||||
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
|
||||
* off, this operation produces output that is in
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical
|
||||
* Form</a>
|
||||
* <b>KC</b>.
|
||||
* <p>
|
||||
* @see #setMode
|
||||
*/
|
||||
COMPOSE_COMPAT = COMPOSE_BIT | COMPAT_BIT,
|
||||
|
||||
/**
|
||||
* Canonical decomposition. This value is passed to the
|
||||
* {@link #Normalizer constructors} and the static
|
||||
* {@link #normalize normalize}
|
||||
* method to determine the operation to be performed.
|
||||
* <p>
|
||||
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
|
||||
* off, this operation produces output that is in
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical
|
||||
* Form</a>
|
||||
* <b>D</b>.
|
||||
* <p>
|
||||
* @see #setMode
|
||||
*/
|
||||
DECOMP = DECOMP_BIT,
|
||||
|
||||
/**
|
||||
* Compatibility decomposition. This value is passed to the
|
||||
* {@link #Normalizer constructors} and the static
|
||||
* {@link #normalize normalize}
|
||||
* method to determine the operation to be performed.
|
||||
* <p>
|
||||
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
|
||||
* off, this operation produces output that is in
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr15/">Unicode Canonical
|
||||
* Form</a>
|
||||
* <b>KD</b>.
|
||||
* <p>
|
||||
* @see #setMode
|
||||
*/
|
||||
DECOMP_COMPAT = DECOMP_BIT | COMPAT_BIT
|
||||
};
|
||||
|
||||
/** The options for a Normalizer object */
|
||||
enum {
|
||||
|
||||
/**
|
||||
* Option to disable Hangul/Jamo composition and decomposition.
|
||||
* This option applies to Korean text,
|
||||
* which can be represented either in the Jamo alphabet or in Hangul
|
||||
* characters, which are really just two or three Jamo combined
|
||||
* into one visual glyph. Since Jamo takes up more storage space than
|
||||
* Hangul, applications that process only Hangul text may wish to turn
|
||||
* this option on when decomposing text.
|
||||
* <p>
|
||||
* The Unicode standard treats Hangul to Jamo conversion as a
|
||||
* canonical decomposition, so this option must be turned <b>off</b> if you
|
||||
* wish to transform strings into one of the standard
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
|
||||
* Unicode Normalization Forms</a>.
|
||||
* <p>
|
||||
* @see #setOption
|
||||
*/
|
||||
IGNORE_HANGUL = 0x001
|
||||
};
|
||||
|
||||
// Constructors
|
||||
|
||||
/**
|
||||
* Creates a new <tt>Normalizer</tt> object for iterating over the
|
||||
* normalized form of a given string.
|
||||
* <p>
|
||||
* @param str The string to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param mode The normalization mode.
|
||||
*/
|
||||
Normalizer(const UnicodeString& str,
|
||||
EMode mode);
|
||||
|
||||
/**
|
||||
* Creates a new <tt>Normalizer</tt> object for iterating over the
|
||||
* normalized form of a given string.
|
||||
* <p>
|
||||
* The <tt>options</tt> parameter specifies which optional
|
||||
* <tt>Normalizer</tt> features are to be enabled for this object.
|
||||
* <p>
|
||||
* @param str The string to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param mode The normalization mode.
|
||||
*
|
||||
* @param opt Any optional features to be enabled.
|
||||
* Currently the only available option is {@link #IGNORE_HANGUL}
|
||||
* If you want the default behavior corresponding to one of the
|
||||
* standard Unicode Normalization Forms, use 0 for this argument
|
||||
*/
|
||||
Normalizer(const UnicodeString& str,
|
||||
EMode mode,
|
||||
int32_t opt);
|
||||
|
||||
/**
|
||||
* Creates a new <tt>Normalizer</tt> object for iterating over the
|
||||
* normalized form of a given UChar string.
|
||||
* <p>
|
||||
* @param str The string to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param length Length of the string
|
||||
*
|
||||
*/
|
||||
Normalizer(const UChar* str,
|
||||
int32_t length,
|
||||
EMode mode);
|
||||
|
||||
/**
|
||||
* Creates a new <tt>Normalizer</tt> object for iterating over the
|
||||
* normalized form of the given text.
|
||||
* <p>
|
||||
* @param iter The input text to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param mode The normalization mode.
|
||||
*
|
||||
*/
|
||||
Normalizer(const CharacterIterator& iter,
|
||||
EMode mode);
|
||||
|
||||
/**
|
||||
* Creates a new <tt>Normalizer</tt> object for iterating over the
|
||||
* normalized form of the given text.
|
||||
* <p>
|
||||
* @param iter The input text to be normalized. The normalization
|
||||
* will start at the beginning of the string.
|
||||
*
|
||||
* @param mode The normalization mode.
|
||||
*
|
||||
* @param opt Any optional features to be enabled.
|
||||
* Currently the only available option is {@link #IGNORE_HANGUL}
|
||||
* If you want the default behavior corresponding to one of the
|
||||
* standard Unicode Normalization Forms, use 0 for this argument
|
||||
*/
|
||||
Normalizer(const CharacterIterator& iter,
|
||||
EMode mode,
|
||||
int32_t opt);
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
*/
|
||||
Normalizer(const Normalizer& copy);
|
||||
|
||||
/**
|
||||
* Destructor
|
||||
*/
|
||||
~Normalizer();
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Static utility methods
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Normalizes a <tt>String</tt> using the given normalization operation.
|
||||
* <p>
|
||||
* The <tt>options</tt> parameter specifies which optional
|
||||
* <tt>Normalizer</tt> features are to be enabled for this operation.
|
||||
* Currently the only available option is {@link #IGNORE_HANGUL}.
|
||||
* If you want the default behavior corresponding to one of the standard
|
||||
* Unicode Normalization Forms, use 0 for this argument.
|
||||
* <p>
|
||||
* @param source the input string to be normalized.
|
||||
*
|
||||
* @param mode the normalization mode
|
||||
*
|
||||
* @param options the optional features to be enabled.
|
||||
*
|
||||
* @param result The normalized string (on output).
|
||||
*
|
||||
* @param status The error code.
|
||||
*/
|
||||
static void normalize(const UnicodeString& source,
|
||||
EMode mode,
|
||||
int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Compose a <tt>String</tt>.
|
||||
* <p>
|
||||
* The <tt>options</tt> parameter specifies which optional
|
||||
* <tt>Normalizer</tt> features are to be enabled for this operation.
|
||||
* Currently the only available option is {@link #IGNORE_HANGUL}.
|
||||
* If you want the default behavior corresponding
|
||||
* to Unicode Normalization Form <b>C</b> or <b>KC</b>,
|
||||
* use 0 for this argument.
|
||||
* <p>
|
||||
* @param source the string to be composed.
|
||||
*
|
||||
* @param compat Perform compatibility decomposition before composition.
|
||||
* If this argument is <tt>false</tt>, only canonical
|
||||
* decomposition will be performed.
|
||||
*
|
||||
* @param options the optional features to be enabled.
|
||||
*
|
||||
* @param result The composed string (on output).
|
||||
*
|
||||
* @param status The error code.
|
||||
*/
|
||||
static void compose(const UnicodeString& source,
|
||||
bool_t compat,
|
||||
int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Static method to decompose a <tt>String</tt>.
|
||||
* <p>
|
||||
* The <tt>options</tt> parameter specifies which optional
|
||||
* <tt>Normalizer</tt> features are to be enabled for this operation.
|
||||
* Currently the only available option is {@link #IGNORE_HANGUL}.
|
||||
* The desired options should be OR'ed together to determine the value
|
||||
* of this argument. If you want the default behavior corresponding
|
||||
* to Unicode Normalization Form <b>D</b> or <b>KD</b>,
|
||||
* use 0 for this argument.
|
||||
* <p>
|
||||
* @param source the string to be decomposed.
|
||||
*
|
||||
* @param compat Perform compatibility decomposition.
|
||||
* If this argument is <tt>false</tt>, only canonical
|
||||
* decomposition will be performed.
|
||||
*
|
||||
* @param options the optional features to be enabled.
|
||||
*
|
||||
* @param result The decomposed string (on output).
|
||||
*
|
||||
* @param status The error code.
|
||||
*
|
||||
* @return the decomposed string.
|
||||
*/
|
||||
static void decompose(const UnicodeString& source,
|
||||
bool_t compat,
|
||||
int32_t options,
|
||||
UnicodeString& result,
|
||||
UErrorCode &status);
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// CharacterIterator overrides
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Return the current character in the normalized text.
|
||||
*/
|
||||
UChar current(void) const;
|
||||
|
||||
/**
|
||||
* Return the first character in the normalized text. This resets
|
||||
* the <tt>Normalizer's</tt> position to the beginning of the text.
|
||||
*/
|
||||
UChar first(void);
|
||||
|
||||
/**
|
||||
* Return the last character in the normalized text. This resets
|
||||
* the <tt>Normalizer's</tt> position to be just before the
|
||||
* input text corresponding to that normalized character.
|
||||
*/
|
||||
UChar last(void);
|
||||
|
||||
/**
|
||||
* Return the next character in the normalized text and advance
|
||||
* the iteration position by one. If the end
|
||||
* of the text has already been reached, {@link #DONE} is returned.
|
||||
*/
|
||||
UChar next(void);
|
||||
|
||||
/**
|
||||
* Return the previous character in the normalized text and decrement
|
||||
* the iteration position by one. If the beginning
|
||||
* of the text has already been reached, {@link #DONE} is returned.
|
||||
*/
|
||||
UChar previous(void);
|
||||
|
||||
/**
|
||||
* Set the iteration position in the input text that is being normalized
|
||||
* and return the first normalized character at that position.
|
||||
* <p>
|
||||
* <b>Note:</b> This method sets the position in the <em>input</em> text,
|
||||
* while {@link #next} and {@link #previous} iterate through characters
|
||||
* in the normalized <em>output</em>. This means that there is not
|
||||
* necessarily a one-to-one correspondence between characters returned
|
||||
* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
|
||||
* returned from <tt>setIndex</tt> and {@link #getIndex}.
|
||||
* <p>
|
||||
* @param index the desired index in the input text.
|
||||
*
|
||||
* @return the first normalized character that is the result of iterating
|
||||
* forward starting at the given index.
|
||||
*
|
||||
* @throws IllegalArgumentException if the given index is less than
|
||||
* {@link #getBeginIndex} or greater than {@link #getEndIndex}.
|
||||
*/
|
||||
UChar setIndex(UTextOffset index);
|
||||
|
||||
/**
|
||||
* Reset the iterator so that it is in the same state that it was just after
|
||||
* it was constructed. A subsequent call to <tt>next</tt> will return the first
|
||||
* character in the normalized text. In contrast, calling <tt>setIndex(0)</tt> followed
|
||||
* by <tt>next</tt> will return the <em>second</em> character in the normalized text,
|
||||
* because <tt>setIndex</tt> itself returns the first character
|
||||
*/
|
||||
void reset(void);
|
||||
|
||||
/**
|
||||
* Retrieve the current iteration position in the input text that is
|
||||
* being normalized. This method is useful in applications such as
|
||||
* searching, where you need to be able to determine the position in
|
||||
* the input text that corresponds to a given normalized output character.
|
||||
* <p>
|
||||
* <b>Note:</b> This method returns the position in the <em>input</em>, while
|
||||
* {@link #next} and {@link #previous} iterate through characters in the
|
||||
* <em>output</em>. This means that there is not necessarily a one-to-one
|
||||
* correspondence between characters returned by <tt>next</tt> and
|
||||
* <tt>previous</tt> and the indices passed to and returned from
|
||||
* <tt>setIndex</tt> and {@link #getIndex}.
|
||||
*
|
||||
*/
|
||||
UTextOffset getIndex(void) const;
|
||||
|
||||
/**
|
||||
* Retrieve the index of the start of the input text. This is the begin index
|
||||
* of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
|
||||
* over which this <tt>Normalizer</tt> is iterating
|
||||
*/
|
||||
UTextOffset startIndex(void) const;
|
||||
|
||||
/**
|
||||
* Retrieve the index of the end of the input text. This is the end index
|
||||
* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
|
||||
* over which this <tt>Normalizer</tt> is iterating
|
||||
*/
|
||||
UTextOffset endIndex(void) const;
|
||||
|
||||
|
||||
/**
|
||||
* Returns true when both iterators refer to the same character in the same
|
||||
* character-storage object.
|
||||
*/
|
||||
// virtual bool_t operator==(const CharacterIterator& that) const;
|
||||
bool_t operator==(const Normalizer& that) const;
|
||||
inline bool_t operator!=(const Normalizer& that) const;
|
||||
|
||||
/**
|
||||
* Returns a pointer to a new Normalizer that is a clone of this one.
|
||||
* The caller is responsible for deleting the new clone.
|
||||
*/
|
||||
Normalizer* clone(void) const;
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator.
|
||||
*/
|
||||
int32_t hashCode(void) const;
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Property access methods
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Set the normalization mode for this object.
|
||||
* <p>
|
||||
* <b>Note:</b> If the normalization mode is changed while iterating
|
||||
* over a string, calls to {@link #next} and {@link #previous} may
|
||||
* return previously buffered characters in the old normalization mode
|
||||
* until the iteration is able to re-sync at the next base character.
|
||||
* It is safest to call {@link #setText setText()}, {@link #first},
|
||||
* {@link #last}, etc. after calling <tt>setMode</tt>.
|
||||
* <p>
|
||||
* @param newMode the new mode for this <tt>Normalizer</tt>.
|
||||
* The supported modes are:
|
||||
* <ul>
|
||||
* <li>{@link #COMPOSE} - Unicode canonical decomposition
|
||||
* followed by canonical composition.
|
||||
* <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decomposition
|
||||
* followed by canonical composition.
|
||||
* <li>{@link #DECOMP} - Unicode canonical decomposition
|
||||
* <li>{@link #DECOMP_COMPAT} - Unicode compatibility decomposition.
|
||||
* <li>{@link #NO_OP} - Do nothing but return characters
|
||||
* from the underlying input text.
|
||||
* </ul>
|
||||
*
|
||||
* @see #getMode
|
||||
*/
|
||||
void setMode(EMode newMode);
|
||||
|
||||
/**
|
||||
* Return the basic operation performed by this <tt>Normalizer</tt>
|
||||
*
|
||||
* @see #setMode
|
||||
*/
|
||||
EMode getMode(void) const;
|
||||
|
||||
/**
|
||||
* Set options that affect this <tt>Normalizer</tt>'s operation.
|
||||
* Options do not change the basic composition or decomposition operation
|
||||
* that is being performed, but they control whether
|
||||
* certain optional portions of the operation are done.
|
||||
* Currently the only available option is:
|
||||
* <p>
|
||||
* <ul>
|
||||
* <li>{@link #IGNORE_HANGUL} - Do not decompose Hangul syllables into the
|
||||
* Jamo alphabet and vice-versa. This option is off by default
|
||||
* (<i>i.e.</i> Hangul processing is enabled) since the Unicode
|
||||
* standard specifies that Hangul to Jamo is a canonical decomposition.
|
||||
* For any of the standard Unicode Normalization
|
||||
* Forms, you should leave this option off.
|
||||
* </ul>
|
||||
* <p>
|
||||
* @param option the option whose value is to be set.
|
||||
* @param value the new setting for the option. Use <tt>true</tt> to
|
||||
* turn the option on and <tt>false</tt> to turn it off.
|
||||
*
|
||||
* @see #getOption
|
||||
*/
|
||||
void setOption(int32_t option,
|
||||
bool_t value);
|
||||
|
||||
/**
|
||||
* Determine whether an option is turned on or off.
|
||||
* <p>
|
||||
* @see #setOption
|
||||
*/
|
||||
bool_t getOption(int32_t option) const;
|
||||
|
||||
/**
|
||||
* Set the input text over which this <tt>Normalizer</tt> will iterate.
|
||||
* The iteration position is set to the beginning.
|
||||
*/
|
||||
void setText(const UnicodeString& newText,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Set the input text over which this <tt>Normalizer</tt> will iterate.
|
||||
* The iteration position is set to the beginning.
|
||||
*/
|
||||
void setText(const CharacterIterator& newText,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
* Set the input text over which this <tt>Normalizer</tt> will iterate.
|
||||
* The iteration position is set to the beginning.
|
||||
*/
|
||||
void setText(const UChar* newText,
|
||||
int32_t length,
|
||||
UErrorCode &status);
|
||||
/**
|
||||
* Copies the text under iteration into the UnicodeString referred to by
|
||||
* "result".
|
||||
* @param result Receives a copy of the text under iteration.
|
||||
*/
|
||||
void getText(UnicodeString& result);
|
||||
|
||||
private:
|
||||
// Private utility methods for iteration
|
||||
// For documentation, see the source code
|
||||
UChar nextCompose(void);
|
||||
UChar prevCompose(void);
|
||||
UChar nextDecomp(void);
|
||||
UChar prevDecomp(void);
|
||||
|
||||
UChar curForward(void);
|
||||
UChar curBackward(void);
|
||||
|
||||
void init(CharacterIterator* iter,
|
||||
EMode mode,
|
||||
int32_t option);
|
||||
void initBuffer(void);
|
||||
void clearBuffer(void);
|
||||
|
||||
// Utilities used by Compose
|
||||
static void bubbleAppend(UnicodeString& target,
|
||||
UChar ch,
|
||||
uint32_t cclass);
|
||||
static uint32_t getComposeClass(UChar ch);
|
||||
static uint16_t composeLookup(UChar ch);
|
||||
static uint16_t composeAction(uint16_t baseIndex,
|
||||
uint16_t comIndex);
|
||||
static void explode(UnicodeString& target,
|
||||
uint16_t index);
|
||||
static UChar pairExplode(UnicodeString& target,
|
||||
uint16_t action);
|
||||
|
||||
// Utilities used by Decompose
|
||||
static void fixCanonical(UnicodeString& result); // Reorders combining marks
|
||||
static uint8_t getClass(UChar ch); // Gets char's combining class
|
||||
|
||||
// Other static utility methods
|
||||
static void doAppend(const UChar source[],
|
||||
uint16_t offset,
|
||||
UnicodeString& dest);
|
||||
static void doInsert(const UChar source[],
|
||||
uint16_t offset,
|
||||
UnicodeString& dest,
|
||||
UTextOffset pos);
|
||||
|
||||
static void hangulToJamo(UChar ch,
|
||||
UnicodeString& result,
|
||||
uint16_t decompLimit);
|
||||
static void jamoAppend(UChar ch,
|
||||
uint16_t decompLimit,
|
||||
UnicodeString& dest);
|
||||
static void jamoToHangul(UnicodeString& buffer,
|
||||
UTextOffset start);
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// Private data
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
EMode fMode;
|
||||
int32_t fOptions;
|
||||
int16_t minDecomp;
|
||||
|
||||
// The input text and our position in it
|
||||
CharacterIterator* text;
|
||||
|
||||
// A buffer for holding intermediate results
|
||||
UnicodeString buffer;
|
||||
UTextOffset bufferPos;
|
||||
UTextOffset bufferLimit;
|
||||
UChar currentChar;
|
||||
|
||||
// Another buffer for use during iterative composition
|
||||
UnicodeString explodeBuf;
|
||||
|
||||
enum {
|
||||
EMPTY = -1,
|
||||
STR_INDEX_SHIFT = 2, //Must agree with the constants used in NormalizerBuilder
|
||||
STR_LENGTH_MASK = 0x0003
|
||||
};
|
||||
|
||||
static const UChar HANGUL_BASE;
|
||||
static const UChar HANGUL_LIMIT;
|
||||
static const UChar JAMO_LBASE;
|
||||
static const UChar JAMO_VBASE;
|
||||
static const UChar JAMO_TBASE;
|
||||
static const int16_t JAMO_LCOUNT;
|
||||
static const int16_t JAMO_VCOUNT;
|
||||
static const int16_t JAMO_TCOUNT;
|
||||
static const int16_t JAMO_NCOUNT;
|
||||
|
||||
friend class ComposedCharIter;
|
||||
};
|
||||
|
||||
/**
 * Inequality operator: returns the logical negation of operator==(other).
 */
inline bool_t
|
||||
Normalizer::operator!= (const Normalizer& other) const
|
||||
{ return ! operator==(other); }
|
||||
|
||||
#endif // _NORMLZR
|
||||
|
||||
|
||||
|
||||
|
90
icu4c/source/common/unicode/pos2.h
Normal file
90
icu4c/source/common/unicode/pos2.h
Normal file
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* FILE NAME : pos2.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/* Define the platform we're on. */
|
||||
#ifndef OS2
|
||||
#define OS2 1
|
||||
#endif
|
||||
|
||||
/* Define whether inttypes.h is available */
|
||||
#define HAVE_INTTYPES_H 0
|
||||
|
||||
/* Determines whether specific types are available */
|
||||
#define HAVE_INT8_T 0
|
||||
#define HAVE_UINT8_T 0
|
||||
#define HAVE_INT16_T 0
|
||||
#define HAVE_UINT16_T 0
|
||||
#define HAVE_INT32_T 0
|
||||
#define HAVE_UINT32_T 0
|
||||
#define HAVE_BOOL_T 0
|
||||
|
||||
/* Determines the endianness of the platform */
|
||||
#define U_IS_BIG_ENDIAN 0
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Generic data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* If your platform does not have the <inttypes.h> header, you may
|
||||
need to edit the typedefs below. */
|
||||
#if HAVE_INTTYPES_H
|
||||
#include <inttypes.h>
|
||||
#else
|
||||
|
||||
#if ! HAVE_INT8_T
|
||||
typedef signed char int8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT8_T
|
||||
typedef unsigned char uint8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT16_T
|
||||
typedef signed short int16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT16_T
|
||||
typedef unsigned short uint16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT32_T
|
||||
typedef signed long int32_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT32_T
|
||||
typedef unsigned long uint32_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#define T_INT32_MAX (LONG_MAX)
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Character data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_SIZEOF_WCHAR_T 2
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Symbol import-export control */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_EXPORT
|
||||
#define U_EXPORT2
|
||||
#define U_IMPORT
|
92
icu4c/source/common/unicode/pos400.h
Normal file
92
icu4c/source/common/unicode/pos400.h
Normal file
|
@ -0,0 +1,92 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* FILE NAME : pos400.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
* 09/21/99 barry Created new for OS/400 platform.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/* Define the platform we're on. */
|
||||
#ifndef OS400
|
||||
#define OS400
|
||||
#endif
|
||||
|
||||
/* Define whether inttypes.h is available */
|
||||
#define HAVE_INTTYPES_H 0
|
||||
|
||||
/* Determines whether specific types are available */
|
||||
#define HAVE_INT8_T 0
|
||||
#define HAVE_UINT8_T 0
|
||||
#define HAVE_INT16_T 0
|
||||
#define HAVE_UINT16_T 0
|
||||
#define HAVE_INT32_T 0
|
||||
#define HAVE_UINT32_T 0
|
||||
#define HAVE_BOOL_T 0
|
||||
|
||||
/* Determines the endianness of the platform */
|
||||
#define U_IS_BIG_ENDIAN 1
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Generic data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* If your platform does not have the <inttypes.h> header, you may
|
||||
need to edit the typedefs below. */
|
||||
#if HAVE_INTTYPES_H
|
||||
#include <inttypes.h>
|
||||
#else
|
||||
|
||||
#if ! HAVE_INT8_T
|
||||
typedef signed char int8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT8_T
|
||||
typedef unsigned char uint8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT16_T
|
||||
typedef signed short int16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT16_T
|
||||
typedef unsigned short uint16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT32_T
|
||||
typedef signed long int32_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT32_T
|
||||
typedef unsigned long uint32_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#define T_INT32_MAX (LONG_MAX)
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Character data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_CHARSET_FAMILY 1
|
||||
#define U_SIZEOF_WCHAR_T 2
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Symbol import-export control */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_EXPORT
|
||||
#define U_EXPORT2
|
||||
#define U_IMPORT
|
215
icu4c/source/common/unicode/putil.h
Normal file
215
icu4c/source/common/unicode/putil.h
Normal file
|
@ -0,0 +1,215 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* FILE NAME : putil.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/14/98 nos Creation (content moved here from utypes.h).
|
||||
* 06/17/99 erm Added IEEE_754
|
||||
* 07/22/98 stephen Added IEEEremainder, max, min, trunc
|
||||
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
|
||||
* 08/24/98 stephen Added longBitsFromDouble
|
||||
* 03/02/99 stephen Removed openFile(). Added AS400 support.
|
||||
* 04/15/99 stephen Converted to C
|
||||
* 11/15/99 helena Integrated S/390 changes for IEEE support.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef PUTIL_H
|
||||
#define PUTIL_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/* Define this if your platform supports IEEE 754 floating point */
|
||||
#define IEEE_754
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Platform utilities */
|
||||
/*===========================================================================*/
|
||||
|
||||
/**
|
||||
* Platform utilities isolate the platform dependencies of the
|
||||
* library. For each platform which this code is ported to, these
|
||||
* functions may have to be re-implemented. */
|
||||
|
||||
/* Floating point utilities */
|
||||
U_CAPI bool_t U_EXPORT2 uprv_isNaN(double);
|
||||
U_CAPI bool_t U_EXPORT2 uprv_isInfinite(double);
|
||||
U_CAPI bool_t U_EXPORT2 uprv_isPositiveInfinity(double);
|
||||
U_CAPI bool_t U_EXPORT2 uprv_isNegativeInfinity(double);
|
||||
U_CAPI double U_EXPORT2 uprv_getNaN(void);
|
||||
U_CAPI double U_EXPORT2 uprv_getInfinity(void);
|
||||
|
||||
U_CAPI double U_EXPORT2 uprv_floor(double x);
|
||||
U_CAPI double U_EXPORT2 uprv_ceil(double x);
|
||||
U_CAPI double U_EXPORT2 uprv_fabs(double x);
|
||||
U_CAPI double U_EXPORT2 uprv_modf(double x, double* y);
|
||||
U_CAPI double U_EXPORT2 uprv_fmod(double x, double y);
|
||||
U_CAPI double U_EXPORT2 uprv_pow10(int32_t x);
|
||||
U_CAPI double U_EXPORT2 uprv_IEEEremainder(double x, double y);
|
||||
U_CAPI double U_EXPORT2 uprv_fmax(double x, double y);
|
||||
U_CAPI double U_EXPORT2 uprv_fmin(double x, double y);
|
||||
U_CAPI int32_t U_EXPORT2 uprv_max(int32_t x, int32_t y);
|
||||
U_CAPI int32_t U_EXPORT2 uprv_min(int32_t x, int32_t y);
|
||||
U_CAPI double U_EXPORT2 uprv_trunc(double d);
|
||||
U_CAPI void U_EXPORT2 uprv_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo);
|
||||
#if U_IS_BIG_ENDIAN
|
||||
/* Sign test for the big-endian branch: true when the first byte of number's
   storage, read as a signed char, is negative. number must be an lvalue. */
# define uprv_isNegative(number) (((const signed char *)&(number))[0]<0)
|
||||
#else
|
||||
/* Sign test for the little-endian branch: true when the last byte of number's
   storage, read as a signed char, is negative. number must be an lvalue. */
# define uprv_isNegative(number) (((const signed char *)&(number))[sizeof(number)-1]<0)
|
||||
#endif
|
||||
|
||||
/* Conversion from a digit to the character with radix base from 2-19 */
|
||||
#ifndef OS390
|
||||
/* Maps a digit value a to a character code: 0x30+a for a <= 9, otherwise
   0x30+a+7 (so 10 yields 0x41). The argument and the whole expansion are
   parenthesized so the macro is safe inside larger expressions and with
   operator arguments. Note that a is evaluated more than once. */
#define T_CString_itosOffset(a) ((a)<=9?(0x30+(a)):(0x30+(a)+7))
|
||||
#else
|
||||
/* EBCDIC variant: maps a digit value a to 0xF0+a for a <= 9, otherwise to
   0xC1+a-10. The argument and the whole expansion are parenthesized so the
   macro is safe inside larger expressions and with operator arguments.
   Note that a is evaluated more than once. */
#define T_CString_itosOffset(a) ((a)<=9?(0xF0+(a)):(0xC1+(a)-10)) /* C1 is EBCDIC 'A' */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Return the floor of the log base 10 of a given double.
|
||||
* This method compensates for inaccuracies which arise naturally when
|
||||
* computing logs, and always gives the correct value. The parameter
|
||||
* must be positive and finite.
|
||||
* (Thanks to Alan Liu for supplying this function.)
|
||||
*/
|
||||
/**
|
||||
* Returns the common log of the double value d.
|
||||
*
|
||||
* @param d the double value to apply the common log function for.
|
||||
* @return the log of value d.
|
||||
*/
|
||||
U_CAPI int16_t U_EXPORT2 uprv_log10(double d);
|
||||
|
||||
/**
|
||||
* Returns the number of digits after the decimal point in a double number x.
|
||||
*
|
||||
* @param x the double number
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x);
|
||||
|
||||
/**
|
||||
* Time zone utilities
|
||||
*
|
||||
* Wrappers for C runtime library functions relating to timezones.
|
||||
* The uprv_tzset() function (similar to tzset) uses the current setting
|
||||
* of the environment variable TZ to assign values to three global
|
||||
* variables: daylight, timezone, and tzname. These variables have the
|
||||
* following meanings, and are declared in <time.h>.
|
||||
*
|
||||
* daylight Nonzero if daylight-saving-time zone (DST) is specified
|
||||
* in TZ; otherwise, 0. Default value is 1.
|
||||
* timezone Difference in seconds between coordinated universal
|
||||
* time and local time. E.g., -28,800 for PST (GMT-8hrs)
|
||||
* tzname(0) Three-letter time-zone name derived from TZ environment
|
||||
* variable. E.g., "PST".
|
||||
* tzname(1) Three-letter DST zone name derived from TZ environment
|
||||
* variable. E.g., "PDT". If DST zone is omitted from TZ,
|
||||
* tzname(1) is an empty string.
|
||||
*
|
||||
* Notes: For example, to set the TZ environment variable to correspond
|
||||
* to the current time zone in Germany, you can use one of the
|
||||
* following statements:
|
||||
*
|
||||
* set TZ=GST1GDT
|
||||
* set TZ=GST+1GDT
|
||||
*
|
||||
* If the TZ value is not set, uprv_tzset() attempts to use the time zone
|
||||
* information specified by the operating system. Under Windows NT
|
||||
* and Windows 95, this information is specified in the Control Panel's
|
||||
* Date/Time application.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 uprv_tzset(void);
|
||||
U_CAPI int32_t U_EXPORT2 uprv_timezone(void);
|
||||
U_CAPI char* U_EXPORT2 uprv_tzname(int index);
|
||||
|
||||
/* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70. */
|
||||
U_CAPI int32_t U_EXPORT2 uprv_getUTCtime(void);
|
||||
|
||||
/* Return the data directory for this platform. */
|
||||
U_CAPI const char* U_EXPORT2 u_getDataDirectory(void);
|
||||
|
||||
/* Set the data directory. */
|
||||
U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory);
|
||||
|
||||
/* Return the default codepage for this platform and locale */
|
||||
U_CAPI const char* U_EXPORT2 uprv_getDefaultCodepage(void);
|
||||
|
||||
/* Return the default locale ID string by querying the system, or
|
||||
zero if one cannot be found. */
|
||||
U_CAPI const char* U_EXPORT2 uprv_getDefaultLocaleID(void);
|
||||
|
||||
/*
|
||||
* Finds the least double greater than d (if positive == true),
|
||||
* or the greatest double less than d (if positive == false).
|
||||
*
|
||||
* This is a special purpose function defined by the ChoiceFormat API
|
||||
* documentation.
|
||||
* It is not a general purpose function and not defined for NaN or Infinity
|
||||
*/
|
||||
U_CAPI double U_EXPORT2 uprv_nextDouble(double d, bool_t positive);
|
||||
|
||||
/**
|
||||
* Filesystem file and path separator characters.
|
||||
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
|
||||
*/
|
||||
#ifdef XP_MAC
|
||||
# define U_FILE_SEP_CHAR ':'
|
||||
# define U_PATH_SEP_CHAR ';'
|
||||
# define U_FILE_SEP_STRING ":"
|
||||
# define U_PATH_SEP_STRING ";"
|
||||
#elif defined(WIN32) || defined(OS2)
|
||||
# define U_FILE_SEP_CHAR '\\'
|
||||
# define U_PATH_SEP_CHAR ';'
|
||||
# define U_FILE_SEP_STRING "\\"
|
||||
# define U_PATH_SEP_STRING ";"
|
||||
#else
|
||||
# define U_FILE_SEP_CHAR '/'
|
||||
# define U_PATH_SEP_CHAR ':'
|
||||
# define U_FILE_SEP_STRING "/"
|
||||
# define U_PATH_SEP_STRING ":"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Convert char characters to UChar characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that are encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see utypes.h .
|
||||
*
|
||||
* @param cs Input string, points to <code>length</code>
|
||||
* character bytes from a subset of the platform encoding.
|
||||
* @param us Output string, points to memory for <code>length</code>
|
||||
* Unicode characters.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_charsToUChars(const char *cs, UChar *us, UTextOffset length);
|
||||
|
||||
/**
|
||||
* Convert UChar characters to char characters.
|
||||
* This utility function is useful only for "invariant characters"
|
||||
* that can be encoded in the platform default encoding.
|
||||
* They are a small, constant subset of the encoding and include
|
||||
* just the latin letters, digits, and some punctuation.
|
||||
* For details, see utypes.h .
|
||||
*
|
||||
* @param us Input string, points to <code>length</code>
|
||||
* Unicode characters that can be encoded with the
|
||||
* codepage-invariant subset of the platform encoding.
|
||||
* @param cs Output string, points to memory for <code>length</code>
|
||||
* character bytes.
|
||||
* @param length The number of characters to convert; this may
|
||||
* include the terminating <code>NUL</code>.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
u_UCharsToChars(const UChar *us, char *cs, UTextOffset length);
|
||||
|
||||
#endif
|
90
icu4c/source/common/unicode/pwin32.h
Normal file
90
icu4c/source/common/unicode/pwin32.h
Normal file
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* FILE NAME : pwin32.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/13/98 nos Creation (content moved here from ptypes.h).
|
||||
* 03/02/99 stephen Added AS400 support.
|
||||
* 03/30/99 stephen Added Linux support.
|
||||
* 04/13/99 stephen Reworked for autoconf.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
/* Define the platform we're on. */
|
||||
#ifndef WIN32
|
||||
#define WIN32
|
||||
#endif
|
||||
|
||||
/* Define whether inttypes.h is available */
|
||||
#define HAVE_INTTYPES_H 0
|
||||
|
||||
/* Determines whether specific types are available */
|
||||
#define HAVE_INT8_T 0
|
||||
#define HAVE_UINT8_T 0
|
||||
#define HAVE_INT16_T 0
|
||||
#define HAVE_UINT16_T 0
|
||||
#define HAVE_INT32_T 0
|
||||
#define HAVE_UINT32_T 0
|
||||
#define HAVE_BOOL_T 0
|
||||
|
||||
/* Determines the endianness of the platform */
|
||||
#define U_IS_BIG_ENDIAN 0
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Generic data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
/* If your platform does not have the <inttypes.h> header, you may
|
||||
need to edit the typedefs below. */
|
||||
#if HAVE_INTTYPES_H
|
||||
#include <inttypes.h>
|
||||
#else
|
||||
|
||||
#if ! HAVE_INT8_T
|
||||
typedef signed char int8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT8_T
|
||||
typedef unsigned char uint8_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT16_T
|
||||
typedef signed short int16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT16_T
|
||||
typedef unsigned short uint16_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_INT32_T
|
||||
typedef signed long int32_t;
|
||||
#endif
|
||||
|
||||
#if ! HAVE_UINT32_T
|
||||
typedef unsigned long uint32_t;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#include <limits.h>
|
||||
#define T_INT32_MAX (LONG_MAX)
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Character data types */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_SIZEOF_WCHAR_T 2
|
||||
|
||||
/*===========================================================================*/
|
||||
/* Symbol import-export control */
|
||||
/*===========================================================================*/
|
||||
|
||||
#define U_EXPORT __declspec(dllexport)
|
||||
#define U_EXPORT2
|
||||
#define U_IMPORT __declspec(dllimport)
|
123
icu4c/source/common/unicode/rep.h
Normal file
123
icu4c/source/common/unicode/rep.h
Normal file
|
@ -0,0 +1,123 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
* 11/17/99 aliu Creation. Ported from java. Modified to
|
||||
* match current UnicodeString API. Forced
|
||||
* to use name "handleReplaceBetween" because
|
||||
* of existing methods in UnicodeString.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef REP_H
|
||||
#define REP_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
class UnicodeString;
|
||||
|
||||
/**
|
||||
* <code>Replaceable</code> is an abstract base class representing a
|
||||
* string of characters that supports the replacement of a range of
|
||||
* itself with a new string of characters. It is used by APIs that
|
||||
* change a piece of text while retaining style attributes. In other
|
||||
* words, an implicit aspect of the <code>Replaceable</code> API is
|
||||
* that during a replace operation, new characters take on the
|
||||
* attributes, if any, of the old characters. For example, if the
|
||||
* string "the <b>bold</b> font" has range (4, 8) replaced with
|
||||
* "strong", then it becomes "the <b>strong</b> font".
|
||||
*
|
||||
* <p><code>Replaceable</code> specifies ranges using an initial
|
||||
* offset and a limit offset. The range of characters thus specified
|
||||
* includes the characters at offset initial..limit-1. That is, the
|
||||
* start offset is inclusive, and the limit offset is exclusive.
|
||||
*
|
||||
* <p><code>Replaceable</code> also includes API to access characters
|
||||
* in the string: <code>length()</code>, <code>charAt()</code>, and
|
||||
* <code>extractBetween()</code>.
|
||||
*
|
||||
* @author Alan Liu
|
||||
*/
|
||||
class U_COMMON_API Replaceable {
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
*/
|
||||
virtual ~Replaceable();
|
||||
|
||||
/**
|
||||
* Return the number of characters in the text.
|
||||
* @return number of characters in text
|
||||
*/
|
||||
virtual int32_t length() const = 0;
|
||||
|
||||
/**
|
||||
* Return the character at the given offset into the text.
|
||||
* @param offset an integer between 0 and <code>length()</code>-1
|
||||
* inclusive
|
||||
* @return character of text at given offset
|
||||
*/
|
||||
virtual UChar charAt(UTextOffset offset) const = 0;
|
||||
|
||||
/**
|
||||
* Copy characters from this object into the destination character
|
||||
* array. The first character to be copied is at index
|
||||
* <code>srcStart</code>; the last character to be copied is at
|
||||
* index <code>srcLimit-1</code> (thus the total number of
|
||||
* characters to be copied is <code>srcLimit-srcStart</code>). The
|
||||
* characters are copied into the subarray of <code>dst</code>
|
||||
* starting at index <code>dstStart</code> and ending at index
|
||||
* <code>dstStart + (srcLimit-srcStart) - 1</code>.
|
||||
*
|
||||
* @param srcStart the beginning index to copy, inclusive; <code>0
|
||||
* <= srcStart <= srcLimit</code>.
|
||||
* @param srcLimit the ending index to copy, exclusive;
|
||||
* <code>srcStart <= srcLimit <= length()</code>.
|
||||
* @param dst the destination array.
|
||||
* @param dstStart the start offset in the destination array. */
|
||||
virtual void extractBetween(UTextOffset srcStart,
|
||||
UTextOffset srcLimit,
|
||||
UChar* dst,
|
||||
UTextOffset dstStart = 0) const = 0;
|
||||
|
||||
/**
|
||||
* Replace a substring of this object with the given text. If the
|
||||
* characters being replaced have attributes, the new characters
|
||||
* that replace them should be given the same attributes.
|
||||
*
|
||||
* @param start the beginning index, inclusive; <code>0 <= start
|
||||
* <= limit</code>.
|
||||
* @param limit the ending index, exclusive; <code>start <= limit
|
||||
* <= length()</code>.
|
||||
* @param text the text to replace characters <code>start</code>
|
||||
* to <code>limit - 1</code> */
|
||||
virtual void handleReplaceBetween(UTextOffset start,
|
||||
UTextOffset limit,
|
||||
const UnicodeString& text) = 0;
|
||||
// Note: All other methods in this class take the names of
|
||||
// existing UnicodeString methods. This method is the exception.
|
||||
// It is named differently because all replace methods of
|
||||
// UnicodeString return a UnicodeString&. The 'between' is
|
||||
// required in order to conform to the UnicodeString naming
|
||||
// convention; API taking start/length are named <operation>, and
|
||||
// those taking start/limit are named <operationBetween>. The
|
||||
// 'handle' is added because 'replaceBetween' and
|
||||
// 'doReplaceBetween' are already taken.
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* Default constructor.
|
||||
*/
|
||||
Replaceable();
|
||||
};
|
||||
|
||||
// Inline definition of the protected default constructor; the body is empty
// because Replaceable declares no data members to initialize.
inline Replaceable::Replaceable() {}
|
||||
|
||||
// Inline definition of the virtual destructor declared in the class; the body
// is empty because Replaceable itself holds no resources.
inline Replaceable::~Replaceable() {}
|
||||
|
||||
#endif
|
633
icu4c/source/common/unicode/resbund.h
Normal file
633
icu4c/source/common/unicode/resbund.h
Normal file
|
@ -0,0 +1,633 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1996-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File resbund.h
|
||||
*
|
||||
* CREATED BY
|
||||
* Richard Gillam
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 2/5/97 aliu Added scanForLocaleInFile. Added
|
||||
* constructor which attempts to read resource bundle
|
||||
* from a specific file, without searching other files.
|
||||
* 2/11/97 aliu Added UErrorCode return values to constructors. Fixed
|
||||
* infinite loops in scanForFile and scanForLocale.
|
||||
* Modified getRawResourceData to not delete storage in
|
||||
* localeData and resourceData which it doesn't own.
|
||||
* Added Mac compatibility #ifdefs for tellp() and
|
||||
* ios::nocreate.
|
||||
* 2/18/97 helena Updated with 100% documentation coverage.
|
||||
* 3/13/97 aliu Rewrote to load in entire resource bundle and store
|
||||
* it as a Hashtable of ResourceBundleData objects.
|
||||
* Added state table to govern parsing of files.
|
||||
* Modified to load locale index out of new file distinct
|
||||
* from default.txt.
|
||||
* 3/25/97 aliu Modified to support 2-d arrays, needed for timezone data.
|
||||
* Added support for custom file suffixes. Again, needed to
|
||||
* support timezone data.
|
||||
* 4/7/97 aliu Cleaned up.
|
||||
* 03/02/99 stephen Removed dependency on FILE*.
|
||||
* 03/29/99 helena Merged Bertrand and Stephen's changes.
|
||||
* 06/11/99 stephen Removed parsing of .txt files.
|
||||
* Reworked to use new binary format.
|
||||
* Cleaned up.
|
||||
* 06/14/99 stephen Removed methods taking a filename suffix.
|
||||
* 11/09/99 weiv Added getLocale(), fRealLocale, removed fRealLocaleID
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef RESBUND_H
|
||||
#define RESBUND_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/locid.h"
|
||||
#include <wchar.h>
|
||||
|
||||
|
||||
class RBHashtable;
|
||||
class ResourceBundleData;
|
||||
class ResourceBundleCache;
|
||||
class VisitedFileCache;
|
||||
#ifndef _FILESTRM
|
||||
typedef struct _FileStream FileStream;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* A class representing a collection of resource information pertaining to a given
|
||||
* locale. A resource bundle provides a way of accessing locale-specific information in
|
||||
* a data file. You create a resource bundle that manages the resources for a given
|
||||
* locale and then ask it for individual resources.
|
||||
* <P>
|
||||
* The resource bundle file is a text (ASCII or Unicode) file with the format:
|
||||
* <pre>
|
||||
* . locale {
|
||||
* . tag1 {...}
|
||||
* . tag2 {...}
|
||||
* . }
|
||||
* </pre>
|
||||
* The tags are used to retrieve the data later. You may not have multiple instances of
|
||||
* the same tag.
|
||||
* <P>
|
||||
* Four data types are supported. These are solitary strings, comma-delimited lists of
|
||||
* strings, 2-dimensional arrays of strings, and tagged lists of strings.
|
||||
* <P>
|
||||
* Note that all data is textual. Adjacent strings are merged by the low-level
|
||||
* tokenizer, so that the following effects occur: foo bar, baz // 2 elements, "foo
|
||||
* bar", and "baz" "foo" "bar", baz // 2 elements, "foobar", and "baz" Note that a
|
||||
* single intervening space is added between merged strings, unless they are both double
|
||||
* quoted. This extends to more than two strings in a row.
|
||||
* <P>
|
||||
* Whitespace is ignored, as in a C source file.
|
||||
* <P>
|
||||
* Solitary strings have the format:
|
||||
* <pre>
|
||||
* . Tag { Data }
|
||||
* </pre>
|
||||
* This is indistinguishable from a comma-delimited list with only one element, and in
|
||||
* fact may be retrieved as such (as an array, or as element 0 of an array).
|
||||
* <P>
|
||||
* Comma-delimited lists have the format:
|
||||
* <pre>
|
||||
* . Tag { Data, Data, Data }
|
||||
* </pre>
|
||||
* Parsing is lenient; a final string, after the last element, is allowed.
|
||||
* <P>
|
||||
* Tagged lists have the format:
|
||||
* <pre>
|
||||
* . Tag { Subtag { Data } Subtag {Data} }
|
||||
* </pre>
|
||||
* Data is retrieved by specifying the subtag.
|
||||
* <P>
|
||||
* Two-dimensional arrays have the format:
|
||||
* <pre>
|
||||
* . TwoD {
|
||||
* . { r1c1, r1c2, ..., r1cm },
|
||||
* . { r2c1, r2c2, ..., r2cm },
|
||||
* . ...
|
||||
* . { rnc1, rnc2, ..., rncm }
|
||||
* . }
|
||||
* </pre>
|
||||
* where n is the number of rows, and m is the number of columns. Parsing is lenient (as
|
||||
* in other data types). A final comma is always allowed after the last element; either
|
||||
* the last string in a row, or the last row itself. Furthermore, since there is no
|
||||
* ambiguity, the commas between the rows are entirely optional. (However, if a comma is
|
||||
* present, there can only be one comma, no more.) It is possible to have zero columns,
|
||||
* as follows:
|
||||
* <pre>
|
||||
* . Odd { {} {} {} } // 3 x 0 array
|
||||
* </pre>
|
||||
* But it is impossible to have zero rows. The smallest array is thus a 1 x 0 array,
|
||||
* which looks like this:
|
||||
* <pre>
|
||||
* . Smallest { {} } // 1 x 0 array
|
||||
* </pre>
|
||||
* The array must be strictly rectangular; that is, each row must have the same number
|
||||
* of elements.
|
||||
* <P>
|
||||
* This is an example for using a possible custom resource:
|
||||
* <pre>
|
||||
* . Locale currentLocale;
|
||||
* . UErrorCode success = U_ZERO_ERROR;
|
||||
* . ResourceBundle myResources("MyResources", currentLocale, success );
|
||||
* .
|
||||
* . UnicodeString button1Title, button2Title;
|
||||
* . myResources.getString("OkKey", button1Title, success );
|
||||
* . myResources.getString("CancelKey", button2Title, success );
|
||||
* </pre>
|
||||
*/
|
||||
class U_COMMON_API ResourceBundle {
|
||||
public:
|
||||
/**
|
||||
* Constructor
|
||||
*
|
||||
* @param path This is a full pathname in the platform-specific format for the
|
||||
* directory containing the resource data files we want to load
|
||||
* resources from. We use locale IDs to generate filenames, and the
|
||||
* filenames have this string prepended to them before being passed
|
||||
* to the C++ I/O functions. Therefore, this string must always end
|
||||
* with a directory delimiter (whatever that is for the target OS)
|
||||
* for this class to work correctly.
|
||||
* @param locale This is the locale this resource bundle is for. To get resources
|
||||
* for the French locale, for example, you would create a
|
||||
* ResourceBundle passing Locale::FRENCH for the "locale" parameter,
|
||||
* and all subsequent calls to that resource bundle will return
|
||||
* resources that pertain to the French locale. If the caller doesn't
|
||||
* pass a locale parameter, the default locale for the system (as
|
||||
* returned by Locale::getDefault()) will be used.
|
||||
* The UErrorCode& err parameter is used to return status information to the user. To
|
||||
* check whether the construction succeeded or not, you should check the value of
|
||||
* U_SUCCESS(err). If you wish more detailed information, you can check for
|
||||
* informational error results which still indicate success. U_USING_FALLBACK_ERROR
|
||||
* indicates that a fall back locale was used. For example, 'de_CH' was requested,
|
||||
* but nothing was found there, so 'de' was used. U_USING_DEFAULT_ERROR indicates that
|
||||
* the default locale data was used; neither the requested locale nor any of its
|
||||
* fall back locales could be found.
|
||||
*/
|
||||
ResourceBundle( const UnicodeString& path,
|
||||
const Locale& locale,
|
||||
UErrorCode& err);
|
||||
ResourceBundle( const UnicodeString& path,
|
||||
UErrorCode& err);
|
||||
ResourceBundle( const wchar_t* path,
|
||||
const Locale& locale,
|
||||
UErrorCode& err);
|
||||
~ResourceBundle();
|
||||
|
||||
/**
|
||||
* Returns the contents of a string resource. Resource data is undifferentiated
|
||||
* Unicode text. The resource file may contain quoted strings or escape sequences;
|
||||
* these will be parsed prior to the data's return.
|
||||
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
|
||||
*
|
||||
* @param resourceTag The resource tag of the string resource the caller wants
|
||||
* @param theString Receives the actual data in the resource
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found.
|
||||
*/
|
||||
void getString( const char *resourceTag,
|
||||
UnicodeString& theString,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns the contents of a string resource. Resource data is undifferentiated
|
||||
* Unicode text. The resource file may contain quoted strings or escape sequences;
|
||||
* these will be parsed prior to the data's return.
|
||||
*
|
||||
* @param resourceTag The resource tag of the string resource the caller wants
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found.
|
||||
* @return A pointer to the string from the resource bundle, or NULL if there was
|
||||
* an error.
|
||||
*/
|
||||
const UnicodeString* getString( const char *resourceTag,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns the contents of a string-array resource. This will return the contents of
|
||||
* a string-array (comma-delimited-list) resource as a C++ array of UnicodeString
|
||||
* objects. The number of elements in the array is returned in numArrayItems.
|
||||
* Calling getStringArray on a resource of type string will return an array with one
|
||||
* element; calling it on a resource of type tagged-array results in a
|
||||
* U_MISSING_RESOURCE_ERROR error.
|
||||
*
|
||||
* @param resourceTag The resource tag of the string-array resource the caller
|
||||
* wants
|
||||
* @param numArrayItems Receives the number of items in the array the function
|
||||
* returns.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found.
|
||||
* @return The resource requested, as a pointer to an array of
|
||||
* UnicodeStrings. The caller does not own the storage and
|
||||
* must not delete it.
|
||||
*/
|
||||
const UnicodeString* getStringArray( const char *resourceTag,
|
||||
int32_t& numArrayItems,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns a single item from a string-array resource. This will return the contents
|
||||
* of a single item in a resource of string-array (comma-delimited-list) type. If
|
||||
* the resource is not an array, a U_MISSING_RESOURCE_ERROR will be returned in err.
|
||||
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param index The index (zero-based) of the particular array item the user
|
||||
* wants to extract from the resource.
|
||||
* @param theArrayItem Receives the actual text of the desired array item.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found, or if the index was out of range.
|
||||
*/
|
||||
void getArrayItem( const char *resourceTag,
|
||||
int32_t index,
|
||||
UnicodeString& theArrayItem,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns a single item from a string-array resource. This will return the contents
|
||||
* of a single item in a resource of string-array (comma-delimited-list) type. If
|
||||
* the resource is not an array, a U_MISSING_RESOURCE_ERROR will be returned in err.
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param index The index (zero-based) of the particular array item the user
|
||||
* wants to extract from the resource.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found, or if the index was out of range.
|
||||
* @return A pointer to the text of the array item, or NULL if there was an error.
|
||||
*/
|
||||
const UnicodeString* getArrayItem( const char *resourceTag,
|
||||
int32_t index,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Return the contents of a 2-dimensional array resource. The return value will be a
|
||||
* UnicodeString** array. (This is really an array of pointers; each pointer is a
|
||||
* ROW of the data.) The number of rows and columns is returned. If the resource is
|
||||
* of the wrong type, or not present, U_MISSING_RESOURCE_ERROR is placed in err.
|
||||
*
|
||||
* @param resourceTag The resource tag of the string-array resource the caller
|
||||
* wants
|
||||
* @param rowCount Receives the number of rows in the array the function
|
||||
* returns.
|
||||
* @param columnCount Receives the number of columns in the array the function
|
||||
* returns.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found.
|
||||
* @return The resource requested, as a UnicodeString**. The caller
|
||||
* does not own the storage and must not delete it.
|
||||
*/
|
||||
const UnicodeString** get2dArray(const char *resourceTag,
|
||||
int32_t& rowCount,
|
||||
int32_t& columnCount,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Return a single string from a 2-dimensional array resource. If the resource does
|
||||
* not exist, or if it is not a 2-d array, or if the row or column indices are out
|
||||
* of bounds, err is set to U_MISSING_RESOURCE_ERROR.
|
||||
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param rowIndex The row index (zero-based) of the array item the user wants
|
||||
* to extract from the resource.
|
||||
* @param columnIndex The column index (zero-based) of the array item the user
|
||||
* wants to extract from the resource.
|
||||
* @param theArrayItem Receives the actual text of the desired array item.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found, if the resource data was in
|
||||
* the wrong format, or if either index is out of bounds.
|
||||
*/
|
||||
void get2dArrayItem(const char *resourceTag,
|
||||
int32_t rowIndex,
|
||||
int32_t columnIndex,
|
||||
UnicodeString& theArrayItem,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Return a single string from a 2-dimensional array resource. If the resource does
|
||||
* not exist, or if it is not a 2-d array, or if the row or column indices are out
|
||||
* of bounds, err is set to U_MISSING_RESOURCE_ERROR.
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param rowIndex The row index (zero-based) of the array item the user wants
|
||||
* to extract from the resource.
|
||||
* @param columnIndex The column index (zero-based) of the array item the user
|
||||
* wants to extract from the resource.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found, if the resource data was in
|
||||
* the wrong format, or if either index is out of bounds.
|
||||
* @return A pointer to the text of the array item, or NULL if there was an error.
|
||||
*/
|
||||
const UnicodeString* get2dArrayItem( const char *resourceTag,
|
||||
int32_t rowIndex,
|
||||
int32_t columnIndex,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns a single item from a tagged-array resource. This will return the contents
|
||||
* of a single item in a resource of type tagged-array. If this function is called
|
||||
* for a resource that is not of type tagged-array, it will set err to
|
||||
* U_MISSING_RESOURCE_ERROR.
|
||||
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param itemTag The item tag for the item the caller wants to extract.
|
||||
* @param theArrayItem Receives the text of the desired array item.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified resource tag couldn't be found, or if an item
|
||||
* with the specified item tag couldn't be found in the resource.
|
||||
*/
|
||||
void getTaggedArrayItem( const char *resourceTag,
|
||||
const UnicodeString& itemTag,
|
||||
UnicodeString& theArrayItem,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns a single item from a tagged-array resource. This will return the contents
|
||||
* of a single item in a resource of type tagged-array. If this function is called
|
||||
* for a resource that is not of type tagged-array, it will set err to
|
||||
* U_MISSING_RESOURCE_ERROR.
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param itemTag The item tag for the item the caller wants to extract.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified resource tag couldn't be found, or if an item
|
||||
* with the specified item tag couldn't be found in the resource.
|
||||
* @return A pointer to the text of the array item, or NULL if there was an error.
|
||||
*/
|
||||
const UnicodeString* getTaggedArrayItem( const char *resourceTag,
|
||||
const UnicodeString& itemTag,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Returns a tagged-array resource. The contents of the resource is returned as two
|
||||
* separate arrays of UnicodeStrings, the addresses of which are placed in "itemTags"
|
||||
* and "items". After calling this function, the items in the resource will be in the
|
||||
* list pointed to by "items", and for each items[i], itemTags[i] will be the tag that
|
||||
* corresponds to it. The total number of entries in both arrays is returned in
|
||||
* numItems.
|
||||
*
|
||||
* @param resourceTag The resource tag of the resource the caller wants to extract
|
||||
* an item from.
|
||||
* @param itemTags Set to point to an array of UnicodeStrings representing the
|
||||
* tags in the specified resource. The caller DOES own this array,
|
||||
* and must delete it.
|
||||
* @param items Set to point to an array of UnicodeStrings containing the
|
||||
* individual resource items themselves. itemTags[i] will
|
||||
* contain the tag corresponding to items[i]. The caller DOES
|
||||
* own this array, and must delete it.
|
||||
* @param numItems Receives the number of items in the arrays pointed to by
|
||||
* items and itemTags.
|
||||
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
|
||||
* specified tag couldn't be found.
|
||||
*/
|
||||
void getTaggedArray( const char *resourceTag,
|
||||
UnicodeString*& itemTags,
|
||||
UnicodeString*& items,
|
||||
int32_t& numItems,
|
||||
UErrorCode& err) const;
|
||||
|
||||
/**
|
||||
* Return the version number associated with this ResourceBundle. This version
|
||||
* number is a string of the form MAJOR.MINOR, where MAJOR is the version number of
|
||||
* the current analytic code package, and MINOR is the version number contained in
|
||||
* the resource file as the value of the tag "Version". A change in the MINOR
|
||||
* version indicated an updated data file. A change in the MAJOR version indicates a
|
||||
* new version of the code which is not binary-compatible with the previous version.
|
||||
* If no "Version" tag is present in a resource file, the MINOR version "0" is assigned.
|
||||
*
|
||||
* For example, if the Collation sort key algorithm changes, the MAJOR version
|
||||
* increments. If the collation data in a resource file changes, the MINOR version
|
||||
* for that file increments.
|
||||
*
|
||||
* @return A string of the form N.n, where N is the major version number,
|
||||
* representing the code version, and n is the minor version number,
|
||||
* representing the resource data file. The caller does not own this
|
||||
* string.
|
||||
*/
|
||||
const char* getVersionNumber(void) const;
|
||||
|
||||
/**
|
||||
* Return the Locale associated with this ResourceBundle.
|
||||
*
|
||||
* @return a Locale object
|
||||
*/
|
||||
const Locale &getLocale(void) const ;
|
||||
|
||||
private:
|
||||
class U_COMMON_API PathInfo {
|
||||
public:
|
||||
PathInfo();
|
||||
PathInfo(const PathInfo& source);
|
||||
PathInfo(const UnicodeString& path);
|
||||
PathInfo(const UnicodeString& path, const UnicodeString& suffix);
|
||||
PathInfo(const wchar_t* path, const wchar_t* suffix);
|
||||
~PathInfo();
|
||||
|
||||
PathInfo& operator=(const PathInfo& source);
|
||||
|
||||
bool_t fileExists(const UnicodeString& localeName) const;
|
||||
UnicodeString makeCacheKey(const UnicodeString& localeName) const;
|
||||
UnicodeString makeHashkey(const UnicodeString& localeName) const;
|
||||
FileStream* openFile(const UnicodeString& localeName) const;
|
||||
|
||||
private:
|
||||
static const UChar kSeparator;
|
||||
UnicodeString fPrefix;
|
||||
UnicodeString fSuffix;
|
||||
wchar_t* fWPrefix;
|
||||
wchar_t* fWSuffix;
|
||||
};
|
||||
|
||||
private:
|
||||
friend class Locale;
|
||||
friend class RuleBasedCollator;
|
||||
friend int32_t T_ResourceBundle_countArrayItemsImplementation(const ResourceBundle* resourceBundle,
|
||||
const char* resourceKey,
|
||||
UErrorCode& err) ;
|
||||
friend const UnicodeString** listInstalledLocalesImplementation(const char* path,
|
||||
int32_t* numInstalledLocales);
|
||||
friend void getTaggedArrayUCharsImplementation(
|
||||
const ResourceBundle* bundle,
|
||||
const char *resourceTag,
|
||||
UChar const** itemTags,
|
||||
UChar const** items,
|
||||
int32_t maxItems,
|
||||
int32_t& numItems,
|
||||
UErrorCode& err);
|
||||
|
||||
|
||||
/**
|
||||
* This constructor is used by Collation to load a resource bundle from a specific
|
||||
* file, without trying other files. This is used by the Collation caching
|
||||
* mechanism.
|
||||
*/
|
||||
ResourceBundle( const UnicodeString& path,
|
||||
const UnicodeString& localeName,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Return a list of all installed locales. This function returns a list of the IDs
|
||||
* of all locales represented in the directory specified by this ResourceBundle. It
|
||||
* depends on that directory having an "Index" tagged-list item in its "index.txt"
|
||||
* file; it parses that list to determine its return value (therefore, that list
|
||||
* also has to be up to date). This function is static.
|
||||
*
|
||||
* This function is the implementation of the Locale::listInstalledLocales()
|
||||
* function. It's private because the API for it really belongs in Locale.
|
||||
*
|
||||
* @param path The path to the locale data files. The function will
|
||||
* look here for "index.txt".
|
||||
* @param numInstalledLocales Receives the number of installed locales, according
|
||||
* to the Index resource in index.txt.
|
||||
* @return A list of the installed locales, as a pointer to an
|
||||
* array of UnicodeStrings. This storage is not owned by
|
||||
* the caller, who must not delete it. The information
|
||||
* in this list is derived from the Index resource in
|
||||
* index.txt, which must be kept up to date.
|
||||
*/
|
||||
static const UnicodeString* listInstalledLocales(const UnicodeString& path,
|
||||
int32_t& numInstalledLocales);
|
||||
|
||||
/**
|
||||
* Retrieve a ResourceBundle from the cache. Return NULL if not found.
|
||||
*/
|
||||
static const UHashtable* getFromCache(const PathInfo& path,
|
||||
const UnicodeString& localeName,
|
||||
ResourceBundleCache* someCache);
|
||||
|
||||
static const UHashtable* getFromCacheWithFallback(const PathInfo& path,
|
||||
const UnicodeString& desiredLocale,
|
||||
UnicodeString& returnedLocale,
|
||||
ResourceBundleCache* someCache,
|
||||
UErrorCode& error);
|
||||
|
||||
/**
|
||||
* Handlers which are passed to parse() have this signature.
|
||||
*/
|
||||
typedef void (*Handler)(const UnicodeString& localeName,
|
||||
UHashtable* hashtable,
|
||||
void* context,
|
||||
ResourceBundleCache* someCache);
|
||||
|
||||
/**
|
||||
* Parse a file, storing the resource data in the cache.
|
||||
*/
|
||||
static void parse(const PathInfo& path,
|
||||
const UnicodeString& localeName,
|
||||
Handler handler,
|
||||
void* context,
|
||||
ResourceBundleCache* someCache,
|
||||
UErrorCode &error);
|
||||
|
||||
/**
|
||||
* If the given file exists and has not been parsed, then parse it (caching the
|
||||
* resultant data) and return true.
|
||||
*/
|
||||
static bool_t parseIfUnparsed(const PathInfo& path,
|
||||
const UnicodeString& locale,
|
||||
ResourceBundleCache* fCache,
|
||||
VisitedFileCache* vCache,
|
||||
UErrorCode& error);
|
||||
|
||||
const UHashtable* getHashtableForLocale(const UnicodeString& localeName,
|
||||
UnicodeString& returnedLocale,
|
||||
UErrorCode& err);
|
||||
|
||||
const UHashtable* getHashtableForLocale(const UnicodeString& desiredLocale,
|
||||
UErrorCode& error);
|
||||
|
||||
const ResourceBundleData* getDataForTag(const char *tag,
|
||||
UErrorCode& err) const;
|
||||
|
||||
void constructForLocale(const PathInfo& path,
|
||||
const Locale& locale,
|
||||
UErrorCode& error);
|
||||
|
||||
static void addToCache(const UnicodeString& localeName,
|
||||
UHashtable* hashtable,
|
||||
void* context,
|
||||
ResourceBundleCache* someCache);
|
||||
|
||||
static void saveCollationHashtable(const UnicodeString& localeName,
|
||||
UHashtable* hashtable,
|
||||
void* context,
|
||||
ResourceBundleCache* cache);
|
||||
private:
|
||||
/**
|
||||
* This internal class iterates over the fallback and/or default locales. It
|
||||
* progresses as follows: Specific: language+country+variant language+country
|
||||
* language Default: language+country+variant language+country language Root:
|
||||
*/
|
||||
class LocaleFallbackIterator
|
||||
{
|
||||
public:
|
||||
LocaleFallbackIterator(const UnicodeString& startingLocale,
|
||||
const UnicodeString& root,
|
||||
bool_t useDefaultLocale);
|
||||
|
||||
const UnicodeString& getLocale(void) const { return fLocale; }
|
||||
|
||||
bool_t nextLocale(UErrorCode& status);
|
||||
|
||||
private:
|
||||
void chopLocale(void);
|
||||
|
||||
UnicodeString fLocale;
|
||||
UnicodeString fDefaultLocale;
|
||||
UnicodeString fRoot;
|
||||
bool_t fUseDefaultLocale;
|
||||
bool_t fTriedDefaultLocale;
|
||||
bool_t fTriedRoot;
|
||||
};
|
||||
|
||||
private:
|
||||
static const char* kDefaultSuffix;
|
||||
static const int32_t kDefaultSuffixLen;
|
||||
static const char* kDefaultFilename;
|
||||
static const char* kDefaultLocaleName;
|
||||
static const char* kIndexLocaleName;
|
||||
static const char* kIndexFilename;
|
||||
static const char* kIndexTag;
|
||||
|
||||
static const char* kDefaultMinorVersion;
|
||||
static const char* kVersionSeparator;
|
||||
static const char* kVersionTag;
|
||||
|
||||
static ResourceBundleCache* fgUserCache;
|
||||
static VisitedFileCache* fgUserVisitedFiles;
|
||||
|
||||
ResourceBundleCache* fgCache;
|
||||
VisitedFileCache* fgVisitedFiles;
|
||||
|
||||
/**
|
||||
* Data members. The ResourceBundle object is kept lightweight by having the fData[]
|
||||
* array entries be non-owned pointers. The cache (fgCache) owns the entries and
|
||||
* will delete them at static destruction time.
|
||||
*/
|
||||
PathInfo fPath;
|
||||
|
||||
enum { kDataCount = 4 };
|
||||
const UHashtable* fData[kDataCount]; // These aren't const if fIsDataOwned is true
|
||||
bool_t fLoaded[kDataCount];
|
||||
UErrorCode fDataStatus[kDataCount]; // Returns the appropriate error code for each data table.
|
||||
bool_t fIsDataOwned;
|
||||
Locale fRealLocale;
|
||||
LocaleFallbackIterator* fLocaleIterator;
|
||||
char* fVersionID;
|
||||
};
|
||||
|
||||
#endif
|
174
icu4c/source/common/unicode/schriter.h
Normal file
174
icu4c/source/common/unicode/schriter.h
Normal file
|
@ -0,0 +1,174 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File schriter.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/05/99 stephen Cleaned up.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef SCHRITER_H
|
||||
#define SCHRITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/chariter.h"
|
||||
|
||||
/**
|
||||
* A concrete subclass of CharacterIterator that iterates over the
|
||||
* characters in a UnicodeString. It's possible not only to create an
|
||||
* iterator that iterates over an entire UnicodeString, but also to
|
||||
* create one that iterates over only a subrange of a UnicodeString
|
||||
* (iterators over different subranges of the same UnicodeString don't
|
||||
* compare equal). */
|
||||
class U_COMMON_API StringCharacterIterator : public CharacterIterator {
|
||||
public:
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "text".
|
||||
* The iteration range is the whole string, and the starting position is 0.
|
||||
*/
|
||||
StringCharacterIterator(const UnicodeString& text);
|
||||
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "text".
|
||||
* The iteration range is the whole string, and the starting
|
||||
* position is specified by "pos". If "pos" is outside the valid
|
||||
* iteration range, the behavior of this object is undefined. */
|
||||
StringCharacterIterator(const UnicodeString& text,
|
||||
UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "text".
|
||||
* The iteration range begins with the character specified by
|
||||
* "begin" and ends with the character BEFORE the character specified
|
||||
* by "end". The starting position is specified by "pos". If
|
||||
* "begin" and "end" don't form a valid range on "text" (i.e., begin
|
||||
* >= end or either is negative or greater than text.size()), or
|
||||
* "pos" is outside the range defined by "begin" and "end", the
|
||||
* behavior of this iterator is undefined. */
|
||||
StringCharacterIterator(const UnicodeString& text,
|
||||
UTextOffset begin,
|
||||
UTextOffset end,
|
||||
UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Copy constructor. The new iterator iterates over the same range
|
||||
* of the same string as "that", and its initial position is the
|
||||
* same as "that"'s current position. */
|
||||
StringCharacterIterator(const StringCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Destructor. */
|
||||
virtual ~StringCharacterIterator();
|
||||
|
||||
/**
|
||||
* Assignment operator. *this is altered to iterate over the same
|
||||
* range of the same string as "that", and refers to the same
|
||||
* character within that string as "that" does. */
|
||||
StringCharacterIterator&
|
||||
operator=(const StringCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Returns true if the iterators iterate over the same range of the
|
||||
* same string and are pointing at the same character. */
|
||||
virtual bool_t operator==(const CharacterIterator& that) const;
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator. */
|
||||
virtual int32_t hashCode(void) const;
|
||||
|
||||
/**
|
||||
* Returns a new StringCharacterIterator referring to the same
|
||||
* character in the same range of the same string as this one. The
|
||||
* caller must delete the new iterator. */
|
||||
virtual CharacterIterator* clone(void) const;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first character in its
|
||||
* iteration range, and returns that character. */
|
||||
virtual UChar first(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last character in its iteration
|
||||
* range, and returns that character. */
|
||||
virtual UChar last(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th character in the
|
||||
* UnicodeString the iterator refers to, and returns that character.
|
||||
* If the index is outside the iterator's iteration range, the
|
||||
* behavior of the iterator is undefined. */
|
||||
virtual UChar setIndex(UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Returns the character the iterator currently refers to. */
|
||||
virtual UChar current(void) const;
|
||||
|
||||
/**
|
||||
* Advances to the next character in the iteration range (toward
|
||||
* last()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE. */
|
||||
virtual UChar next(void);
|
||||
|
||||
/**
|
||||
* Advances to the previous character in the iteration range (toward
|
||||
* first()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE. */
|
||||
virtual UChar previous(void);
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the first character in this
|
||||
* iterator's iteration range. */
|
||||
virtual UTextOffset startIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the character immediately BEYOND the
|
||||
* last character in this iterator's iteration range. */
|
||||
virtual UTextOffset endIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying UnicodeString of the
|
||||
* character the iterator currently refers to (i.e., the character
|
||||
* returned by current()). */
|
||||
virtual UTextOffset getIndex(void) const;
|
||||
|
||||
/**
|
||||
* Copies the UnicodeString under iteration into the UnicodeString
|
||||
* referred to by "result". Even if this iterator iterates across
|
||||
* only a part of this string, the whole string is copied. @param
|
||||
* result Receives a copy of the text under iteration. */
|
||||
virtual void getText(UnicodeString& result);
|
||||
|
||||
/**
|
||||
* Return a class ID for this object (not really public) */
|
||||
virtual UClassID getDynamicClassID(void) const
|
||||
{ return getStaticClassID(); }
|
||||
|
||||
/**
|
||||
* Return a class ID for this class (not really public) */
|
||||
static UClassID getStaticClassID(void)
|
||||
{ return (UClassID)(&fgClassID); }
|
||||
|
||||
private:
|
||||
StringCharacterIterator();
|
||||
|
||||
UnicodeString text;
|
||||
UTextOffset pos;
|
||||
UTextOffset begin;
|
||||
UTextOffset end;
|
||||
|
||||
static UClassID fgClassID;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
142
icu4c/source/common/unicode/scsu.h
Normal file
142
icu4c/source/common/unicode/scsu.h
Normal file
|
@ -0,0 +1,142 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1998-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
*
|
||||
* File scsu.h
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 05/17/99 stephen Creation (ported from java UnicodeCompressor.java)
|
||||
* 09/21/99 stephen Updated to handle data splits on decompression.
|
||||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef SCSU_H
|
||||
#define SCSU_H 1
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/* Number of windows */
|
||||
#define USCSU_NUM_WINDOWS 8
|
||||
#define USCSU_NUM_STATIC_WINDOWS 8
|
||||
|
||||
/* Maximum value for a window's index */
|
||||
#define USCSU_MAX_INDEX 0xFF
|
||||
|
||||
/* The size of the internal buffer for a UnicodeCompressor. */
|
||||
#define USCSU_BUFSIZE 3
|
||||
|
||||
/** The UnicodeCompressor struct */
|
||||
struct UnicodeCompressor {
|
||||
|
||||
/** Alias to current dynamic window */
|
||||
int32_t fCurrentWindow;
|
||||
|
||||
/** Dynamic compression window offsets */
|
||||
int32_t fOffsets [ USCSU_NUM_WINDOWS ];
|
||||
|
||||
/** Current compression mode */
|
||||
int32_t fMode;
|
||||
|
||||
/** Keeps count of times character indices are encountered */
|
||||
int32_t fIndexCount [ USCSU_MAX_INDEX + 1 ];
|
||||
|
||||
/** The time stamps indicate when a window was last defined */
|
||||
int32_t fTimeStamps [ USCSU_NUM_WINDOWS ];
|
||||
|
||||
/** The current time stamp */
|
||||
int32_t fTimeStamp;
|
||||
|
||||
/** Internal buffer for saving state */
|
||||
uint8_t fBuffer [ USCSU_BUFSIZE ];
|
||||
|
||||
/** Number of characters in our internal buffer */
|
||||
int32_t fBufferLength;
|
||||
};
|
||||
typedef struct UnicodeCompressor UnicodeCompressor;
|
||||
|
||||
/**
|
||||
* Initialize a UnicodeCompressor.
|
||||
* Sets all windows to their default values.
|
||||
* @see #reset
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 scsu_init(UnicodeCompressor *comp);
|
||||
|
||||
/**
|
||||
* Reset the compressor to its initial state.
|
||||
* @param comp The UnicodeCompressor to reset.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 scsu_reset(UnicodeCompressor *comp);
|
||||
|
||||
/**
|
||||
* Compress a Unicode character array into a byte array.
|
||||
*
|
||||
* This function is not guaranteed to completely fill the output buffer, nor
|
||||
* is it guaranteed to compress the entire input.
|
||||
* If the source data is completely compressed, <TT>status</TT> will be set
|
||||
* to <TT>U_ZERO_ERROR</TT>.
|
||||
* If the source data is not completely compressed, <TT>status</TT> will be
|
||||
* set to <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT>. If this occurs, larger buffers
|
||||
* should be allocated, or data flushed, and the function should be called
|
||||
* again with the new buffers.
|
||||
*
|
||||
* @param comp A pointer to a previously-initialized UnicodeCompressor
|
||||
* @param target I/O parameter. On input, a pointer to a buffer of bytes to
|
||||
* receive the compressed data. On output, points to the byte following
|
||||
* the last byte written. This buffer must be at least 4 bytes.
|
||||
* @param targetLimit A pointer to the end of the array <TT>target</TT>.
|
||||
* @param source I/O parameter. On input, a pointer to a buffer of
|
||||
* Unicode characters to be compressed. On output, points to the character
|
||||
* following the last character compressed.
|
||||
* @param sourceLimit A pointer to the end of the array <TT>source</TT>.
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
*
|
||||
* @see #decompress
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 scsu_compress(UnicodeCompressor *comp,
|
||||
uint8_t **target,
|
||||
const uint8_t *targetLimit,
|
||||
const UChar **source,
|
||||
const UChar *sourceLimit,
|
||||
UErrorCode *status);
|
||||
|
||||
/**
|
||||
* Decompress a byte array into a Unicode character array.
|
||||
*
|
||||
* This function will either completely fill the output buffer, or
|
||||
* consume the entire input.
|
||||
* If the source data is completely decompressed, <TT>status</TT> will be set
|
||||
* to <TT>U_ZERO_ERROR</TT>.
|
||||
* If the source data is not completely decompressed, <TT>status</TT> will be
|
||||
* set to <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT>. If this occurs, larger buffers
|
||||
* should be allocated, or data flushed, and the function should be called
|
||||
* again with the new buffers.
|
||||
*
|
||||
* @param comp A pointer to a previously-initialized UnicodeCompressor
|
||||
* @param target I/O parameter. On input, a pointer to a buffer of Unicode
|
||||
* characters to receive the decompressed data. On output, points to the
|
||||
* character following the last character written. This buffer must be
|
||||
* at least 2 bytes.
|
||||
* @param targetLimit A pointer to the end of the array <TT>target</TT>.
|
||||
* @param source I/O parameter. On input, a pointer to a buffer of
|
||||
* bytes to be decompressed. On output, points to the byte following the
|
||||
* last byte decompressed.
|
||||
* @param sourceLimit A pointer to the end of the array <TT>source</TT>.
|
||||
* @param status A pointer to an UErrorCode to receive any errors.
|
||||
* @return The number of Unicode characters written to <TT>target</TT>.
|
||||
*
|
||||
* @see #compress
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 scsu_decompress(UnicodeCompressor *comp,
|
||||
UChar **target,
|
||||
const UChar *targetLimit,
|
||||
const uint8_t **source,
|
||||
const uint8_t *sourceLimit,
|
||||
UErrorCode *status);
|
||||
|
||||
#endif
|
892
icu4c/source/common/unicode/ubidi.h
Normal file
892
icu4c/source/common/unicode/ubidi.h
Normal file
|
@ -0,0 +1,892 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
* file name: ubidi.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999jul27
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef UBIDI_H
|
||||
#define UBIDI_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/uchar.h"
|
||||
|
||||
/*
|
||||
* javadoc-style comments are intended to be transformed into HTML
|
||||
* using DOC++ - see
|
||||
* http://www.zib.de/Visual/software/doc++/index.html .
|
||||
*
|
||||
* The HTML documentation is created with
|
||||
* doc++ -H ubidi.h
|
||||
*
|
||||
* The following #define trick allows us to do it all in one file
|
||||
* and still be able to compile it.
|
||||
*/
|
||||
#define DOCXX_TAG
|
||||
#define BIDI_SAMPLE_CODE
|
||||
|
||||
/**
|
||||
* @name BIDI algorithm for ICU
|
||||
*
|
||||
* <h2>BIDI algorithm for ICU</h2>
|
||||
*
|
||||
* This is an implementation of the Unicode Bidirectional algorithm.
|
||||
* The algorithm is defined in the
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
|
||||
* version 5, also described in The Unicode Standard, Version 3.0 .<p>
|
||||
*
|
||||
* <h3>General remarks about the API:</h3>
|
||||
*
|
||||
* In functions with an error code parameter,
|
||||
* the <code>pErrorCode</code> pointer must be valid
|
||||
* and the value that it points to must not indicate a failure before
|
||||
* the function call. Otherwise, the function returns immediately.
|
||||
* After the function call, the value indicates success or failure.<p>
|
||||
*
|
||||
* The <quote>limit</quote> of a sequence of characters is the position just after their
|
||||
* last character, i.e., one more than that position.<p>
|
||||
*
|
||||
* Some of the API functions provide access to <quote>runs</quote>.
|
||||
* Such a <quote>run</quote> is defined as a sequence of characters
|
||||
* that are at the same embedding level
|
||||
* after performing the BIDI algorithm.<p>
|
||||
*
|
||||
* @author Markus W. Scherer
|
||||
* @version 1.0
|
||||
*/
|
||||
DOCXX_TAG
|
||||
/*@{*/
|
||||
|
||||
/**
|
||||
* UBiDiLevel is the type of the level values in this
|
||||
* BiDi implementation.
|
||||
* It holds an embedding level and indicates the visual direction
|
||||
* by its bit 0 (even/odd value).<p>
|
||||
*
|
||||
* It can also hold non-level values for the
|
||||
* <code>paraLevel</code> and <code>embeddingLevels</code>
|
||||
* arguments of <code>ubidi_setPara()</code>; there:
|
||||
* <ul>
|
||||
* <li>bit 7 of an <code>embeddingLevels[]</code>
|
||||
* value indicates whether the using application is
|
||||
* specifying the level of a character to <i>override</i> whatever the
|
||||
* BiDi implementation would resolve it to.</li>
|
||||
* <li><code>paraLevel</code> can be set to the
|
||||
* pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
|
||||
* and <code>UBIDI_DEFAULT_RTL</code>.</li>
|
||||
* </ul>
|
||||
* @see ubidi_setPara
|
||||
*
|
||||
* <p>The related constants are not real, valid level values.
|
||||
* <code>UBIDI_DEFAULT_XXX</code> can be used to specify
|
||||
* a default for the paragraph level for
|
||||
* when the <code>ubidi_setPara()</code> function
|
||||
* shall determine it but there is no
|
||||
* strongly typed character in the input.<p>
|
||||
*
|
||||
* Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
|
||||
* and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
|
||||
* just like with normal LTR and RTL level values -
|
||||
* these special values are designed that way. Also, the implementation
|
||||
* assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
|
||||
*
|
||||
* @see UBIDI_DEFAULT_LTR
|
||||
* @see UBIDI_DEFAULT_RTL
|
||||
* @see UBIDI_LEVEL_OVERRIDE
|
||||
* @see UBIDI_MAX_EXPLICIT_LEVEL
|
||||
*/
|
||||
typedef uint8_t UBiDiLevel;
|
||||
|
||||
/** Paragraph level setting.
|
||||
* If there is no strong character, then set the paragraph level to 0 (left-to-right).
|
||||
*/
|
||||
#define UBIDI_DEFAULT_LTR 0xfe
|
||||
|
||||
/** Paragraph level setting.
|
||||
* If there is no strong character, then set the paragraph level to 1 (right-to-left).
|
||||
*/
|
||||
#define UBIDI_DEFAULT_RTL 0xff
|
||||
|
||||
/**
|
||||
* Maximum explicit embedding level.
|
||||
* (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
|
||||
*
|
||||
*/
|
||||
#define UBIDI_MAX_EXPLICIT_LEVEL 61
|
||||
|
||||
/** Bit flag for level input.
|
||||
* Overrides directional properties.
|
||||
*/
|
||||
#define UBIDI_LEVEL_OVERRIDE 0x80
|
||||
|
||||
/**
|
||||
* @memo <code>UBiDiDirection</code> values indicate the text direction.
|
||||
*/
|
||||
enum UBiDiDirection {
|
||||
/** @memo All left-to-right text. This is a 0 value. */
|
||||
UBIDI_LTR,
|
||||
/** @memo All right-to-left text. This is a 1 value. */
|
||||
UBIDI_RTL,
|
||||
/** @memo Mixed-directional text. */
|
||||
UBIDI_MIXED
|
||||
};
|
||||
|
||||
typedef enum UBiDiDirection UBiDiDirection;
|
||||
|
||||
/**
|
||||
* Forward declaration of the <code>UBiDi</code> structure for the declaration of
|
||||
* the API functions. Its fields are implementation-specific.<p>
|
||||
* This structure holds information about a paragraph of text
|
||||
* with BiDi-algorithm-related details, or about one line of
|
||||
* such a paragraph.<p>
|
||||
* Reordering can be done on a line, or on a paragraph which is
|
||||
* then interpreted as one single line.
|
||||
*/
|
||||
struct UBiDi;
|
||||
|
||||
typedef struct UBiDi UBiDi;
|
||||
|
||||
/**
|
||||
* Allocate a <code>UBiDi</code> structure.
|
||||
* Such an object is initially empty. It is assigned
|
||||
* the BiDi properties of a paragraph by <code>ubidi_setPara()</code>
|
||||
* or the BiDi properties of a line of a paragraph by
|
||||
* <code>ubidi_setLine()</code>.<p>
|
||||
* This object can be reused for as long as it is not deallocated
|
||||
* by calling <code>ubidi_close()</code>.<p>
|
||||
* <code>ubidi_setPara()</code> will allocate additional memory for
|
||||
* internal structures as necessary.
|
||||
*
|
||||
* @return An empty <code>UBiDi</code> object.
|
||||
*/
|
||||
U_CAPI UBiDi * U_EXPORT2
|
||||
ubidi_open(void);
|
||||
|
||||
/**
|
||||
* Allocate a <code>UBiDi</code> structure with preallocated memory
|
||||
* for internal structures.
|
||||
* This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
|
||||
* with no arguments, but it also preallocates memory for internal structures
|
||||
* according to the sizings supplied by the caller.<p>
|
||||
* Subsequent functions will not allocate any more memory, and are thus
|
||||
* guaranteed not to fail because of lack of memory.<p>
|
||||
* The preallocation can be limited to some of the internal memory
|
||||
* by setting some values to 0 here. That means that if, e.g.,
|
||||
* <code>maxRunCount</code> cannot be reasonably predetermined and should not
|
||||
* be set to <code>maxLength</code> (the only failproof value) to avoid
|
||||
* wasting memory, then <code>maxRunCount</code> could be set to 0 here
|
||||
* and the internal structures that are associated with it will be allocated
|
||||
* on demand, just like with <code>ubidi_open()</code>.
|
||||
*
|
||||
* @param maxLength is the maximum paragraph or line length that internal memory
|
||||
* will be preallocated for. An attempt to associate this object with a
|
||||
* longer text will fail, unless this value is 0, which leaves the allocation
|
||||
* up to the implementation.
|
||||
*
|
||||
* @param maxRunCount is the maximum anticipated number of same-level runs
|
||||
* that internal memory will be preallocated for. An attempt to access
|
||||
* visual runs on an object that was not preallocated for as many runs
|
||||
* as the text was actually resolved to will fail,
|
||||
* unless this value is 0, which leaves the allocation up to the implementation.<p>
|
||||
* The number of runs depends on the actual text and may be anywhere between
|
||||
* 1 and <code>maxLength</code>. It is typically small.<p>
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return An empty <code>UBiDi</code> object with preallocated memory.
|
||||
*/
|
||||
U_CAPI UBiDi * U_EXPORT2
|
||||
ubidi_openSized(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* <code>ubidi_close()</code> must be called to free the memory
|
||||
* associated with a UBiDi object.<p>
|
||||
*
|
||||
* <strong>Important: </strong>
|
||||
* If a <code>UBiDi</code> object is the <quote>child</quote>
|
||||
* of another one (its <quote>parent</quote>), after calling
|
||||
* <code>ubidi_setLine()</code>, then the child object must
|
||||
* be destroyed (closed) or reused (by calling
|
||||
* <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
|
||||
* before the parent object.
|
||||
*
|
||||
* @param pBiDi is a <code>UBiDi</code> object.
|
||||
*
|
||||
* @see ubidi_setPara
|
||||
* @see ubidi_setLine
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_close(UBiDi *pBiDi);
|
||||
|
||||
/**
|
||||
* Perform the Unicode BiDi algorithm. It is defined in the
|
||||
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
|
||||
* version 5,
|
||||
* also described in The Unicode Standard, Version 3.0 .<p>
|
||||
*
|
||||
* This function takes a single plain text paragraph with or without
|
||||
* externally specified embedding levels from <quote>styled</quote> text
|
||||
* and computes the left-right-directionality of each character.<p>
|
||||
*
|
||||
* If the entire paragraph consists of text of only one direction, then
|
||||
* the function may not perform all the steps described by the algorithm,
|
||||
* i.e., some levels may not be the same as if all steps were performed.
|
||||
* This is not relevant for unidirectional text.<br>
|
||||
* For example, in pure LTR text with numbers the numbers would get
|
||||
* a resolved level of 2 higher than the surrounding text according to
|
||||
* the algorithm. This implementation may set all resolved levels to
|
||||
* the same value in such a case.<p>
|
||||
*
|
||||
* The text must be externally split into separate paragraphs (rule P1).
|
||||
* Paragraph separators (B) should appear at most at the very end.
|
||||
*
|
||||
* @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
|
||||
* which will be set to contain the reordering information,
|
||||
* especially the resolved levels for all the characters in <code>text</code>.
|
||||
*
|
||||
* @param text is a pointer to the single-paragraph text that the
|
||||
* BiDi algorithm will be performed on
|
||||
* (step (P1) of the algorithm is performed externally).
|
||||
* <strong>The text must be (at least) <code>length</code> long.</strong>
|
||||
*
|
||||
* @param length is the length of the text; if <code>length==-1</code> then
|
||||
* the text must be zero-terminated.
|
||||
*
|
||||
* @param paraLevel specifies the default level for the paragraph;
|
||||
* it is typically 0 (LTR) or 1 (RTL).
|
||||
* If the function shall determine the paragraph level from the text,
|
||||
* then <code>paraLevel</code> can be set to
|
||||
* either <code>UBIDI_DEFAULT_LTR</code>
|
||||
* or <code>UBIDI_DEFAULT_RTL</code>;
|
||||
* if there is no strongly typed character, then
|
||||
* the desired default is used (0 for LTR or 1 for RTL).
|
||||
* Any other value between 0 and <code>UBIDI_MAX_EXPLICIT_LEVEL</code> is also valid,
|
||||
* with odd levels indicating RTL.
|
||||
*
|
||||
* @param embeddingLevels (in) may be used to preset the embedding and override levels,
|
||||
* ignoring characters like LRE and PDF in the text.
|
||||
* A level overrides the directional property of its corresponding
|
||||
* (same index) character if the level has the
|
||||
* <code>UBIDI_LEVEL_OVERRIDE</code> bit set.<p>
|
||||
* Except for that bit, it must be
|
||||
* <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>.<p>
|
||||
* <strong>Caution: </strong>A copy of this pointer, not of the levels,
|
||||
* will be stored in the <code>UBiDi</code> object;
|
||||
* the <code>embeddingLevels</code> array must not be
|
||||
* deallocated before the <code>UBiDi</code> structure is destroyed or reused,
|
||||
* and the <code>embeddingLevels</code>
|
||||
* should not be modified to avoid unexpected results on subsequent BiDi operations.
|
||||
* However, the <code>ubidi_setPara()</code> and
|
||||
* <code>ubidi_setLine()</code> functions may modify some or all of the levels.<p>
|
||||
* After the <code>UBiDi</code> object is reused or destroyed, the caller
|
||||
* must take care of the deallocation of the <code>embeddingLevels</code> array.<p>
|
||||
* <strong>The <code>embeddingLevels</code> array must be
|
||||
* at least <code>length</code> long.</strong>
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_setPara(UBiDi *pBiDi, const UChar *text, UTextOffset length,
|
||||
UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
|
||||
* contain the reordering information, especially the resolved levels,
|
||||
* for all the characters in a line of text. This line of text is
|
||||
* specified by referring to a <code>UBiDi</code> object representing
|
||||
* this information for a paragraph of text, and by specifying
|
||||
* a range of indexes in this paragraph.<p>
|
||||
* In the new line object, the indexes will range from 0 to <code>limit-start</code>.<p>
|
||||
*
|
||||
* This is used after calling <code>ubidi_setPara()</code>
|
||||
* for a paragraph, and after line-breaking on that paragraph.
|
||||
* It is not necessary if the paragraph is treated as a single line.<p>
|
||||
*
|
||||
* After line-breaking, rules (L1) and (L2) for the treatment of
|
||||
* trailing WS and for reordering are performed on
|
||||
* a <code>UBiDi</code> object that represents a line.<p>
|
||||
*
|
||||
* <strong>Important: </strong><code>pLineBiDi</code> shares data with
|
||||
* <code>pParaBiDi</code>.
|
||||
* You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
|
||||
* In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
|
||||
* before the object for its parent paragraph.
|
||||
*
|
||||
* @param pParaBiDi is the parent paragraph object.
|
||||
*
|
||||
* @param start is the line's first index into the paragraph text.
|
||||
*
|
||||
* @param limit is just behind the line's last index into the paragraph text
|
||||
* (its last index +1).<br>
|
||||
* It must be <code>0<=start<=limit<=</code>paragraph length.
|
||||
*
|
||||
* @param pLineBiDi is the object that will now represent a line of the paragraph.
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ubidi_setPara
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_setLine(const UBiDi *pParaBiDi,
|
||||
UTextOffset start, UTextOffset limit,
|
||||
UBiDi *pLineBiDi,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get the directionality of the text.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @return A <code>UBIDI_XXX</code> value that indicates if the entire text
|
||||
* represented by this object is unidirectional,
|
||||
* and which direction, or if it is mixed-directional.
|
||||
*
|
||||
* @see UBiDiDirection
|
||||
*/
|
||||
U_CAPI UBiDiDirection U_EXPORT2
|
||||
ubidi_getDirection(const UBiDi *pBiDi);
|
||||
|
||||
/**
|
||||
* Get the length of the text.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @return The length of the text that the UBiDi object was created for.
|
||||
*/
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
ubidi_getLength(const UBiDi *pBiDi);
|
||||
|
||||
/**
|
||||
* Get the paragraph level of the text.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @return The paragraph level.
|
||||
*
|
||||
* @see UBiDiLevel
|
||||
*/
|
||||
U_CAPI UBiDiLevel U_EXPORT2
|
||||
ubidi_getParaLevel(const UBiDi *pBiDi);
|
||||
|
||||
/**
|
||||
* Get the level for one character.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param charIndex the index of a character.
|
||||
*
|
||||
* @return The level for the character at charIndex.
|
||||
*
|
||||
* @see UBiDiLevel
|
||||
*/
|
||||
U_CAPI UBiDiLevel U_EXPORT2
|
||||
ubidi_getLevelAt(const UBiDi *pBiDi, UTextOffset charIndex);
|
||||
|
||||
/**
|
||||
* Get an array of levels for each character.<p>
|
||||
*
|
||||
* Note that this function may allocate memory under some
|
||||
* circumstances, unlike <code>ubidi_getLevelAt()</code>.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return The levels array for the text,
|
||||
* or <code>NULL</code> if an error occurs.
|
||||
*
|
||||
* @see UBiDiLevel
|
||||
*/
|
||||
U_CAPI const UBiDiLevel * U_EXPORT2
|
||||
ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get a logical run.
|
||||
* This function returns information about a run and is used
|
||||
* to retrieve runs in logical order.<p>
|
||||
* This is especially useful for line-breaking on a paragraph.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param logicalStart is the first character of the run.
|
||||
*
|
||||
* @param pLogicalLimit will receive the limit of the run.
|
||||
* The l-value that you point to here may be the
|
||||
* same expression (variable) as the one for
|
||||
* <code>logicalStart</code>.
|
||||
* This pointer can be <code>NULL</code> if this
|
||||
* value is not necessary.
|
||||
*
|
||||
* @param pLevel will receive the level of the run.
|
||||
* This pointer can be <code>NULL</code> if this
|
||||
* value is not necessary.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_getLogicalRun(const UBiDi *pBiDi, UTextOffset logicalStart,
|
||||
UTextOffset *pLogicalLimit, UBiDiLevel *pLevel);
|
||||
|
||||
/**
|
||||
* Get the number of runs.
|
||||
* This function may invoke the actual reordering on the
|
||||
* <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
|
||||
* may have resolved only the levels of the text. Therefore,
|
||||
* <code>ubidi_countRuns()</code> may have to allocate memory,
|
||||
* and may fail doing so.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return The number of runs.
|
||||
*/
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get one run's logical start, length, and directionality,
|
||||
* which can be 0 for LTR or 1 for RTL.
|
||||
* In an RTL run, the character at the logical start is
|
||||
* visually on the right of the displayed run.
|
||||
* The length is the number of characters in the run.<p>
|
||||
* <code>ubidi_countRuns()</code> should be called
|
||||
* before the runs are retrieved.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param runIndex is the number of the run in visual order, in the
|
||||
* range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
|
||||
*
|
||||
* @param pLogicalStart is the first logical character index in the text.
|
||||
* The pointer may be <code>NULL</code> if this index is not needed.
|
||||
*
|
||||
* @param pLength is the number of characters (at least one) in the run.
|
||||
* The pointer may be <code>NULL</code> if this is not needed.
|
||||
*
|
||||
* @return the directionality of the run,
|
||||
* <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
|
||||
* never <code>UBIDI_MIXED</code>.
|
||||
*
|
||||
* @see ubidi_countRuns
|
||||
*
|
||||
* Example:
|
||||
* <pre>
|
||||
* UTextOffset i, count=ubidi_countRuns(pBiDi, pErrorCode),
|
||||
* logicalStart, visualIndex=0, length;
|
||||
* for(i=0; i<count; ++i) {
|
||||
* if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
|
||||
* do { // LTR
|
||||
* show_char(text[logicalStart++], visualIndex++);
|
||||
* } while(--length>0);
|
||||
* } else {
|
||||
* logicalStart+=length; // logicalLimit
|
||||
* do { // RTL
|
||||
* show_char(text[--logicalStart], visualIndex++);
|
||||
* } while(--length>0);
|
||||
* }
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* Note that in right-to-left runs, code like this places
|
||||
* modifier letters before base characters and second surrogates
|
||||
* before first ones.
|
||||
*/
|
||||
U_CAPI UBiDiDirection U_EXPORT2
|
||||
ubidi_getVisualRun(UBiDi *pBiDi, UTextOffset runIndex,
|
||||
UTextOffset *pLogicalStart, UTextOffset *pLength);
|
||||
|
||||
/**
|
||||
* Get the visual position from a logical text position.
|
||||
* If such a mapping is used many times on the same
|
||||
* <code>UBiDi</code> object, then calling
|
||||
* <code>ubidi_getLogicalMap()</code> is more efficient.<p>
|
||||
*
|
||||
* Note that in right-to-left runs, this mapping places
|
||||
* modifier letters before base characters and second surrogates
|
||||
* before first ones.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param logicalIndex is the index of a character in the text.
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return The visual position of this character.
|
||||
*
|
||||
* @see ubidi_getLogicalMap
|
||||
* @see ubidi_getLogicalIndex
|
||||
*/
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
ubidi_getVisualIndex(UBiDi *pBiDi, UTextOffset logicalIndex, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get the logical text position from a visual position.
|
||||
* If such a mapping is used many times on the same
|
||||
* <code>UBiDi</code> object, then calling
|
||||
* <code>ubidi_getVisualMap()</code> is more efficient.<p>
|
||||
*
|
||||
* This is the inverse function to <code>ubidi_getVisualIndex()</code>.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param visualIndex is the visual position of a character.
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @return The index of this character in the text.
|
||||
*
|
||||
* @see ubidi_getVisualMap
|
||||
* @see ubidi_getVisualIndex
|
||||
*/
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
ubidi_getLogicalIndex(UBiDi *pBiDi, UTextOffset visualIndex, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get a logical-to-visual index map (array) for the characters in the UBiDi
|
||||
* (paragraph or line) object.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param indexMap is a pointer to an array of <code>ubidi_getLength()</code>
|
||||
* indexes which will reflect the reordering of the characters.
|
||||
* The array does not need to be initialized.<p>
|
||||
* The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.<p>
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ubidi_getVisualMap
|
||||
* @see ubidi_getVisualIndex
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_getLogicalMap(UBiDi *pBiDi, UTextOffset *indexMap, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Get a visual-to-logical index map (array) for the characters in the UBiDi
|
||||
* (paragraph or line) object.
|
||||
*
|
||||
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
|
||||
*
|
||||
* @param indexMap is a pointer to an array of <code>ubidi_getLength()</code>
|
||||
* indexes which will reflect the reordering of the characters.
|
||||
* The array does not need to be initialized.<p>
|
||||
* The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.<p>
|
||||
*
|
||||
* @param pErrorCode must be a valid pointer to an error code value,
|
||||
* which must not indicate a failure before the function call.
|
||||
*
|
||||
* @see ubidi_getLogicalMap
|
||||
* @see ubidi_getLogicalIndex
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_getVisualMap(UBiDi *pBiDi, UTextOffset *indexMap, UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* This is a convenience function that does not use a UBiDi object.
|
||||
* It is intended to be used for when an application has determined the levels
|
||||
* of objects (character sequences) and just needs to have them reordered (L2).
|
||||
* This is equivalent to using <code>ubidi_getLogicalMap</code> on a
|
||||
* <code>UBiDi</code> object.
|
||||
*
|
||||
* @param levels is an array with <code>length</code> levels that have been determined by
|
||||
* the application.
|
||||
*
|
||||
* @param length is the number of levels in the array, or, semantically,
|
||||
* the number of objects to be reordered.
|
||||
* It must be <code>length>0</code>.
|
||||
*
|
||||
* @param indexMap is a pointer to an array of <code>length</code>
|
||||
* indexes which will reflect the reordering of the characters.
|
||||
* The array does not need to be initialized.<p>
|
||||
* The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_reorderLogical(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
|
||||
|
||||
/**
|
||||
* This is a convenience function that does not use a UBiDi object.
|
||||
* It is intended to be used for when an application has determined the levels
|
||||
* of objects (character sequences) and just needs to have them reordered (L2).
|
||||
* This is equivalent to using <code>ubidi_getVisualMap</code> on a
|
||||
* <code>UBiDi</code> object.
|
||||
*
|
||||
* @param levels is an array with <code>length</code> levels that have been determined by
|
||||
* the application.
|
||||
*
|
||||
* @param length is the number of levels in the array, or, semantically,
|
||||
* the number of objects to be reordered.
|
||||
* It must be <code>length>0</code>.
|
||||
*
|
||||
* @param indexMap is a pointer to an array of <code>length</code>
|
||||
* indexes which will reflect the reordering of the characters.
|
||||
* The array does not need to be initialized.<p>
|
||||
* The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_reorderVisual(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
|
||||
|
||||
/**
|
||||
* Invert an index map.
|
||||
* The one-to-one index mapping of the first map is inverted and written to
|
||||
* the second one.
|
||||
*
|
||||
* @param srcMap is an array with <code>length</code> indexes
|
||||
* which define the original mapping.
|
||||
*
|
||||
* @param destMap is an array with <code>length</code> indexes
|
||||
* which will be filled with the inverse mapping.
|
||||
*
|
||||
* @param length is the length of each array.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
ubidi_invertMap(const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length);
|
||||
|
||||
/**
|
||||
* @name Sample code for the ICU BIDI API
|
||||
*
|
||||
* <h2>Rendering a paragraph with the ICU BiDi API</h2>
|
||||
*
|
||||
* This is (hypothetical) sample code that illustrates
|
||||
* how the ICU BiDi API could be used to render a paragraph of text.
|
||||
* Rendering code depends highly on the graphics system,
|
||||
* therefore this sample code must make a lot of assumptions,
|
||||
* which may or may not match any existing graphics system's properties.
|
||||
*
|
||||
* <p>The basic assumptions are:</p>
|
||||
* <ul>
|
||||
* <li>Rendering is done from left to right on a horizontal line.</li>
|
||||
* <li>A run of single-style, unidirectional text can be rendered at once.</li>
|
||||
* <li>Such a run of text is passed to the graphics system with
|
||||
* characters (code units) in logical order.</li>
|
||||
* <li>The line-breaking algorithm is very complicated
|
||||
* and Locale-dependent -
|
||||
* and therefore its implementation omitted from this sample code.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <pre>
|
||||
* #include "unicode/ubidi.h"
|
||||
*
|
||||
* typedef enum {
|
||||
* styleNormal=0, styleSelected=1,
|
||||
* styleBold=2, styleItalics=4,
|
||||
* styleSuper=8, styleSub=16
|
||||
* } Style;
|
||||
*
|
||||
* typedef struct { UTextOffset limit; Style style; } StyleRun;
|
||||
*
|
||||
* int getTextWidth(const UChar *text, UTextOffset start, UTextOffset limit,
|
||||
* const StyleRun *styleRuns, int styleRunCount);
|
||||
*
|
||||
* // set *pLimit and *pStyleRunLimit for a line
|
||||
* // from text[start] and from styleRuns[styleRunStart]
|
||||
* // using ubidi_getLogicalRun(para, ...)
|
||||
* void getLineBreak(const UChar *text, UTextOffset start, UTextOffset *pLimit,
|
||||
* UBiDi *para,
|
||||
* const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
|
||||
* int *pLineWidth);
|
||||
*
|
||||
* // render runs on a line sequentially, always from left to right
|
||||
*
|
||||
* // prepare rendering a new line
|
||||
* void startLine(UBiDiDirection textDirection, int lineWidth);
|
||||
*
|
||||
* // render a run of text and advance to the right by the run width
|
||||
* // the text[start..limit-1] is always in logical order
|
||||
* void renderRun(const UChar *text, UTextOffset start, UTextOffset limit,
|
||||
* UBiDiDirection textDirection, Style style);
|
||||
*
|
||||
* // We could compute a cross-product
|
||||
* // from the style runs with the directional runs
|
||||
* // and then reorder it.
|
||||
* // Instead, here we iterate over each run type
|
||||
* // and render the intersections -
|
||||
* // with shortcuts in simple (and common) cases.
|
||||
* // renderParagraph() is the main function.
|
||||
*
|
||||
* // render a directional run with
|
||||
* // (possibly) multiple style runs intersecting with it
|
||||
* void renderDirectionalRun(const UChar *text,
|
||||
* UTextOffset start, UTextOffset limit,
|
||||
* UBiDiDirection direction,
|
||||
* const StyleRun *styleRuns, int styleRunCount) {
|
||||
* int i;
|
||||
*
|
||||
* // iterate over style runs
|
||||
* if(direction==UBIDI_LTR) {
|
||||
* int styleLimit;
|
||||
*
|
||||
* for(i=0; i<styleRunCount; ++i) {
|
||||
* styleLimit=styleRuns[i].limit;
|
||||
* if(start<styleLimit) {
|
||||
* if(styleLimit>limit) { styleLimit=limit; }
|
||||
* renderRun(text, start, styleLimit,
|
||||
* direction, styleRuns[i].style);
|
||||
* if(styleLimit==limit) { break; }
|
||||
* start=styleLimit;
|
||||
* }
|
||||
* }
|
||||
* } else {
|
||||
* int styleStart;
|
||||
*
|
||||
* for(i=styleRunCount-1; i>=0; --i) {
|
||||
* if(i>0) {
|
||||
* styleStart=styleRuns[i-1].limit;
|
||||
* } else {
|
||||
* styleStart=0;
|
||||
* }
|
||||
* if(limit>=styleStart) {
|
||||
* if(styleStart<start) { styleStart=start; }
|
||||
* renderRun(text, styleStart, limit,
|
||||
* direction, styleRuns[i].style);
|
||||
* if(styleStart==start) { break; }
|
||||
* limit=styleStart;
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* // the line object represents text[start..limit-1]
|
||||
* void renderLine(UBiDi *line, const UChar *text,
|
||||
* UTextOffset start, UTextOffset limit,
|
||||
* const StyleRun *styleRuns, int styleRunCount) {
|
||||
* UBiDiDirection direction=ubidi_getDirection(line);
|
||||
* if(direction!=UBIDI_MIXED) {
|
||||
* // unidirectional
|
||||
* if(styleRunCount<=1) {
|
||||
* renderRun(text, start, limit, direction, styleRuns[0].style);
|
||||
* } else {
|
||||
* renderDirectionalRun(text, start, limit,
|
||||
* direction, styleRuns, styleRunCount);
|
||||
* }
|
||||
* } else {
|
||||
* // mixed-directional
|
||||
* UTextOffset count, i, length;
|
||||
* UBiDiLevel level;
|
||||
*
|
||||
* count=ubidi_countRuns(line, pErrorCode);
|
||||
* if(U_SUCCESS(*pErrorCode)) {
|
||||
* if(styleRunCount<=1) {
|
||||
* Style style=styleRuns[0].style;
|
||||
*
|
||||
* // iterate over directional runs
|
||||
* for(i=0; i<count; ++i) {
|
||||
* direction=ubidi_getVisualRun(line, i, &start, &length);
|
||||
* renderRun(text, start, start+length, direction, style);
|
||||
* }
|
||||
* } else {
|
||||
* UTextOffset j;
|
||||
*
|
||||
* // iterate over both directional and style runs
|
||||
* for(i=0; i<count; ++i) {
|
||||
* direction=ubidi_getVisualRun(line, i, &start, &length);
|
||||
* renderDirectionalRun(text, start, start+length,
|
||||
* direction, styleRuns, styleRunCount);
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* void renderParagraph(const UChar *text, UTextOffset length,
|
||||
* UBiDiDirection textDirection,
|
||||
* const StyleRun *styleRuns, int styleRunCount,
|
||||
* int lineWidth,
|
||||
* UErrorCode *pErrorCode) {
|
||||
* UBiDi *para;
|
||||
*
|
||||
* if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
|
||||
* return;
|
||||
* }
|
||||
*
|
||||
* para=ubidi_openSized(length, 0, pErrorCode);
|
||||
* if(para==NULL) { return; }
|
||||
*
|
||||
* ubidi_setPara(para, text, length,
|
||||
* textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
|
||||
* NULL, pErrorCode);
|
||||
* if(U_SUCCESS(*pErrorCode)) {
|
||||
* UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
|
||||
* StyleRun styleRun={ length, styleNormal };
|
||||
* int width;
|
||||
*
|
||||
* if(styleRuns==NULL || styleRunCount<=0) {
|
||||
* styleRunCount=1;
|
||||
* styleRuns=&styleRun;
|
||||
* }
|
||||
*
|
||||
* // assume styleRuns[styleRunCount-1].limit>=length
|
||||
*
|
||||
* width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
|
||||
* if(width<=lineWidth) {
|
||||
* // everything fits onto one line
|
||||
*
|
||||
* // prepare rendering a new line from either left or right
|
||||
* startLine(paraLevel, width);
|
||||
*
|
||||
* renderLine(para, text, 0, length,
|
||||
* styleRuns, styleRunCount);
|
||||
* } else {
|
||||
* UBiDi *line;
|
||||
*
|
||||
* // we need to render several lines
|
||||
* line=ubidi_openSized(length, 0, pErrorCode);
|
||||
* if(line!=NULL) {
|
||||
* UTextOffset start=0, limit;
|
||||
* int styleRunStart=0, styleRunLimit;
|
||||
*
|
||||
* for(;;) {
|
||||
* limit=length;
|
||||
* styleRunLimit=styleRunCount;
|
||||
* getLineBreak(text, start, &limit, para,
|
||||
* styleRuns, styleRunStart, &styleRunLimit,
|
||||
* &width);
|
||||
* ubidi_setLine(para, start, limit, line, pErrorCode);
|
||||
* if(U_SUCCESS(*pErrorCode)) {
|
||||
* // prepare rendering a new line
|
||||
* // from either left or right
|
||||
* startLine(paraLevel, width);
|
||||
*
|
||||
* renderLine(line, text, start, limit,
|
||||
* styleRuns+styleRunStart,
|
||||
* styleRunLimit-styleRunStart);
|
||||
* }
|
||||
* if(limit==length) { break; }
|
||||
* start=limit;
|
||||
* styleRunStart=styleRunLimit-1;
|
||||
* if(start>=styleRuns[styleRunStart].limit) {
|
||||
* ++styleRunStart;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* ubidi_close(line);
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* ubidi_close(para);
|
||||
* }
|
||||
* </pre>
|
||||
*/
|
||||
BIDI_SAMPLE_CODE
|
||||
/*@{*/
|
||||
/*@}*/
|
||||
|
||||
/*@}*/
|
||||
|
||||
#endif
|
841
icu4c/source/common/unicode/uchar.h
Normal file
841
icu4c/source/common/unicode/uchar.h
Normal file
|
@ -0,0 +1,841 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* File UCHAR.H
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 04/02/97 aliu Creation.
|
||||
* 03/29/99 helena Updated for C APIs.
|
||||
* 4/15/99 Madhu Updated for C Implementation and Javadoc
|
||||
* 5/20/99 Madhu Added the function u_getVersion()
|
||||
* 8/19/1999 srl Upgraded scripts to Unicode 3.0
|
||||
* 8/27/1999 schererm UCharDirection constants: U_...
|
||||
* 11/11/1999 weiv added u_isalnum(), cleaned comments
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UCHAR_H
|
||||
#define UCHAR_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
/*===========================================================================*/
|
||||
/* Unicode version number */
|
||||
/*===========================================================================*/
|
||||
#define UNICODE_VERSION "3.0.0"
|
||||
|
||||
/**
|
||||
* The Unicode C API allows you to query the properties associated with individual
|
||||
* Unicode character values.
|
||||
* <p>
|
||||
* The Unicode character information, provided implicitly by the
|
||||
* Unicode character encoding standard, includes information about the script
|
||||
* (for example, symbols or control characters) to which the character belongs,
|
||||
* as well as semantic information such as whether a character is a digit or
|
||||
* uppercase, lowercase, or uncased.
|
||||
* <P>
|
||||
*/
|
||||
|
||||
/**
|
||||
* Constants.
|
||||
*/
|
||||
|
||||
/**
|
||||
* The minimum value a UChar can have. The lowest value a
|
||||
* UChar can have is 0x0000.
|
||||
*/
|
||||
static UChar UCHAR_MIN_VALUE;
|
||||
/**
|
||||
* The maximum value a UChar can have. The greatest value a
|
||||
* UChar can have is 0xffff.
|
||||
*/
|
||||
|
||||
static UChar UCHAR_MAX_VALUE; /* NOTE(review): file-scope static with no initializer in a header - unless the including file initializes it, each translation unit gets its own zero-valued copy, not the documented 0xffff; confirm where the value is set. */
|
||||
|
||||
/**
|
||||
* Data for enumerated Unicode general category types
|
||||
*/
|
||||
enum UCharCategory
|
||||
{
|
||||
/** */
|
||||
U_UNASSIGNED = 0,
|
||||
/** */
|
||||
U_UPPERCASE_LETTER = 1,
|
||||
/** */
|
||||
U_LOWERCASE_LETTER = 2,
|
||||
/** */
|
||||
U_TITLECASE_LETTER = 3,
|
||||
/** */
|
||||
U_MODIFIER_LETTER = 4,
|
||||
/** */
|
||||
U_OTHER_LETTER = 5,
|
||||
/** */
|
||||
U_NON_SPACING_MARK = 6,
|
||||
/** */
|
||||
U_ENCLOSING_MARK = 7,
|
||||
/** */
|
||||
U_COMBINING_SPACING_MARK = 8,
|
||||
/** */
|
||||
U_DECIMAL_DIGIT_NUMBER = 9,
|
||||
/** */
|
||||
U_LETTER_NUMBER = 10,
|
||||
/** */
|
||||
U_OTHER_NUMBER = 11,
|
||||
/** */
|
||||
U_SPACE_SEPARATOR = 12,
|
||||
/** */
|
||||
U_LINE_SEPARATOR = 13,
|
||||
/** */
|
||||
U_PARAGRAPH_SEPARATOR = 14,
|
||||
/** */
|
||||
U_CONTROL_CHAR = 15,
|
||||
/** */
|
||||
U_FORMAT_CHAR = 16,
|
||||
/** */
|
||||
U_PRIVATE_USE_CHAR = 17,
|
||||
/** */
|
||||
U_SURROGATE = 18,
|
||||
/** */
|
||||
U_DASH_PUNCTUATION = 19,
|
||||
/** */
|
||||
U_START_PUNCTUATION = 20,
|
||||
/** */
|
||||
U_END_PUNCTUATION = 21,
|
||||
/** */
|
||||
U_CONNECTOR_PUNCTUATION = 22,
|
||||
/** */
|
||||
U_OTHER_PUNCTUATION = 23,
|
||||
/** */
|
||||
U_MATH_SYMBOL = 24,
|
||||
/** */
|
||||
U_CURRENCY_SYMBOL = 25,
|
||||
/** */
|
||||
U_MODIFIER_SYMBOL = 26,
|
||||
/** */
|
||||
U_OTHER_SYMBOL = 27,
|
||||
/** */
|
||||
U_INITIAL_PUNCTUATION = 28,
|
||||
/** */
|
||||
U_FINAL_PUNCTUATION = 29,
|
||||
/** */
|
||||
U_GENERAL_OTHER_TYPES = 30,
|
||||
/** */
|
||||
U_CHAR_CATEGORY_COUNT
|
||||
};
|
||||
|
||||
typedef enum UCharCategory UCharCategory;
|
||||
/**
|
||||
* This specifies the language directional property of a character set.
|
||||
*/
|
||||
enum UCharDirection {
|
||||
/** */
|
||||
U_LEFT_TO_RIGHT = 0,
|
||||
/** */
|
||||
U_RIGHT_TO_LEFT = 1,
|
||||
/** */
|
||||
U_EUROPEAN_NUMBER = 2,
|
||||
/** */
|
||||
U_EUROPEAN_NUMBER_SEPARATOR = 3,
|
||||
/** */
|
||||
U_EUROPEAN_NUMBER_TERMINATOR = 4,
|
||||
/** */
|
||||
U_ARABIC_NUMBER = 5,
|
||||
/** */
|
||||
U_COMMON_NUMBER_SEPARATOR = 6,
|
||||
/** */
|
||||
U_BLOCK_SEPARATOR = 7,
|
||||
/** */
|
||||
U_SEGMENT_SEPARATOR = 8,
|
||||
/** */
|
||||
U_WHITE_SPACE_NEUTRAL = 9,
|
||||
/** */
|
||||
U_OTHER_NEUTRAL = 10,
|
||||
/** */
|
||||
U_LEFT_TO_RIGHT_EMBEDDING = 11,
|
||||
/** */
|
||||
U_LEFT_TO_RIGHT_OVERRIDE = 12,
|
||||
/** */
|
||||
U_RIGHT_TO_LEFT_ARABIC = 13,
|
||||
/** */
|
||||
U_RIGHT_TO_LEFT_EMBEDDING = 14,
|
||||
/** */
|
||||
U_RIGHT_TO_LEFT_OVERRIDE = 15,
|
||||
/** */
|
||||
U_POP_DIRECTIONAL_FORMAT = 16,
|
||||
/** */
|
||||
U_DIR_NON_SPACING_MARK = 17,
|
||||
/** */
|
||||
U_BOUNDARY_NEUTRAL = 18,
|
||||
/** */
|
||||
U_CHAR_DIRECTION_COUNT
|
||||
};
|
||||
|
||||
typedef enum UCharDirection UCharDirection;
|
||||
/**
|
||||
* Script range as defined in the Unicode standard.
|
||||
*/
|
||||
|
||||
/* Generated from Unicode Data files */
|
||||
enum UCharScript {
|
||||
/* Script names */
|
||||
/** */
|
||||
U_BASIC_LATIN,
|
||||
/** */
|
||||
U_LATIN_1_SUPPLEMENT,
|
||||
/** */
|
||||
U_LATIN_EXTENDED_A,
|
||||
/** */
|
||||
U_LATIN_EXTENDED_B,
|
||||
/** */
|
||||
U_IPA_EXTENSIONS,
|
||||
/** */
|
||||
U_SPACING_MODIFIER_LETTERS,
|
||||
/** */
|
||||
U_COMBINING_DIACRITICAL_MARKS,
|
||||
/** */
|
||||
U_GREEK,
|
||||
/** */
|
||||
U_CYRILLIC,
|
||||
/** */
|
||||
U_ARMENIAN,
|
||||
/** */
|
||||
U_HEBREW,
|
||||
/** */
|
||||
U_ARABIC,
|
||||
/** */
|
||||
U_SYRIAC,
|
||||
/** */
|
||||
U_THAANA,
|
||||
/** */
|
||||
U_DEVANAGARI,
|
||||
/** */
|
||||
U_BENGALI,
|
||||
/** */
|
||||
U_GURMUKHI,
|
||||
/** */
|
||||
U_GUJARATI,
|
||||
/** */
|
||||
U_ORIYA,
|
||||
/** */
|
||||
U_TAMIL,
|
||||
/** */
|
||||
U_TELUGU,
|
||||
/** */
|
||||
U_KANNADA,
|
||||
/** */
|
||||
U_MALAYALAM,
|
||||
/** */
|
||||
U_SINHALA,
|
||||
/** */
|
||||
U_THAI,
|
||||
/** */
|
||||
U_LAO,
|
||||
/** */
|
||||
U_TIBETAN,
|
||||
/** */
|
||||
U_MYANMAR,
|
||||
/** */
|
||||
U_GEORGIAN,
|
||||
/** */
|
||||
U_HANGUL_JAMO,
|
||||
/** */
|
||||
U_ETHIOPIC,
|
||||
/** */
|
||||
U_CHEROKEE,
|
||||
/** */
|
||||
U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
|
||||
/** */
|
||||
U_OGHAM,
|
||||
/** */
|
||||
U_RUNIC,
|
||||
/** */
|
||||
U_KHMER,
|
||||
/** */
|
||||
U_MONGOLIAN,
|
||||
/** */
|
||||
U_LATIN_EXTENDED_ADDITIONAL,
|
||||
/** */
|
||||
U_GREEK_EXTENDED,
|
||||
/** */
|
||||
U_GENERAL_PUNCTUATION,
|
||||
/** */
|
||||
U_SUPERSCRIPTS_AND_SUBSCRIPTS,
|
||||
/** */
|
||||
U_CURRENCY_SYMBOLS,
|
||||
/** */
|
||||
U_COMBINING_MARKS_FOR_SYMBOLS,
|
||||
/** */
|
||||
U_LETTERLIKE_SYMBOLS,
|
||||
/** */
|
||||
U_NUMBER_FORMS,
|
||||
/** */
|
||||
U_ARROWS,
|
||||
/** */
|
||||
U_MATHEMATICAL_OPERATORS,
|
||||
/** */
|
||||
U_MISCELLANEOUS_TECHNICAL,
|
||||
/** */
|
||||
U_CONTROL_PICTURES,
|
||||
/** */
|
||||
U_OPTICAL_CHARACTER_RECOGNITION,
|
||||
/** */
|
||||
U_ENCLOSED_ALPHANUMERICS,
|
||||
/** */
|
||||
U_BOX_DRAWING,
|
||||
/** */
|
||||
U_BLOCK_ELEMENTS,
|
||||
/** */
|
||||
U_GEOMETRIC_SHAPES,
|
||||
/** */
|
||||
U_MISCELLANEOUS_SYMBOLS,
|
||||
/** */
|
||||
U_DINGBATS,
|
||||
/** */
|
||||
U_BRAILLE_PATTERNS,
|
||||
/** */
|
||||
U_CJK_RADICALS_SUPPLEMENT,
|
||||
/** */
|
||||
U_KANGXI_RADICALS,
|
||||
/** */
|
||||
U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
|
||||
/** */
|
||||
U_CJK_SYMBOLS_AND_PUNCTUATION,
|
||||
/** */
|
||||
U_HIRAGANA,
|
||||
/** */
|
||||
U_KATAKANA,
|
||||
/** */
|
||||
U_BOPOMOFO,
|
||||
/** */
|
||||
U_HANGUL_COMPATIBILITY_JAMO,
|
||||
/** */
|
||||
U_KANBUN,
|
||||
/** */
|
||||
U_BOPOMOFO_EXTENDED,
|
||||
/** */
|
||||
U_ENCLOSED_CJK_LETTERS_AND_MONTHS,
|
||||
/** */
|
||||
U_CJK_COMPATIBILITY,
|
||||
/** */
|
||||
U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
|
||||
/** */
|
||||
U_CJK_UNIFIED_IDEOGRAPHS,
|
||||
/** */
|
||||
U_YI_SYLLABLES,
|
||||
/** */
|
||||
U_YI_RADICALS,
|
||||
/** */
|
||||
U_HANGUL_SYLLABLES,
|
||||
/** */
|
||||
U_HIGH_SURROGATES,
|
||||
/** */
|
||||
U_HIGH_PRIVATE_USE_SURROGATES,
|
||||
/** */
|
||||
U_LOW_SURROGATES,
|
||||
/** */
|
||||
U_PRIVATE_USE_AREA,
|
||||
/** */
|
||||
U_CJK_COMPATIBILITY_IDEOGRAPHS,
|
||||
/** */
|
||||
U_ALPHABETIC_PRESENTATION_FORMS,
|
||||
/** */
|
||||
U_ARABIC_PRESENTATION_FORMS_A,
|
||||
/** */
|
||||
U_COMBINING_HALF_MARKS,
|
||||
/** */
|
||||
U_CJK_COMPATIBILITY_FORMS,
|
||||
/** */
|
||||
U_SMALL_FORM_VARIANTS,
|
||||
/** */
|
||||
U_ARABIC_PRESENTATION_FORMS_B,
|
||||
/** */
|
||||
U_SPECIALS,
|
||||
/** */
|
||||
U_HALFWIDTH_AND_FULLWIDTH_FORMS,
|
||||
/** */
|
||||
U_CHAR_SCRIPT_COUNT,
|
||||
/** */
|
||||
U_NO_SCRIPT=U_CHAR_SCRIPT_COUNT
|
||||
};
|
||||
typedef enum UCharScript UCharScript;
|
||||
|
||||
/**
|
||||
* Values returned by the u_getCellWidth() function.
|
||||
*/
|
||||
enum UCellWidth
|
||||
{
|
||||
/** */
|
||||
U_ZERO_WIDTH = 0,
|
||||
/** */
|
||||
U_HALF_WIDTH = 1,
|
||||
/** */
|
||||
U_FULL_WIDTH = 2,
|
||||
/** */
|
||||
U_NEUTRAL_WIDTH = 3,
|
||||
/** */
|
||||
U_CELL_WIDTH_COUNT
|
||||
};
|
||||
|
||||
typedef enum UCellWidth UCellWidth;
|
||||
|
||||
/**
|
||||
* Selector constants for u_charName().
|
||||
* <code>u_charName()</code> returns either the "modern" name of a
|
||||
* Unicode character or the name that was defined in
|
||||
* Unicode version 1.0, before the Unicode standard merged
|
||||
* with ISO-10646.
|
||||
*
|
||||
* @see u_charName()
|
||||
*/
|
||||
enum UCharNameChoice {
|
||||
U_UNICODE_CHAR_NAME, /* selects the "modern" Unicode character name */
|
||||
U_UNICODE_10_CHAR_NAME, /* selects the name that was defined in Unicode version 1.0 */
|
||||
U_CHAR_NAME_CHOICE_COUNT /* number of selectors defined above; not itself a selector */
|
||||
};
|
||||
|
||||
typedef enum UCharNameChoice UCharNameChoice;
|
||||
|
||||
/**
|
||||
* Functions to classify characters.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Determines whether the specified UChar is a lowercase character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is lowercase; false otherwise.
|
||||
* @see UNICODE_VERSION
|
||||
* @see u_isupper()
|
||||
* @see u_istitle()
|
||||
* @see u_tolower()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_islower(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is an uppercase character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is uppercase; false otherwise.
|
||||
* @see u_islower()
|
||||
* @see u_istitle
|
||||
* @see u_tolower()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isupper(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a titlecase character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is titlecase; false otherwise.
|
||||
* @see u_isupper()
|
||||
* @see u_islower()
|
||||
* @see u_totitle()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_istitle(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a digit according to Unicode
|
||||
* 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is a digit; false otherwise.
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isdigit(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is an alphanumeric character
|
||||
* (letter or digit) according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is a letter or a digit; false otherwise.
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isalnum(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified numeric value is actually a defined character
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character has a defined Unicode meaning; false otherwise.
|
||||
*
|
||||
* @see u_isdigit()
|
||||
* @see u_isalpha()
|
||||
* @see u_isalnum()
|
||||
* @see u_isupper()
|
||||
* @see u_islower()
|
||||
* @see u_istitle()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isdefined(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a letter
|
||||
* according to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is a letter; false otherwise.
|
||||
*
|
||||
* @see u_isdigit()
|
||||
* @see u_isalnum()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isalpha(UChar c);
|
||||
|
||||
/**
|
||||
* Determines if the specified character is a space character or not.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the character is a space character; false otherwise.
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isspace(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a control character or not.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the Unicode character is a control character; false otherwise.
|
||||
*
|
||||
* @see u_isprint()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_iscntrl(UChar c);
|
||||
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is a printable character according
|
||||
* to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the Unicode character is a printable character; false otherwise.
|
||||
*
|
||||
* @see u_iscntrl()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isprint(UChar c);
|
||||
|
||||
/**
|
||||
* Determines whether the specified character is of the base form according
|
||||
* to Unicode 2.1.2.
|
||||
*
|
||||
* @param c the character to be tested
|
||||
* @return true if the Unicode character is of the base form; false otherwise.
|
||||
*
|
||||
* @see u_isalpha()
|
||||
* @see u_isdigit()
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isbase(UChar c);
|
||||
/**
|
||||
* Returns the linguistic direction property of a character.
|
||||
* <P>
|
||||
* Returns the linguistic direction property of a character.
|
||||
* For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
|
||||
* property.
|
||||
* @see UCharDirection
|
||||
*/
|
||||
U_CAPI UCharDirection U_EXPORT2
|
||||
u_charDirection(UChar c);
|
||||
|
||||
/**
|
||||
* Returns a value indicating the display-cell width of the character
|
||||
* when used in Asian text, according to the Unicode standard (see p. 6-130
|
||||
* of The Unicode Standard, Version 2.0). The results for various characters
|
||||
* are as follows:
|
||||
* <P>
|
||||
* ZERO_WIDTH: Characters which are considered to take up no display-cell space:
|
||||
* control characters
|
||||
* format characters
|
||||
* line and paragraph separators
|
||||
* non-spacing marks
|
||||
* combining Hangul jungseong
|
||||
* combining Hangul jongseong
|
||||
* unassigned Unicode values
|
||||
* <P>
|
||||
* HALF_WIDTH: Characters which take up half a cell in standard Asian text:
|
||||
* all characters in the General Scripts Area except combining Hangul choseong
|
||||
* and the characters called out specifically above as ZERO_WIDTH
|
||||
* alphabetic and Arabic presentation forms
|
||||
* halfwidth CJK punctuation
|
||||
* halfwidth Katakana
|
||||
* halfwidth Hangul Jamo
|
||||
* halfwidth forms, arrows, and shapes
|
||||
* <P>
|
||||
* FULL_WIDTH: Characters which take up a full cell in standard Asian text:
|
||||
* combining Hangul choseong
|
||||
* all characters in the CJK Phonetics and Symbols Area
|
||||
* all characters in the CJK Ideographs Area
|
||||
* all characters in the Hangul Syllables Area
|
||||
* CJK compatibility ideographs
|
||||
* CJK compatibility forms
|
||||
* small form variants
|
||||
* fullwidth ASCII
|
||||
* fullwidth punctuation and currency signs
|
||||
* <P>
|
||||
* NEUTRAL: Characters whose cell width is context-dependent:
|
||||
* all characters in the Symbols Area, except those specifically called out above
|
||||
* all characters in the Surrogates Area
|
||||
* all characters in the Private Use Area
|
||||
* <P>
|
||||
* For Korean text, this algorithm should work properly with properly normalized Korean
|
||||
* text. Precomposed Hangul syllables and non-combining jamo are all considered full-
|
||||
* width characters. For combining jamo, we treat choseong (initial consonants)
|
||||
* as double-width characters and jungseong (vowels) and jongseong (final consonants)
|
||||
* as non-spacing marks. This will work right in text that uses the precomposed
|
||||
* choseong characters instead of two choseong characters in a row, and which uses the
|
||||
* choseong filler character at the beginning of syllables that don't have an initial
|
||||
* consonant. The results may be slightly off with Korean text following different
|
||||
* conventions.
|
||||
*/
|
||||
U_CAPI uint16_t U_EXPORT2
|
||||
u_charCellWidth(UChar c);
|
||||
|
||||
/**
|
||||
* Returns a value indicating a character category according to Unicode
|
||||
* 2.1.2.
|
||||
* @param c the character to be tested
|
||||
* @return a value of type int, the character category.
|
||||
* @see UCharCategory
|
||||
*/
|
||||
U_CAPI int8_t U_EXPORT2
|
||||
u_charType(UChar c);
|
||||
|
||||
/**
|
||||
* Retrieves the decimal numeric value of a digit character.
|
||||
*
|
||||
* @param c the digit character for which to get the numeric value
|
||||
* @return the numeric value of c in decimal radix. This method returns
|
||||
* -1 if c is not a valid digit character.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
u_charDigitValue(UChar c);
|
||||
|
||||
/**
|
||||
* Returns the script associated with a character.
|
||||
*
|
||||
* @see #UCharScript
|
||||
*/
|
||||
U_CAPI UCharScript U_EXPORT2
|
||||
u_charScript(UChar ch);
|
||||
|
||||
/**
|
||||
* Retrieve the name of a Unicode character.
|
||||
* Depending on <code>nameChoice</code>, the character name written
|
||||
* into the buffer is the "modern" name or the name that was defined
|
||||
* in Unicode version 1.0.
|
||||
* The name contains only "invariant" characters
|
||||
* like A-Z, 0-9, space, and '-'.
|
||||
*
|
||||
* @param code The character (code point) for which to get the name.
|
||||
* It must be <code>0<=code<=0x10ffff</code>.
|
||||
* @param nameChoice Selector for which name to get.
|
||||
* @param buffer Destination address for copying the name.
|
||||
* @param bufferLength <code>==sizeof(buffer)</code>
|
||||
* @param pErrorCode Pointer to a UErrorCode variable;
|
||||
* check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
|
||||
* returns.
|
||||
*
|
||||
* @see UCharNameChoice
|
||||
*/
|
||||
U_CAPI UTextOffset U_EXPORT2
|
||||
u_charName(uint32_t code, UCharNameChoice nameChoice,
|
||||
char *buffer, UTextOffset bufferLength,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* The following functions are Java-specific.
|
||||
*/
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* is allowed to start in a Unicode identifier.
|
||||
* A character may start a Unicode identifier if and only if
|
||||
* it is a letter.
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character may start a Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isalpha
|
||||
* @see u_isIDPart
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isIDStart(UChar c);
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* may be part of a Unicode identifier other than the starting
|
||||
* character.
|
||||
* <P>
|
||||
* A character may be part of a Unicode identifier if and only if
|
||||
* it is one of the following:
|
||||
* <ul>
|
||||
* <li> a letter
|
||||
* <li> a connecting punctuation character (such as "_").
|
||||
* <li> a digit
|
||||
* <li> a numeric letter (such as a Roman numeral character)
|
||||
* <li> a combining mark
|
||||
* <li> a non-spacing mark
|
||||
* <li> an ignorable control character
|
||||
* </ul>
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character may be part of a Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isIDIgnorable
|
||||
* @see u_isIDStart
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isIDPart(UChar c);
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* should be regarded as an ignorable character
|
||||
* in a Unicode identifier.
|
||||
* <P>
|
||||
* The following Unicode characters are ignorable in a
|
||||
* Unicode identifier:
|
||||
* <table>
|
||||
* <tr><td>0x0000 through 0x0008,</td>
|
||||
* <td>ISO control characters that</td></tr>
|
||||
* <tr><td>0x000E through 0x001B,</td> <td>are not whitespace</td></tr>
|
||||
* <tr><td>and 0x007F through 0x009F</td></tr>
|
||||
* <tr><td>0x200C through 0x200F</td> <td>join controls</td></tr>
|
||||
* <tr><td>0x200A through 0x200E</td> <td>bidirectional controls</td></tr>
|
||||
* <tr><td>0x206A through 0x206F</td> <td>format controls</td></tr>
|
||||
* <tr><td>0xFEFF</td> <td>zero-width no-break space</td></tr>
|
||||
* </table>
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character is ignorable in a Unicode identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isIDPart
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isIDIgnorable(UChar c);
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* is allowed as the first character in a Java identifier.
|
||||
* <P>
|
||||
* A character may start a Java identifier if and only if
|
||||
* it is one of the following:
|
||||
* <ul>
|
||||
* <li> a letter
|
||||
* <li> a currency symbol (such as "$")
|
||||
* <li> a connecting punctuation symbol (such as "_").
|
||||
* </ul>
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character may start a Java identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isJavaIDPart
|
||||
* @see u_isalpha
|
||||
* @see u_isIDStart
|
||||
*/
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isJavaIDStart(UChar c);
|
||||
/**
|
||||
* A convenience method for determining if a Unicode character
|
||||
* may be part of a Java identifier other than the starting
|
||||
* character.
|
||||
* <P>
|
||||
* A character may be part of a Java identifier if and only if
|
||||
* it is one of the following:
|
||||
* <ul>
|
||||
* <li> a letter
|
||||
* <li> a currency symbol (such as "$")
|
||||
* <li> a connecting punctuation character (such as "_").
|
||||
* <li> a digit
|
||||
* <li> a numeric letter (such as a Roman numeral character)
|
||||
* <li> a combining mark
|
||||
* <li> a non-spacing mark
|
||||
* <li> an ignorable control character
|
||||
* </ul>
|
||||
*
|
||||
* @param c the Unicode character.
|
||||
* @return TRUE if the character may be part of a Java identifier;
|
||||
* FALSE otherwise.
|
||||
* @see u_isIDIgnorable
|
||||
* @see u_isJavaIDStart
|
||||
* @see u_isalpha
|
||||
* @see u_isdigit
|
||||
* @see u_isIDPart
|
||||
*/
|
||||
|
||||
U_CAPI bool_t U_EXPORT2
|
||||
u_isJavaIDPart(UChar c);
|
||||
|
||||
/**
|
||||
* Functions to change character case.
|
||||
*/
|
||||
|
||||
/**
|
||||
* The given character is mapped to its lowercase equivalent according to
|
||||
* Unicode 2.1.2; if the character has no lowercase equivalent, the character
|
||||
* itself is returned.
|
||||
* <P>
|
||||
* A character has a lowercase equivalent if and only if a lowercase mapping
|
||||
* is specified for the character in the Unicode 2.1.2 attribute table.
|
||||
* <P>
|
||||
* u_tolower() only deals with the general letter case conversion.
|
||||
* For language specific case conversion behavior, use ustrToUpper().
|
||||
* For example, the case conversion for dot-less i and dotted I in Turkish,
|
||||
* or for final sigma in Greek.
|
||||
*
|
||||
* @param c the character to be converted
|
||||
* @return the lowercase equivalent of the character, if any;
|
||||
* otherwise the character itself.
|
||||
*/
|
||||
U_CAPI UChar U_EXPORT2
|
||||
u_tolower(UChar c);
|
||||
|
||||
/**
|
||||
* The given character is mapped to its uppercase equivalent according to Unicode
|
||||
* 2.1.2; if the character has no uppercase equivalent, the character itself is
|
||||
* returned.
|
||||
* <P>
|
||||
* u_toupper() only deals with the general letter case conversion.
|
||||
* For language specific case conversion behavior, use ustrToUpper().
|
||||
* For example, the case conversion for dot-less i and dotted I in Turkish,
|
||||
* or ess-zed (i.e., "sharp S") in German.
|
||||
*
|
||||
* @param c the character to be converted
|
||||
* @return the uppercase equivalent of the character, if any;
|
||||
* otherwise the character itself.
|
||||
*/
|
||||
U_CAPI UChar U_EXPORT2
|
||||
u_toupper(UChar c);
|
||||
/**
|
||||
* The given character is mapped to its titlecase equivalent according to Unicode
|
||||
* 2.1.2. There are only four Unicode characters that are truly titlecase forms
|
||||
* that are distinct from uppercase forms. As a rule, if a character has no
|
||||
* true titlecase equivalent, its uppercase equivalent is returned.
|
||||
* <P>
|
||||
* A character has a titlecase equivalent if and only if a titlecase mapping
|
||||
* is specified for the character in the Unicode 2.1.2 data.
|
||||
*
|
||||
* @param c the character to be converted
|
||||
* @return the titlecase equivalent of the character, if any;
|
||||
* otherwise the character itself.
|
||||
*/
|
||||
U_CAPI UChar U_EXPORT2
|
||||
u_totitle(UChar c);
|
||||
|
||||
/**
|
||||
* The function is used to get the Unicode standard Version that is used.
|
||||
*
|
||||
* @return the Unicode standard Version number
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
u_getVersion(void);
|
||||
|
||||
#endif /*_UCHAR*/
|
||||
/*eof*/
|
142
icu4c/source/common/unicode/uchriter.h
Normal file
142
icu4c/source/common/unicode/uchriter.h
Normal file
|
@ -0,0 +1,142 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1998-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*/
|
||||
|
||||
#ifndef UCHRITER_H
|
||||
#define UCHRITER_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/chariter.h"
|
||||
|
||||
|
||||
/**
|
||||
* A concrete subclass of CharacterIterator that iterates over the
|
||||
* characters in a UnicodeString. It's possible not only to create an
|
||||
* iterator that iterates over an entire UnicodeString, but also to
|
||||
* create one that iterates over only a subrange of a UnicodeString
|
||||
* (iterators over different subranges of the same UnicodeString don't
|
||||
* compare equal). */
|
||||
class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
|
||||
public:
|
||||
/**
|
||||
* Create an iterator over the UnicodeString referred to by "text".
|
||||
* The iteration range is the whole string, and the starting
|
||||
* position is 0. */
|
||||
UCharCharacterIterator(const UChar* text, int32_t len);
|
||||
|
||||
/**
|
||||
* Copy constructor. The new iterator iterates over the same range
|
||||
* of the same string as "that", and its initial position is the
|
||||
* same as "that"'s current position. */
|
||||
UCharCharacterIterator(const UCharCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Destructor. */
|
||||
~UCharCharacterIterator();
|
||||
|
||||
/**
|
||||
* Assignment operator. *this is altered to iterate over the same
|
||||
* range of the same string as "that", and refers to the same
|
||||
* character within that string as "that" does. */
|
||||
UCharCharacterIterator&
|
||||
operator=(const UCharCharacterIterator& that);
|
||||
|
||||
/**
|
||||
* Returns true if the iterators iterate over the same range of the
|
||||
* same string and are pointing at the same character. */
|
||||
virtual bool_t operator==(const CharacterIterator& that) const;
|
||||
|
||||
/**
|
||||
* Generates a hash code for this iterator. */
|
||||
virtual int32_t hashCode(void) const;
|
||||
|
||||
/**
|
||||
* Returns a new UCharCharacterIterator referring to the same
|
||||
* character in the same range of the same string as this one. The
|
||||
* caller must delete the new iterator. */
|
||||
virtual CharacterIterator* clone(void) const;
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the first character in its
|
||||
* iteration range, and returns that character. */
|
||||
virtual UChar first(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the last character in its iteration
|
||||
* range, and returns that character. */
|
||||
virtual UChar last(void);
|
||||
|
||||
/**
|
||||
* Sets the iterator to refer to the "position"-th character in the
|
||||
* UnicodeString the iterator refers to, and returns that character.
|
||||
* If the index is outside the iterator's iteration range, the
|
||||
* behavior of the iterator is undefined. */
|
||||
virtual UChar setIndex(UTextOffset pos);
|
||||
|
||||
/**
|
||||
* Returns the character the iterator currently refers to. */
|
||||
virtual UChar current(void) const;
|
||||
|
||||
/**
|
||||
* Advances to the next character in the iteration range (toward
|
||||
* last()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE. */
|
||||
virtual UChar next(void);
|
||||
|
||||
/**
|
||||
* Advances to the previous character in the iteration range (toward
|
||||
* first()), and returns that character. If there are no more
|
||||
* characters to return, returns DONE. */
|
||||
virtual UChar previous(void);
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the first character in this
|
||||
* iterator's iteration range. */
|
||||
virtual UTextOffset startIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index of the character immediately BEYOND the
|
||||
* last character in this iterator's iteration range. */
|
||||
virtual UTextOffset endIndex(void) const;
|
||||
|
||||
/**
|
||||
* Returns the numeric index in the underlying UnicodeString of the
|
||||
* character the iterator currently refers to (i.e., the character
|
||||
* returned by current()). */
|
||||
virtual UTextOffset getIndex(void) const;
|
||||
|
||||
/**
|
||||
* Copies the UnicodeString under iteration into the UnicodeString
|
||||
* referred to by "result". Even if this iterator iterates across
|
||||
* only a part of this string, the whole string is copied. @param
|
||||
* result Receives a copy of the text under iteration. */
|
||||
virtual void getText(UnicodeString& result);
|
||||
|
||||
/**
|
||||
* Return a class ID for this object (not really public) */
|
||||
virtual UClassID getDynamicClassID(void) const
|
||||
{ return getStaticClassID(); }
|
||||
|
||||
/**
|
||||
* Return a class ID for this class (not really public) */
|
||||
static UClassID getStaticClassID(void)
|
||||
{ return (UClassID)(&fgClassID); }
|
||||
|
||||
private:
|
||||
UCharCharacterIterator();
|
||||
|
||||
const UChar* text;
|
||||
UTextOffset pos;
|
||||
UTextOffset begin;
|
||||
UTextOffset end;
|
||||
|
||||
static char fgClassID;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
605
icu4c/source/common/unicode/ucnv.h
Normal file
605
icu4c/source/common/unicode/ucnv.h
Normal file
|
@ -0,0 +1,605 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* ucnv.h:
|
||||
* External APIs for the ICU's codeset conversion library
|
||||
* Bertrand A. Damiba
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 04/04/99 helena Fixed internal header inclusion.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @name Character Conversion C API
|
||||
*
|
||||
* Character Conversion C API documentation is still under construction.
|
||||
* Please check for updates soon.
|
||||
*/
|
||||
|
||||
#ifndef UCNV_H
|
||||
#define UCNV_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ucnv_bld.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
typedef void (*UConverterToUCallback) (UConverter *,
|
||||
UChar **,
|
||||
const UChar *,
|
||||
const char **,
|
||||
const char *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
typedef void (*UConverterFromUCallback) (UConverter *,
|
||||
char **,
|
||||
const char *,
|
||||
const UChar **,
|
||||
const UChar *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
|
||||
/**
|
||||
* Creates a UConverter object with the names specified as a C string.
|
||||
* The actual name will be resolved with the alias file.
|
||||
* if <TT>NULL</TT> is passed for the converter name, it will create one with the
|
||||
* getDefaultName return value.
|
||||
* @param converterName : name of the uconv table
|
||||
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
|
||||
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
|
||||
* @see ucnv_openU
|
||||
* @see ucnv_openCCSID
|
||||
* @see ucnv_close
|
||||
*/
|
||||
|
||||
U_CAPI
|
||||
UConverter* U_EXPORT2 ucnv_open (const char *converterName,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Creates a Unicode converter with the names specified as unicode string. The name should be limited to
|
||||
* the ASCII-7 alphanumerics range. The actual name will be resolved with the alias file.
|
||||
* if <TT>NULL</TT> is passed for the converter name, it will create one with the
|
||||
* getDefaultName return value.
|
||||
* @param converterName : name of the uconv table in a zero terminated Unicode string
|
||||
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
|
||||
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
|
||||
* @see ucnv_open
|
||||
* @see ucnv_openCCSID
|
||||
* @see ucnv_close
|
||||
*/
|
||||
U_CAPI UConverter* U_EXPORT2 ucnv_openU (const UChar * name,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Creates a UConverter object using a CCSID number.
|
||||
*
|
||||
* @param codepage : codepage # of the uconv table
|
||||
* @param platform : codepage's platform (now only <TT>IBM</TT> supported)
|
||||
* @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
|
||||
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
|
||||
* @see ucnv_open
|
||||
* @see ucnv_openU
|
||||
* @see ucnv_close
|
||||
*/
|
||||
|
||||
U_CAPI UConverter* U_EXPORT2 ucnv_openCCSID (int32_t codepage,
|
||||
UConverterPlatform platform,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Deletes the unicode converter.
|
||||
*
|
||||
* @param converter the converter object to be deleted
|
||||
* @see ucnv_open
|
||||
* @see ucnv_openU
|
||||
* @see ucnv_openCCSID
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucnv_close (UConverter * converter);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Fills in the output parameter, subChars, with the substitution characters
|
||||
* as multiple bytes.
|
||||
*
|
||||
* @param converter: the Unicode converter
|
||||
* @param subChars: the substitution characters
|
||||
* @param len: on input the capacity of subChars, on output the number of bytes copied to it
|
||||
* @param err: the outgoing error status code.
|
||||
* If the substitution character array is too small, an
|
||||
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
|
||||
* @see ucnv_setSubstChars
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getSubstChars (const UConverter * converter,
|
||||
char *subChars,
|
||||
int8_t * len,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* Sets the substitution chars when converting from unicode to a codepage. The
|
||||
* substitution is specified as a string of 1-4 bytes, and may contain <TT>NULL</TT> byte.
|
||||
* The fill-in parameter err will get the error status on return.
|
||||
* @param converter the Unicode converter
|
||||
* @param subChars the substitution character byte sequence we want set
|
||||
* @param len the number of bytes in subChars
|
||||
* @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
|
||||
* len is bigger than the maximum number of bytes allowed in subchars
|
||||
* @see ucnv_getSubstChars
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_setSubstChars (UConverter * converter,
|
||||
const char *subChars,
|
||||
int8_t len,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Fills in the output parameter, errBytes, with the error characters from the
|
||||
* last failing conversion.
|
||||
*
|
||||
* @param converter: the Unicode converter
|
||||
* @param errBytes: the bytes in error
|
||||
* @param len: on input the capacity of errBytes, on output the number of bytes copied to it
|
||||
* @param err: the outgoing error status code.
|
||||
* If the substitution character array is too small, an
|
||||
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getInvalidChars (const UConverter * converter,
|
||||
char *errBytes,
|
||||
int8_t * len,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Fills in the output parameter, errUChars, with the error characters from the
|
||||
* last failing conversion.
|
||||
*
|
||||
* @param converter: the Unicode converter
|
||||
* @param errUChars: the bytes in error
|
||||
* @param len: on input the capacity of errUChars, on output the number of UChars copied to it
|
||||
* @param err: the outgoing error status code.
|
||||
* If the substitution character array is too small, an
|
||||
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_getInvalidUChars (const UConverter * converter,
|
||||
char *errUChars,
|
||||
int8_t * len,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* Resets the state of stateful conversion to the default state. This is used
|
||||
* in the case of error to restart a conversion from a known default state.
|
||||
* it will also empty the internal output buffers.
|
||||
* @param converter the Unicode converter
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
ucnv_reset (UConverter * converter);
|
||||
|
||||
/**
|
||||
* Returns the maximum length of bytes used by a character. This varies between 1 and 4
|
||||
* @param converter the Unicode converter
|
||||
* @return the maximum number of bytes allowed by this particular converter
|
||||
* @see ucnv_getMinCharSize
|
||||
*/
|
||||
U_CAPI int8_t U_EXPORT2
|
||||
ucnv_getMaxCharSize (const UConverter * converter);
|
||||
|
||||
|
||||
/**
|
||||
* Returns the minimum byte length for characters in this codepage. This is either
|
||||
* 1 or 2 for all supported codepages.
|
||||
* @param converter the Unicode converter
|
||||
* @return the minimum number of bytes allowed by this particular converter
|
||||
* @see ucnv_getMaxCharSize
|
||||
*/
|
||||
U_CAPI int8_t U_EXPORT2
|
||||
ucnv_getMinCharSize (const UConverter * converter);
|
||||
|
||||
|
||||
/**
|
||||
* Returns the display name of the converter passed in based on the Locale passed in,
|
||||
* in the case the locale contains no display name, the internal ASCII name will be
|
||||
* filled in.
|
||||
*
|
||||
* @param converter the Unicode converter.
|
||||
* @param displayLocale is the specific Locale we want to localize for
|
||||
* @param displayName user provided buffer to be filled in
|
||||
* @param displayNameCapacity size of displayName Buffer
|
||||
* @param err: outgoing error code.
|
||||
* @return displayNameLength number of UChar needed in displayName
|
||||
* @see ucnv_getName
|
||||
*/
|
||||
U_CAPI
|
||||
int32_t U_EXPORT2 ucnv_getDisplayName (const UConverter * converter,
|
||||
const char *displayLocale,
|
||||
UChar * displayName,
|
||||
int32_t displayNameCapacity,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* Gets the name of the converter (zero-terminated).
|
||||
* the name will be the internal name of the converter, the lifetime of the returned
|
||||
* string will be that of the converter passed to this function.
|
||||
* @param converter the Unicode converter
|
||||
* @param err UErrorCode status
|
||||
* @return the internal name of the converter
|
||||
* @see ucnv_getDisplayName
|
||||
*/
|
||||
U_CAPI
|
||||
const char * U_EXPORT2 ucnv_getName (const UConverter * converter, UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Gets a codepage number associated with the converter. This is not guaranteed
|
||||
* to be the one used to create the converter. Some converters do not represent
|
||||
* IBM registered codepages and return zero for the codepage number.
|
||||
* The error code fill-in parameter indicates if the codepage number is available.
|
||||
* @param converter the Unicode converter
|
||||
* @param err the error status code.
|
||||
* the converter is <TT>NULL</TT> or if converter's data table is <TT>NULL</TT>.
|
||||
* @return If any error occurs, -1 will be returned; otherwise, the codepage number
|
||||
* will be returned
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
ucnv_getCCSID (const UConverter * converter,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* Gets a codepage platform associated with the converter. Currently, only <TT>IBM</TT> is supported
|
||||
* The error code fill-in parameter indicates if the codepage number is available.
|
||||
* @param converter the Unicode converter
|
||||
* @param err the error status code.
|
||||
* the converter is <TT>NULL</TT> or if converter's data table is <TT>NULL</TT>.
|
||||
* @return The codepage platform
|
||||
*/
|
||||
U_CAPI UConverterPlatform U_EXPORT2
|
||||
ucnv_getPlatform (const UConverter * converter,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
*Gets the type of conversion associated with the converter
|
||||
* e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, EBCDIC_STATEFUL, LATIN_1
|
||||
* @param converter: a valid, opened converter
|
||||
* @return the type of the converter
|
||||
*/
|
||||
U_CAPI UConverterType U_EXPORT2
|
||||
ucnv_getType (const UConverter * converter);
|
||||
|
||||
/**
|
||||
*Gets the "starter" bytes for the converters of type MBCS
|
||||
*will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
|
||||
*is not MBCS.
|
||||
*fills in an array of boolean, with the value of the byte as offset to the array.
|
||||
*At return, if TRUE is found in at offset 0x20, it means that the byte 0x20 is a starter byte
|
||||
*in this converter.
|
||||
* @param converter: a valid, opened converter of type MBCS
|
||||
* @param starters: an array of size 256 to be filled in
|
||||
* @param err: the error status code; <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> is set if the converter is not MBCS
|
||||
* @see ucnv_getType
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucnv_getStarters(const UConverter* converter,
|
||||
bool_t starters[256],
|
||||
UErrorCode* err);
|
||||
|
||||
|
||||
/**
|
||||
* Gets the current callback function used by the converter when an illegal or invalid codepage sequence is found.
|
||||
*
|
||||
* @param converter the unicode converter
|
||||
* @return a pointer to the callback function
|
||||
* @see ucnv_setToUCallBack
|
||||
*/
|
||||
U_CAPI UConverterToUCallback U_EXPORT2
|
||||
ucnv_getToUCallBack (const UConverter * converter);
|
||||
|
||||
/**
|
||||
* Gets the current callback function used by the converter when an illegal or invalid Unicode sequence is found.
|
||||
*
|
||||
* @param converter the unicode converter
|
||||
* @return a pointer to the callback function
|
||||
* @see ucnv_setFromUCallBack
|
||||
*/
|
||||
U_CAPI UConverterFromUCallback U_EXPORT2
|
||||
ucnv_getFromUCallBack (const UConverter * converter);
|
||||
|
||||
/**
|
||||
* Sets the callback function used by the converter when an illegal or invalid codepage sequence is found.
|
||||
*
|
||||
* @param converter the unicode converter
|
||||
* @param action the callback function we want to set.
|
||||
* @param err The error code status
|
||||
* @return the previously assigned callback function pointer
|
||||
* @see ucnv_getToUCallBack
|
||||
*/
|
||||
U_CAPI UConverterToUCallback U_EXPORT2
|
||||
ucnv_setToUCallBack (UConverter * converter,
|
||||
UConverterToUCallback action,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* Sets the callback function used by the converter when an illegal or invalid Unicode sequence is found.
|
||||
*
|
||||
* @param converter the unicode converter
|
||||
* @param action the callback function we want to set.
|
||||
* @param err The error code status
|
||||
* @return the previously assigned callback function pointer
|
||||
* @see ucnv_getFromUCallBack
|
||||
*/
|
||||
U_CAPI UConverterFromUCallback U_EXPORT2
|
||||
ucnv_setFromUCallBack (UConverter * converter,
|
||||
UConverterFromUCallback action,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Transcodes an array of unicode characters to an array of codepage characters.
|
||||
* The source pointer is an I/O parameter, it starts out pointing where the function is
|
||||
* to begin transcoding, and ends up pointing after the first sequence of the bytes
|
||||
* that it encounters that are semantically invalid.
|
||||
* if ucnv_setFromUCallBack is called with an action other than <TT>STOP</TT>
|
||||
* before a call is made to this API, <TT>consumed</TT> and <TT>source</TT> should point to the same place
|
||||
* (unless <TT>target</TT> ends with an incomplete sequence of bytes and <TT>flush</TT> is <TT>FALSE</TT>).
|
||||
* the <TT>target</TT> buffer needs to be at least the size of the maximum # of bytes per character
|
||||
* allowed by the target codepage.
|
||||
* @param converter the Unicode converter
|
||||
* @param converter the Unicode converter
|
||||
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
|
||||
* codepage characters to. Output : points to after the last codepage character copied
|
||||
* to <TT>target</TT>.
|
||||
* @param targetLimit the pointer to the end of the <TT>target</TT> array
|
||||
* @param source the source Unicode character array
|
||||
* @param sourceLimit the pointer to the end of the source array
|
||||
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
|
||||
* of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
|
||||
* e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
|
||||
* For output data carried across calls -1 will be placed for offsets.
|
||||
* @param flush <TT>TRUE</TT> if the buffer is the last buffer of the conversion iteration
|
||||
* and the conversion will finish with this call, FALSE otherwise.
|
||||
* @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be returned if the
|
||||
* converter is <TT>NULL</TT>.
|
||||
* @see ucnv_fromUChars
|
||||
* @see ucnv_convert
|
||||
* @see ucnv_getMinCharSize
|
||||
* @see ucnv_setFromUCallBack
|
||||
*/
|
||||
|
||||
U_CAPI
|
||||
void U_EXPORT2 ucnv_fromUnicode (UConverter * converter,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Converts an array of codepage characters into an array of unicode characters.
|
||||
* The source pointer is an I/O parameter, it starts out pointing at the place
|
||||
* to begin translating, and ends up pointing after the first sequence of the bytes
|
||||
* that it encounters that are semantically invalid.
|
||||
* if ucnv_setToUCallBack is called with an action other than STOP
|
||||
* before a call is made to this API, consumed and source should point to the same place
|
||||
* (unless target ends with an incomplete sequence of bytes and flush is FALSE).
|
||||
* @param converter the Unicode converter
|
||||
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
|
||||
* Unicode characters to. Output : points to after the last UChar copied to target.
|
||||
* @param targetLimit the pointer to the end of the target array
|
||||
* @param source the source codepage character array
|
||||
* @param sourceLimit the pointer to the end of the source array
|
||||
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
|
||||
* of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
|
||||
* e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
|
||||
* For output data carried across calls -1 will be placed for offsets.
|
||||
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
|
||||
* in this call, FALSE otherwise.
|
||||
* @param err the error code status <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be returned if the
|
||||
* converter is <TT>NULL</TT>, or if <TT>targetLimit</TT> and <TT>sourceLimit</TT> are misaligned.
|
||||
* @see ucnv_toUChars
|
||||
* @see ucnv_getNextUChar
|
||||
* @see ucnv_convert
|
||||
* @see ucnv_setToUCallBack
|
||||
*/
|
||||
|
||||
U_CAPI
|
||||
void U_EXPORT2 ucnv_toUnicode (UConverter * converter,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**
|
||||
* Transcodes the source Unicode string to the target string in a codepage encoding
|
||||
* with the specified Unicode converter. For example, if a Unicode to/from JIS
|
||||
* converter is specified, the source string in Unicode will be transcoded to JIS
|
||||
* encoding. The result will be stored in JIS encoding.
|
||||
* if any problems during conversion are encountered it will SUBSTITUTE with the default (initial)
|
||||
* substitute characters.
|
||||
* This function is a more convenient but less efficient version of \Ref{ucnv_fromUnicode}.
|
||||
* @param converter the Unicode converter
|
||||
* @param source the <TT>source</TT> Unicode string (zero Terminated)
|
||||
* @param target the <TT>target</TT> string in codepage encoding (<STRONG>not zero-terminated</STRONG> because some
|
||||
* codepage do not use '\0' as a string terminator
|
||||
* @param targetCapacity Input the number of bytes available in the <TT>target</TT> buffer
|
||||
* @param err the error status code.
|
||||
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned if the
|
||||
* number of bytes provided is not enough for transcoding.
|
||||
* <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> is returned if the converter is <TT>NULL</TT> or the source or target string is empty.
|
||||
* <TT>U_BUFFER_OVERFLOW_ERROR</TT> when <TT>targetSize</TT> turns out to be bigger than <TT>targetCapacity</TT>
|
||||
* @return number of bytes needed in target, regardless of <TT>targetCapacity</TT>
|
||||
* @see ucnv_fromUnicode
|
||||
* @see ucnv_convert
|
||||
*/
|
||||
U_CAPI
|
||||
int32_t U_EXPORT2 ucnv_fromUChars (const UConverter * converter,
|
||||
char *target,
|
||||
int32_t targetCapacity,
|
||||
const UChar * source,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Transcode the source string in codepage encoding to the target string in
|
||||
* Unicode encoding. For example, if a Unicode to/from JIS
|
||||
* converter is specified, the source string in JIS encoding will be transcoded
|
||||
* to Unicode and placed into a provided target buffer.
|
||||
* if any problems during conversion are encountered it will SUBSTITUTE with the Unicode REPLACEMENT char
|
||||
* We recommend that the target buffer be at least as long as the maximum # of bytes per char
|
||||
* in this character set.
|
||||
* A zero-terminator will be placed at the end of the target buffer
|
||||
* This function is a more convenient but less efficient version of \Ref{ucnv_toUnicode}.
|
||||
* @param converter the Unicode converter
|
||||
* @param source the source string in codepage encoding
|
||||
* @param target the target string in Unicode encoding
|
||||
* @param targetCapacity capacity of the target buffer
|
||||
* @param sourceSize : Number of bytes in <TT>source</TT> to be transcoded
|
||||
* @param err the error status code
|
||||
* <TT>U_MEMORY_ALLOCATION_ERROR</TT> will be returned if the
|
||||
* internal process buffer cannot be allocated for transcoding.
|
||||
* <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> is returned if the converter is <TT>NULL</TT> or
|
||||
* if the source or target string is empty.
|
||||
* <TT>U_BUFFER_OVERFLOW_ERROR</TT> when the input buffer is prematurely exhausted and targetSize non-<TT>NULL</TT>.
|
||||
* @return the number of UChar needed in target (including the zero terminator)
|
||||
* @see ucnv_getNextUChar
|
||||
* @see ucnv_toUnicode
|
||||
* @see ucnv_convert
|
||||
*/
|
||||
U_CAPI
|
||||
int32_t U_EXPORT2 ucnv_toUChars (const UConverter * converter,
|
||||
UChar * target,
|
||||
int32_t targetCapacity,
|
||||
const char *source,
|
||||
int32_t sourceSize,
|
||||
UErrorCode * err);
|
||||
|
||||
/********************************
|
||||
* Will convert a codepage buffer one character at a time.
|
||||
* This function was written to be efficient when transcoding small amounts of data at a time.
|
||||
* In that case it will be more efficient than \Ref{ucnv_toUnicode}.
|
||||
* When converting large buffers use \Ref{ucnv_toUnicode}.
|
||||
*@param converter an open UConverter
|
||||
*@param source the address of a pointer to the codepage buffer, will be updated to point after
|
||||
*the bytes consumed in the conversion call.
|
||||
*@param sourceLimit points to the end of the input buffer
|
||||
*@param err fills in error status (see ucnv_toUnicode)
|
||||
*@return a UChar resulting from the partial conversion of source
|
||||
*@see ucnv_toUnicode
|
||||
*@see ucnv_toUChars
|
||||
*@see ucnv_convert
|
||||
*/
|
||||
U_CAPI
|
||||
UChar U_EXPORT2 ucnv_getNextUChar (UConverter * converter,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/**************************
|
||||
* Will convert a sequence of bytes from one codepage to another.
|
||||
* This is <STRONG>NOT AN EFFICIENT</STRONG> way to transcode.
|
||||
* use \Ref{ucnv_toUnicode} and \Ref{ucnv_fromUnicode} for efficiency
|
||||
* @param toConverterName: The name of the converter that will be used to encode the output buffer
|
||||
* @param fromConverterName: The name of the converter that will be used to decode the input buffer
|
||||
* @param target: Pointer to the output buffer to write to
|
||||
* @param targetCapacity: on input contains the capacity of target
|
||||
* @param source: Pointer to the input buffer
|
||||
* @param sourceLength: on input contains the capacity of source
|
||||
* @param err: fills in an error status
|
||||
* @return will be filled in with the number of bytes needed in target
|
||||
* @see ucnv_fromUnicode
|
||||
* @see ucnv_toUnicode
|
||||
* @see ucnv_fromUChars
|
||||
* @see ucnv_toUChars
|
||||
* @see ucnv_getNextUChar
|
||||
*/
|
||||
U_CAPI
|
||||
int32_t U_EXPORT2 ucnv_convert (const char *toConverterName,
|
||||
const char *fromConverterName,
|
||||
char *target,
|
||||
int32_t targetCapacity,
|
||||
const char *source,
|
||||
int32_t sourceLength,
|
||||
UErrorCode * err);
|
||||
|
||||
/**
|
||||
* SYSTEM API
|
||||
* Iterates through every cached converter and frees all the unused ones.
|
||||
*
|
||||
* @return the number of cached converters successfully deleted
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2 ucnv_flushCache (void);
|
||||
|
||||
|
||||
/**
|
||||
* provides a string containing the internal name (based on the alias file) of the converter.
|
||||
* given an index.
|
||||
* @param index the zero-based index of the converter whose name is requested (<TT>[0..ucnv_countAvailable()]</TT>)
|
||||
* @return a pointer to a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
|
||||
* @see ucnv_countAvailable
|
||||
*/
|
||||
U_CAPI
|
||||
const char * U_EXPORT2 ucnv_getAvailableName (int32_t index);
|
||||
|
||||
/**
|
||||
* returns the number of available converters.
|
||||
*
|
||||
* @return the number of available converters
|
||||
* @see ucnv_getAvailableName
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2 ucnv_countAvailable (void);
|
||||
|
||||
/**
|
||||
* returns the current default converter name.
|
||||
*
|
||||
* @return returns the current default converter name;
|
||||
* if a default converter name cannot be determined,
|
||||
* then <code>NULL</code> is returned
|
||||
* @see ucnv_setDefaultName
|
||||
*/
|
||||
U_CAPI const char * U_EXPORT2 ucnv_getDefaultName (void);
|
||||
|
||||
/**
|
||||
* sets the current default converter name.
|
||||
* The lifetime of the return ptr is that of the library
|
||||
* @param name: the converter name you want as default (has to appear in alias file)
|
||||
* @see ucnv_getDefaultName
|
||||
*
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 ucnv_setDefaultName (const char *name);
|
||||
|
||||
|
||||
#endif
|
||||
/*_UCNV*/
|
215
icu4c/source/common/unicode/ucnv_bld.h
Normal file
215
icu4c/source/common/unicode/ucnv_bld.h
Normal file
|
@ -0,0 +1,215 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
*
|
||||
* ucnv_bld.h:
|
||||
* Contains all internal and external data structure definitions
|
||||
* Created & Maintained by Bertrand A. Damiba
|
||||
*
|
||||
*
|
||||
*
|
||||
* ATTENTION:
|
||||
* ---------
|
||||
* Although the data structures in this file are open and stack allocatable
|
||||
* we reserve the right to hide them in further releases.
|
||||
*/
|
||||
|
||||
#ifndef UCNV_BLD_H
|
||||
#define UCNV_BLD_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#define UCNV_MAX_SUBCHAR_LEN 4
|
||||
#define UCNV_ERROR_BUFFER_LENGTH 20
|
||||
|
||||
#ifndef UCMP16_H
|
||||
typedef struct _CompactShortArray CompactShortArray;
|
||||
#endif
|
||||
|
||||
#ifndef UCMP8_H
|
||||
typedef struct _CompactByteArray CompactByteArray;
|
||||
#endif
|
||||
|
||||
#define UCNV_IMPLEMENTED_CONVERSION_TYPES 9
|
||||
/*Sentinel Value used to check the integrity of the binary data files */
|
||||
|
||||
#define UCNV_FILE_CHECK_MARKER 0xBEDA
|
||||
|
||||
/*maximum length of the converter names */
|
||||
#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
|
||||
#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
|
||||
|
||||
/*Pointer to the aforementioned file */
|
||||
#define UCNV_MAX_LINE_TEXT (UCNV_MAX_CONVERTER_NAME_LENGTH*400)
|
||||
|
||||
#define UCNV_SI 0x0F /*Shift in for EBCDIC_STATEFUL and iso2022 states */
|
||||
#define UCNV_SO 0x0E /*Shift out for EBCDIC_STATEFUL and iso2022 states */
|
||||
|
||||
typedef enum {
|
||||
UCNV_UNSUPPORTED_CONVERTER = -1,
|
||||
UCNV_SBCS = 0,
|
||||
UCNV_DBCS = 1,
|
||||
UCNV_MBCS = 2,
|
||||
UCNV_LATIN_1 = 3,
|
||||
UCNV_UTF8 = 4,
|
||||
UCNV_UTF16_BigEndian = 5,
|
||||
UCNV_UTF16_LittleEndian = 6,
|
||||
UCNV_EBCDIC_STATEFUL = 7,
|
||||
UCNV_ISO_2022 = 8,
|
||||
/* Number of converter types for which we have conversion routines. */
|
||||
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9,
|
||||
UCNV_JIS = 9,
|
||||
UCNV_EUC = 10,
|
||||
UCNV_GB = 11
|
||||
} UConverterType;
|
||||
|
||||
typedef enum {
|
||||
UCNV_UNKNOWN = -1,
|
||||
UCNV_IBM = 0
|
||||
} UConverterPlatform;
|
||||
|
||||
|
||||
/*Table Node Definitions */
|
||||
typedef struct
|
||||
{
|
||||
UChar *toUnicode; /* [256]; */
|
||||
CompactByteArray *fromUnicode;
|
||||
}
|
||||
UConverterSBCSTable;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
CompactShortArray *toUnicode;
|
||||
CompactShortArray *fromUnicode;
|
||||
}
|
||||
UConverterDBCSTable;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bool_t *starters; /* [256]; */
|
||||
CompactShortArray *toUnicode;
|
||||
CompactShortArray *fromUnicode;
|
||||
}
|
||||
UConverterMBCSTable;
|
||||
|
||||
typedef union
|
||||
{
|
||||
UConverterSBCSTable sbcs;
|
||||
UConverterDBCSTable dbcs;
|
||||
UConverterMBCSTable mbcs;
|
||||
}
|
||||
UConverterTable;
|
||||
|
||||
|
||||
/*Defines the struct of a UConverterSharedData the immutable, shared part of
|
||||
*UConverter
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint32_t structSize; /* Size of this structure */
|
||||
void *dataMemory;
|
||||
uint32_t referenceCounter; /*used to count number of clients */
|
||||
char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */
|
||||
UConverterPlatform platform; /*platform of the converter (only IBM now) */
|
||||
int32_t codepage; /*codepage # (now IBM-$codepage) */
|
||||
UConverterType conversionType; /*conversion type */
|
||||
int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */
|
||||
int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */
|
||||
struct
|
||||
{ /*initial values of some members of the mutable part of object */
|
||||
uint32_t toUnicodeStatus;
|
||||
int8_t subCharLen;
|
||||
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN];
|
||||
}
|
||||
defaultConverterValues;
|
||||
UConverterTable *table; /*Pointer to conversion data */
|
||||
}
|
||||
UConverterSharedData;
|
||||
|
||||
|
||||
/*Defines a UConverter, the lightweight mutable part the user sees */
|
||||
|
||||
U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
|
||||
itself is compiled under C++, the linkage of the funcptrs will
|
||||
work.
|
||||
*/
|
||||
|
||||
struct UConverter
|
||||
{
|
||||
int32_t toUnicodeStatus; /*Used to internalize stream status information */
|
||||
int32_t fromUnicodeStatus;
|
||||
int8_t invalidCharLength;
|
||||
int8_t invalidUCharLength;
|
||||
int8_t pad;
|
||||
int32_t mode;
|
||||
int8_t subCharLen; /*length of the codepage specific character sequence */
|
||||
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /*codepage specific character sequence */
|
||||
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store unicode data meant for
|
||||
*output stream by the Error function pointers
|
||||
*/
|
||||
unsigned char charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store codepage data meant for
|
||||
* output stream by the Error function pointers
|
||||
*/
|
||||
int8_t UCharErrorBufferLength; /*used to indicate the number of valid UChars
|
||||
*in UCharErrorBuffer
|
||||
*/
|
||||
int8_t charErrorBufferLength; /*used to indicate the number of valid bytes
|
||||
*in charErrorBuffer
|
||||
*/
|
||||
|
||||
UChar invalidUCharBuffer[3];
|
||||
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN];
|
||||
/*Error function pointer called when conversion issues
|
||||
*occur during a T_UConverter_fromUnicode call
|
||||
*/
|
||||
void (*fromUCharErrorBehaviour) (struct UConverter *,
|
||||
char **,
|
||||
const char *,
|
||||
const UChar **,
|
||||
const UChar *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
/*Error function pointer called when conversion issues
|
||||
*occur during a T_UConverter_toUnicode call
|
||||
*/
|
||||
void (*fromCharErrorBehaviour) (struct UConverter *,
|
||||
UChar **,
|
||||
const UChar *,
|
||||
const char **,
|
||||
const char *,
|
||||
int32_t* offsets,
|
||||
bool_t,
|
||||
UErrorCode *);
|
||||
|
||||
UConverterSharedData *sharedData; /*Pointer to the shared immutable part of the
|
||||
*converter object
|
||||
*/
|
||||
void *extraInfo; /*currently only used to point to a struct containing UConverter used by iso 2022
|
||||
Could be used by clients writing their own call back function to
|
||||
pass context to them
|
||||
*/
|
||||
};
|
||||
|
||||
U_CDECL_END /* end of UConverter */
|
||||
|
||||
typedef struct UConverter UConverter;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UConverter *currentConverter;
|
||||
unsigned char escSeq2022[10];
|
||||
int8_t escSeq2022Length;
|
||||
}
|
||||
UConverterDataISO2022;
|
||||
|
||||
#define CONVERTER_FILE_EXTENSION ".cnv"
|
||||
|
||||
/*case insensitive hash key*/
|
||||
U_CAPI int32_t U_EXPORT2 uhash_hashIString(const void* name);
|
||||
|
||||
#endif /* UCNV_BLD_H */
|
151
icu4c/source/common/unicode/ucnv_err.h
Normal file
151
icu4c/source/common/unicode/ucnv_err.h
Normal file
|
@ -0,0 +1,151 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
*
|
||||
* ucnv_err.h:
|
||||
* defines error behaviour functions called by T_UConverter_{from,to}Unicode
|
||||
*
|
||||
* These Functions, although public, should NEVER be called directly, they should be used as parameters to
|
||||
* the T_UConverter_setMissing{Char,Unicode}Action API, to set the behaviour of a converter
|
||||
* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
|
||||
*
|
||||
* usage example:
|
||||
*
|
||||
* ...
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* UConverter* myConverter = T_UConverter_create("ibm-949", &err);
|
||||
*
|
||||
* if (U_SUCCESS(err))
|
||||
* {
|
||||
* T_UConverter_setMissingUnicodeAction(myConverter, (MissingUnicodeAction)UCNV_FROM_U_CALLBACK_STOP, &err);
|
||||
* T_UConverter_setMissingCharAction(myConverter, (MissingCharAction)UCNV_TO_U_CALLBACK_SUBSTITUTE, &err);
|
||||
* }
|
||||
* ...
|
||||
*
|
||||
* The code above tells "myConverter" to stop when it encounters a ILLEGAL/TRUNCATED/INVALID sequences when it is used to
|
||||
* convert from Unicode -> Codepage.
|
||||
* and to substitute with a codepage specific substitutions sequence when converting from Codepage -> Unicode
|
||||
*/
|
||||
|
||||
|
||||
#ifndef UCNV_ERR_H
|
||||
#define UCNV_ERR_H
|
||||
|
||||
#include "unicode/ucnv.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
|
||||
/*Functor STOPS at the ILLEGAL_SEQUENCE */
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/*Functor STOPS at the ILLEGAL_SEQUENCE */
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
|
||||
|
||||
/*Functor SKIPs the ILLEGAL_SEQUENCE */
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
/* Functor Substitute the ILLEGAL SEQUENCE with the current substitution string associated with _this,
|
||||
* in the event target buffer is too small, it will store the extra info in the UConverter, and err
|
||||
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
|
||||
* store the left over data in target, before transcoding the "source Stream"
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
/* Functor Substitute the ILLEGAL SEQUENCE with a sequence of escaped codepoints corresponding to the ILLEGAL
|
||||
* SEQUENCE (format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). In the Event the Converter doesn't support the
|
||||
* characters {u,%}[A-F][0-9], it will substitute the illegal sequence with the substitution characters
|
||||
* (it will behave like the above functor).
|
||||
* in the event target buffer is too small, it will store the extra info in the UConverter, and err
|
||||
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
|
||||
* store the left over data in target, before transcoding the "source Stream"
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (UConverter * _this,
|
||||
char **target,
|
||||
const char *targetLimit,
|
||||
const UChar ** source,
|
||||
const UChar * sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/*Functor SKIPs the ILLEGAL_SEQUENCE */
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
/* Functor Substitute the ILLEGAL SEQUENCE with the current substitution string associated with _this,
|
||||
* in the event target buffer is too small, it will store the extra info in the UConverter, and err
|
||||
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_toUnicode is called, it will
|
||||
* store the left over data in target, before transcoding the "source Stream"
|
||||
*/
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
/* Functor Substitute the ILLEGAL SEQUENCE with a sequence of escaped codepoints corresponding to the
|
||||
* ILLEGAL SEQUENCE (format %XNN, e.g. "%XFF%X0A%XC8%X03").
|
||||
* in the event target buffer is too small, it will store the extra info in the UConverter, and err
|
||||
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_toUnicode is called, it will
|
||||
* store the left over data in target, before transcoding the "source Stream"
|
||||
*/
|
||||
|
||||
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (UConverter * _this,
|
||||
UChar ** target,
|
||||
const UChar * targetLimit,
|
||||
const char **source,
|
||||
const char *sourceLimit,
|
||||
int32_t* offsets,
|
||||
bool_t flush,
|
||||
UErrorCode * err);
|
||||
|
||||
|
||||
#endif/*UCNV_ERR_H*/
|
215
icu4c/source/common/unicode/udata.h
Normal file
215
icu4c/source/common/unicode/udata.h
Normal file
|
@ -0,0 +1,215 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* *
|
||||
* COPYRIGHT: *
|
||||
* (C) Copyright International Business Machines Corporation, 1999 *
|
||||
* Licensed Material - Program-Property of IBM - All Rights Reserved. *
|
||||
* US Government Users Restricted Rights - Use, duplication, or disclosure *
|
||||
* restricted by GSA ADP Schedule Contract with IBM Corp. *
|
||||
* *
|
||||
*******************************************************************************
|
||||
* file name: udata.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 1999oct25
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __UDATA_H__
|
||||
#define __UDATA_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* Information about data memory.
|
||||
* This structure may grow in the future, indicated by the
|
||||
* <code>size</code> field.
|
||||
*
|
||||
* <p>The platform data property fields help determine if a data
|
||||
* file can be efficiently used on a given machine.
|
||||
* The particular fields are of importance only if the data
|
||||
* is affected by the properties - if there is integer data
|
||||
* with word sizes > 1 byte, char* text, or UChar* text.</p>
|
||||
*
|
||||
* <p>The implementation for the <code>udata_open[Choice]()</code>
|
||||
* functions may reject data based on the value in <code>isBigEndian</code>.
|
||||
* No other field is used by the <code>udata</code> API implementation.</p>
|
||||
*
|
||||
* <p>The <code>dataFormat</code> may be used to identify
|
||||
* the kind of data, e.g. a converter table.</p>
|
||||
*
|
||||
* <p>The <code>formatVersion</code> field should be used to
|
||||
* make sure that the format can be interpreted.
|
||||
* It may be a good idea to check only for the one or two highest
|
||||
* of the version elements to allow the data memory to
|
||||
* get more or somewhat rearranged contents, for as long
|
||||
* as the using code can still interpret the older contents.</p>
|
||||
*
|
||||
* <p>The <code>dataVersion</code> field is intended to be a
|
||||
* common place to store the source version of the data;
|
||||
* for data from the Unicode character database, this could
|
||||
* reflect the Unicode version.</p>
|
||||
*/
|
||||
typedef struct {
|
||||
/** @memo sizeof(UDataInfo) */
|
||||
uint16_t size;
|
||||
|
||||
/** @memo unused, set to 0 */
|
||||
uint16_t reservedWord;
|
||||
|
||||
/* platform data properties */
|
||||
/** @memo 0 for little-endian machine, 1 for big-endian */
|
||||
uint8_t isBigEndian;
|
||||
|
||||
/** @memo see U_CHARSET_FAMILY values in utypes.h */
|
||||
uint8_t charsetFamily;
|
||||
|
||||
/** @memo sizeof(UChar), one of { 1, 2, 4 } */
|
||||
uint8_t sizeofUChar;
|
||||
|
||||
/** @memo unused, set to 0 */
|
||||
uint8_t reservedByte;
|
||||
|
||||
/** @memo data format identifier */
|
||||
uint8_t dataFormat[4];
|
||||
|
||||
/** @memo versions: [0] major [1] minor [2] milli [3] micro */
|
||||
uint8_t formatVersion[4];
|
||||
uint8_t dataVersion[4];
|
||||
} UDataInfo;
|
||||
|
||||
/* API for reading data -----------------------------------------------------*/
|
||||
|
||||
/**
|
||||
* Forward declaration of the data memory type.
|
||||
*/
|
||||
typedef struct UDataMemory UDataMemory;
|
||||
|
||||
/**
|
||||
* Callback function for udata_openChoice().
|
||||
* @param context parameter passed into <code>udata_openChoice()</code>.
|
||||
* @param type The type of the data as passed into <code>udata_openChoice()</code>.
|
||||
* It may be <code>NULL</code>.
|
||||
* @param name The name of the data as passed into <code>udata_openChoice()</code>.
|
||||
* @param pInfo A pointer to the <code>UDataInfo</code> structure
|
||||
* of data that has been loaded and will be returned
|
||||
* by <code>udata_openChoice()</code> if this function
|
||||
* returns <code>TRUE</code>.
|
||||
* @return TRUE if the current data memory is acceptable
|
||||
*/
|
||||
typedef bool_t
|
||||
UDataMemoryIsAcceptable(void *context,
|
||||
const char *type, const char *name,
|
||||
UDataInfo *pInfo);
|
||||
|
||||
|
||||
/**
|
||||
* Convenience function.
|
||||
* This function works the same as <code>udata_openChoice</code>
|
||||
* except that any data that matches the type and name
|
||||
* is assumed to be acceptable.
|
||||
*/
|
||||
U_CAPI UDataMemory * U_EXPORT2
|
||||
udata_open(const char *path, const char *type, const char *name,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Data loading function.
|
||||
* This function is used to efficiently find and load data for
|
||||
* ICU and applications using ICU.
|
||||
* It provides an abstract interface that allows to specify a data
|
||||
* type and name to find and load the data.
|
||||
*
|
||||
* <p>The implementation depends on platform properties and user preferences
|
||||
* and may involve loading shared libraries (DLLs), mapping
|
||||
* files into memory, or fopen()/fread() files.
|
||||
* It may also involve using static memory or database queries etc.
|
||||
* Several or all data items may be combined into one entity
|
||||
* (DLL, memory-mappable file).</p>
|
||||
*
|
||||
* <p>The data is always preceded by a header that includes
|
||||
* a <code>UDataInfo</code> structure.
|
||||
* The caller's <code>isAcceptable()</code> function is called to make
|
||||
* sure that the data is useful. It may be called several times if it
|
||||
* rejects the data and there is more than one location with data
|
||||
* matching the type and name.</p>
|
||||
*
|
||||
* <p>If <code>path==NULL</code>, then ICU data is loaded.
|
||||
* Otherwise, it is separated into a basename and a basename-less path string.
|
||||
* If the path string is empty, then <code>u_getDataDirectory()</code>
|
||||
* is set in its place.
|
||||
* When data is loaded from files or DLLs (shared libraries) and
|
||||
* may be stored in common files, then the data finding is roughly as follows:
|
||||
* <ul>
|
||||
* <li>common file at path/basename has entry name_type?</li>
|
||||
* <li>common file at basename has entry name_type?</li>
|
||||
* <li>separate file at path/basename_name_type?</li>
|
||||
* <li>separate file at basename_name_type?</li>
|
||||
* <li>separate file at path/name_type?</li>
|
||||
* <li>separate file at name_type?</li>
|
||||
* </ul>
|
||||
* If the basename is empty, then only the last two options are attempted.
|
||||
* Otherwise, it serves as a name for a common data file or as a basename
|
||||
* (collection name) prefix for individual files.</p>
|
||||
*
|
||||
* @param path Specifies an absolute path and/or a basename for the
|
||||
* finding of the data in the file system.
|
||||
* <code>NULL</code> for ICU data.
|
||||
* @param type A string that specifies the type of data to be loaded.
|
||||
* For example, resource bundles are loaded with type "res",
|
||||
* conversion tables with type "cnv".
|
||||
* This may be <code>NULL</code> or empty.
|
||||
* @param name A string that specifies the name of the data.
|
||||
* @param isAcceptable This function is called to verify that loaded data
|
||||
* is useful for the client code. If it returns FALSE
|
||||
* for all data items, then <code>udata_openChoice()</code>
|
||||
* will return with an error.
|
||||
* @param context Arbitrary parameter to be passed into isAcceptable.
|
||||
* @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
|
||||
* @return A pointer (handle) to a data memory object, or <code>NULL</code>
|
||||
* if an error occurs. Call <code>udata_getMemory()</code>
|
||||
* to get a pointer to the actual data.
|
||||
*/
|
||||
U_CAPI UDataMemory * U_EXPORT2
|
||||
udata_openChoice(const char *path, const char *type, const char *name,
|
||||
UDataMemoryIsAcceptable *isAcceptable, void *context,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/**
|
||||
* Close the data memory.
|
||||
* This function must be called to allow the system to
|
||||
* release resources associated with this data memory.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
udata_close(UDataMemory *pData);
|
||||
|
||||
/**
|
||||
* Get the pointer to the actual data inside the data memory.
|
||||
* The data is read-only.
|
||||
*/
|
||||
U_CAPI const void * U_EXPORT2
|
||||
udata_getMemory(UDataMemory *pData);
|
||||
|
||||
/**
|
||||
* Get the information from the data memory header.
|
||||
* This allows to get access to the header containing
|
||||
* platform data properties etc. which is not part of
|
||||
* the data itself and can therefore not be accessed
|
||||
* via the pointer that <code>udata_getMemory()</code> returns.
|
||||
*
|
||||
* @param pData pointer to the data memory object
|
||||
* @param pInfo pointer to a UDataInfo object;
|
||||
* its <code>size</code> field must be set correctly,
|
||||
* typically to <code>sizeof(UDataInfo)</code>.
|
||||
*
|
||||
* <code>*pInfo</code> will be filled with the UDataInfo structure
|
||||
* in the data memory object. If this structure is smaller than
|
||||
* <code>pInfo->size</code>, then the <code>size</code> will be
|
||||
* adjusted and only part of the structure will be filled.
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
|
||||
|
||||
#endif
|
469
icu4c/source/common/unicode/uloc.h
Normal file
469
icu4c/source/common/unicode/uloc.h
Normal file
|
@ -0,0 +1,469 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-1999, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
* File ULOC.H
|
||||
*
|
||||
* Modification History:
|
||||
*
|
||||
* Date Name Description
|
||||
* 04/01/97 aliu Creation.
|
||||
* 08/22/98 stephen JDK 1.2 sync.
|
||||
* 12/08/98 rtg New C API for Locale
|
||||
* 03/30/99 damiba overhaul
|
||||
* 03/31/99 helena Javadoc for uloc functions.
|
||||
* 04/15/99 Madhu Updated Javadoc
|
||||
********************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef ULOC_H
|
||||
#define ULOC_H
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
*
|
||||
* A <code>Locale</code> represents a specific geographical, political,
|
||||
* or cultural region. An operation that requires a <code>Locale</code> to perform
|
||||
* its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
|
||||
* to tailor information for the user. For example, displaying a number
|
||||
* is a locale-sensitive operation--the number should be formatted
|
||||
* according to the customs/conventions of the user's native country,
|
||||
* region, or culture. In the C APIs, a locale is simply a const char string.
|
||||
*
|
||||
* <P>
|
||||
* You create a <code>Locale</code> with one of the three options listed below.
|
||||
* Each of the components is separated by '_' in the locale string.
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . newLanguage
|
||||
* .
|
||||
* . newLanguage + newCountry
|
||||
* .
|
||||
* . newLanguage + newCountry + newVariant
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* The first option is a valid <STRONG>ISO
|
||||
* Language Code.</STRONG> These codes are the lower-case two-letter
|
||||
* codes as defined by ISO-639.
|
||||
* You can find a full list of these codes at a number of sites, such as:
|
||||
* <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
|
||||
* <code>http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</code></a>
|
||||
*
|
||||
* <P>
|
||||
* The second option includes an additional <STRONG>ISO Country
|
||||
* Code.</STRONG> These codes are the upper-case two-letter codes
|
||||
* as defined by ISO-3166.
|
||||
* You can find a full list of these codes at a number of sites, such as:
|
||||
* <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
|
||||
* <code>http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</code></a>
|
||||
*
|
||||
* <P>
|
||||
* The third option requires one more piece of additional information--the
|
||||
* <STRONG>Variant.</STRONG>
|
||||
* The Variant codes are vendor and browser-specific.
|
||||
* For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
|
||||
* Where there are two variants, separate them with an underscore, and
|
||||
* put the most important one first. For
|
||||
* example, a Traditional Spanish collation might be referenced, with
|
||||
* "ES", "ES", "Traditional_WIN".
|
||||
*
|
||||
* <P>
|
||||
* Because a <code>Locale</code> is just an identifier for a region,
|
||||
* no validity check is performed when you specify a <code>Locale</code>.
|
||||
* If you want to see whether particular resources are available for the
|
||||
* <code>Locale</code> you asked for, you must query those resources. For
|
||||
* example, ask the <code>UNumberFormat</code> for the locales it supports
|
||||
* using its <code>getAvailable</code> method.
|
||||
* <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
|
||||
* locale, you get back the best available match, not necessarily
|
||||
* precisely what you asked for. For more information, look at
|
||||
* <a href="ures.html"><code>UResourceBundle</code></a>.
|
||||
*
|
||||
* <P>
|
||||
* The <code>Locale</code> provides a number of convenient constants
|
||||
* that you can use to specify the commonly used
|
||||
* locales. For example, the following refers to a locale
|
||||
* for the United States:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . ULOC_US
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*
|
||||
* <P>
|
||||
* Once you've specified a locale you can query it for information about
|
||||
* itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and
|
||||
* <code>uloc_getLanguage</code> to get the ISO Language Code. You can
|
||||
* use <code>uloc_getDisplayCountry</code> to get the
|
||||
* name of the country suitable for displaying to the user. Similarly,
|
||||
* you can use <code>uloc_getDisplayLanguage</code> to get the name of
|
||||
* the language suitable for displaying to the user. Interestingly,
|
||||
* the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive
|
||||
* and have two versions: one that uses the default locale and one
|
||||
* that takes a locale as an argument and displays the name or country in
|
||||
* a language appropriate to that locale.
|
||||
*
|
||||
* <P>
|
||||
* The ICU provides a number of services that perform locale-sensitive
|
||||
* operations. For example, the <code>unum_xxx</code> functions format
|
||||
* numbers, currency, or percentages in a locale-sensitive manner.
|
||||
* </P>
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . UErrorCode success = U_ZERO_ERROR;
|
||||
* . UNumberFormat *nf;
|
||||
* . const char* myLocale = "fr_FR";
|
||||
* .
|
||||
* . nf = unum_open( UNUM_DEFAULT, NULL, success );
|
||||
* . unum_close(nf);
|
||||
* . nf = unum_open( UNUM_CURRENCY, NULL, success );
|
||||
* . unum_close(nf);
|
||||
* . nf = unum_open( UNUM_PERCENT, NULL, success );
|
||||
* . unum_close(nf);
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* Each of these methods has two variants; one with an explicit locale
|
||||
* and one without; the latter using the default locale.
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* .
|
||||
* . nf = unum_open( UNUM_DEFAULT, myLocale, success );
|
||||
* . unum_close(nf);
|
||||
* . nf = unum_open( UNUM_CURRENCY, myLocale, success );
|
||||
* . unum_close(nf);
|
||||
* . nf = unum_open( UNUM_PERCENT, myLocale, success );
|
||||
* . unum_close(nf);
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
* A <code>Locale</code> is the mechanism for identifying the kind of services
|
||||
* (<code>UNumberFormat</code>) that you would like to get. The locale is
|
||||
* <STRONG>just</STRONG> a mechanism for identifying these services.
|
||||
*
|
||||
* <P>
|
||||
* Each international service that performs locale-sensitive operations
|
||||
* allows you
|
||||
* to get all the available objects of that type. You can sift
|
||||
* through these objects by language, country, or variant,
|
||||
* and use the display names to present a menu to the user.
|
||||
* For example, you can create a menu of all the collation objects
|
||||
* suitable for a given language. Such classes implement these
|
||||
* three class methods:
|
||||
* <blockquote>
|
||||
* <pre>
|
||||
* . const char* uloc_getAvailable(int32_t index);
|
||||
* . int32_t uloc_countAvailable();
|
||||
* . int32_t
|
||||
* . uloc_getDisplayName(const char* localeID,
|
||||
* . const char* inLocaleID,
|
||||
* . UChar* result,
|
||||
* . int32_t maxResultSize,
|
||||
* . UErrorCode* err);
|
||||
* .
|
||||
* </pre>
|
||||
* </blockquote>
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Useful constants for language.
|
||||
*/
|
||||
#define ULOC_ENGLISH "en"
|
||||
#define ULOC_FRENCH "fr"
|
||||
#define ULOC_GERMAN "de"
|
||||
#define ULOC_ITALIAN "it"
|
||||
#define ULOC_JAPANESE "ja"
|
||||
#define ULOC_KOREAN "ko"
|
||||
#define ULOC_CHINESE "zh"
|
||||
#define ULOC_SIMPLIFIED_CHINESE "zh_CN"
|
||||
#define ULOC_TRADITIONAL_CHINESE "zh_TW"
|
||||
|
||||
/*
|
||||
*
|
||||
* Useful constants for country.
|
||||
*/
|
||||
#define ULOC_FRANCE "fr_FR"
|
||||
#define ULOC_GERMANY "de_DE"
|
||||
#define ULOC_ITALY "it_IT"
|
||||
#define ULOC_JAPAN "ja_JP"
|
||||
#define ULOC_KOREA "ko_KR"
|
||||
#define ULOC_CHINA "zh_CN"
|
||||
#define ULOC_PRC "zh_CN"
|
||||
#define ULOC_TAIWAN "zh_TW"
|
||||
#define ULOC_UK "en_GB"
|
||||
#define ULOC_US "en_US"
|
||||
#define ULOC_CANADA "en_CA"
|
||||
#define ULOC_CANADA_FRENCH "fr_CA"
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Gets the system's default locale.
|
||||
*
|
||||
* @return the system default locale
|
||||
*/
|
||||
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uloc_getDefault(void);
|
||||
|
||||
/**
|
||||
* Sets the system's default locale.
|
||||
*
|
||||
* @param localeID the new system default locale
|
||||
* @param status the error information if the setting of default locale fails
|
||||
*/
|
||||
U_CAPI void U_EXPORT2
|
||||
uloc_setDefault(const char* localeID,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Gets the language code for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the ISO language code with
|
||||
* @param language the language code for localeID
|
||||
* @param languageCapacity the size of the language buffer to store the
|
||||
* language code with
|
||||
* @param err error information if retrieving the language code failed
|
||||
* @return the actual buffer size needed for the language code. If it's greater
|
||||
* than languageCapacity, the returned language code will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getLanguage(const char* localeID,
|
||||
char* language,
|
||||
int32_t languageCapacity,
|
||||
UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Gets the country code for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the country code with
|
||||
* @param country the country code for localeID
|
||||
* @param countryCapacity the size of the country buffer to store the
|
||||
* country code with
|
||||
* @param err error information if retrieving the country code failed
|
||||
* @return the actual buffer size needed for the country code. If it's greater
|
||||
* than countryCapacity, the returned country code will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getCountry(const char* localeID,
|
||||
char* country,
|
||||
int32_t countryCapacity,
|
||||
UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Gets the variant code for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the variant code with
|
||||
* @param variant the variant code for localeID
|
||||
* @param variantCapacity the size of the variant buffer to store the
|
||||
* variant code with
|
||||
* @param err error information if retrieving the variant code failed
|
||||
* @return the actual buffer size needed for the variant code. If it's greater
|
||||
* than variantCapacity, the returned variant code will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getVariant(const char* localeID,
|
||||
char* variant,
|
||||
int32_t variantCapacity,
|
||||
UErrorCode* err);
|
||||
/**
|
||||
* Gets the full name for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the full name with
|
||||
* @param name the full name for localeID
|
||||
* @param nameCapacity the size of the name buffer to store the
|
||||
* full name with
|
||||
* @param err error information if retrieving the full name failed
|
||||
* @return the actual buffer size needed for the full name. If it's greater
|
||||
* than nameCapacity, the returned full name will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getName(const char* localeID,
|
||||
char* name,
|
||||
int32_t nameCapacity,
|
||||
UErrorCode* err);
|
||||
|
||||
/**
|
||||
* Gets the ISO language code for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the ISO language code with
|
||||
* @return language the ISO language code for localeID
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uloc_getISO3Language(const char* localeID);
|
||||
|
||||
|
||||
/**
|
||||
* Gets the ISO country code for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the ISO country code with
|
||||
* @return country the ISO country code for localeID
|
||||
*/
|
||||
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uloc_getISO3Country(const char* localeID);
|
||||
|
||||
/**
|
||||
* Gets the Win32 LCID value for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the Win32 LCID value with
|
||||
* @return the Win32 LCID for localeID
|
||||
*/
|
||||
|
||||
U_CAPI uint32_t U_EXPORT2
|
||||
uloc_getLCID(const char* localeID);
|
||||
|
||||
/**
|
||||
* Gets the language name suitable for display for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the ISO language code with
|
||||
* @param language the displayable language code for localeID
|
||||
* @param languageCapacity the size of the language buffer to store the
|
||||
* displayable language code with
|
||||
* @param err error information if retrieving the displayable language code failed
|
||||
* @return the actual buffer size needed for the displayable language code. If it's greater
|
||||
* than languageCapacity, the returned language code will be truncated.
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getDisplayLanguage(const char* locale,
|
||||
const char* inLocale,
|
||||
UChar* language,
|
||||
int32_t languageCapacity,
|
||||
UErrorCode* status);
|
||||
|
||||
/**
|
||||
* Gets the country name suitable for display for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the displayable country code with
|
||||
* @param country the displayable country code for localeID
|
||||
* @param countryCapacity the size of the country buffer to store the
|
||||
* displayable country code with
|
||||
* @param err error information if retrieving the displayable country code failed
|
||||
* @return the actual buffer size needed for the displayable country code. If it's greater
|
||||
* than countryCapacity, the returned displayable country code will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getDisplayCountry(const char* locale,
|
||||
const char* inLocale,
|
||||
UChar* country,
|
||||
int32_t countryCapacity,
|
||||
UErrorCode* status); /* NULL may be used to specify the default */
|
||||
|
||||
|
||||
/**
|
||||
* Gets the variant code suitable for display for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the displayable variant code with
|
||||
* @param variant the displayable variant code for localeID
|
||||
* @param variantCapacity the size of the variant buffer to store the
|
||||
* displayable variant code with
|
||||
* @param err error information if retrieving the displayable variant code failed
|
||||
* @return the actual buffer size needed for the displayable variant code. If it's greater
|
||||
* than variantCapacity, the returned displayable variant code will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getDisplayVariant(const char* locale,
|
||||
const char* inLocale,
|
||||
UChar* variant,
|
||||
int32_t variantCapacity,
|
||||
UErrorCode* status); /* NULL may be used to specify the default */
|
||||
|
||||
/**
|
||||
* Gets the full name suitable for display for the specified locale.
|
||||
*
|
||||
* @param localeID the locale to get the displayable name with
|
||||
* @param result the displayable name for localeID
|
||||
* @param maxResultSize the size of the name buffer to store the
|
||||
* displayable full name with
|
||||
* @param err error information if retrieving the displayable name failed
|
||||
* @return the actual buffer size needed for the displayable name. If it's greater
|
||||
* than maxResultSize, the returned displayable name will be truncated.
|
||||
*/
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
uloc_getDisplayName(const char* localeID,
|
||||
const char* inLocaleID, /* NULL may be used to specify the default */
|
||||
UChar* result,
|
||||
int32_t maxResultSize,
|
||||
UErrorCode* err);
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* Gets the specified locale from a list of all available locales.
|
||||
* The return value is a pointer to an item of
|
||||
* a locale name array. Both this array and the pointers
|
||||
* it contains are owned by ICU and should not be deleted or written through
|
||||
* by the caller. The locale name is terminated by a null pointer.
|
||||
* @param index the specific locale name index of the available locale list
|
||||
* @return a specified locale name of all available locales
|
||||
*/
|
||||
U_CAPI const char* U_EXPORT2
|
||||
uloc_getAvailable(int32_t index);
|
||||
|
||||
/**
|
||||
* Gets the size of the list of all available locales.
|
||||
*
|
||||
* @return the size of the locale list
|
||||
*/
|
||||
U_CAPI int32_t U_EXPORT2 uloc_countAvailable(void);
|
||||
|
||||
/**
|
||||
*
|
||||
* Gets a list of all available language codes defined in ISO 639. This is a pointer
|
||||
* to an array of pointers to arrays of char. All of these pointers are owned
|
||||
* by ICU-- do not delete them, and do not write through them. The array is
|
||||
* terminated with a null pointer.
|
||||
* @return a list of all available language codes
|
||||
*/
|
||||
U_CAPI const char* const* U_EXPORT2
|
||||
uloc_getISOLanguages(void);
|
||||
|
||||
/**
|
||||
*
|
||||
* Gets a list of all available 2-letter country codes defined in ISO 3166. This is a
|
||||
* pointer to an array of pointers to arrays of char. All of these pointers are
|
||||
* owned by ICU-- do not delete them, and do not write through them. The array is
|
||||
* terminated with a null pointer.
|
||||
* @return a list of all available country codes
|
||||
*/
|
||||
U_CAPI const char* const* U_EXPORT2
|
||||
uloc_getISOCountries(void);
|
||||
|
||||
/**
|
||||
* Deprecated 1999dec14 - Gets the directory containing the locale data files.
|
||||
*
|
||||
* @return the locale data file directory
|
||||
*/
|
||||
#define uloc_getDataDirectory u_getDataDirectory
|
||||
|
||||
/**
|
||||
* Deprecated 1999dec14 - Sets the directory containing the locale data files.
|
||||
*
|
||||
* @return the new directory to fetch locale data from
|
||||
*/
|
||||
#define uloc_setDataDirectory u_setDataDirectory
|
||||
|
||||
/*Internal function */
|
||||
int32_t U_EXPORT2
|
||||
uloc_getParent(const char* localeID,
|
||||
char* parent,
|
||||
int32_t parentCapacity,
|
||||
UErrorCode* err);
|
||||
|
||||
/*eof*/
|
||||
|
||||
|
||||
#endif /*_ULOC*/
|
||||
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue