ICU-12 all public include files are now in unicode dir, all private icu_ functions renamed to uprv_

X-SVN-Rev: 472
This commit is contained in:
Vladimir Weinstein 1999-12-28 23:39:02 +00:00
parent 4414ab71fa
commit 8e21f86812
132 changed files with 12021 additions and 11991 deletions

View file

@ -18,4 +18,4 @@
#endif
// provide an object for the implementations of the member functions of BiDi
#include "bidi.h"
#include "unicode/bidi.h"

View file

@ -1,255 +1,2 @@
/*
*******************************************************************************
*
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: bidi.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep15
* created by: Markus W. Scherer
*/
#error Please include unicode/bidi.h instead
#ifndef BIDI_H
#define BIDI_H
#include "utypes.h"
#include "ubidi.h"
#ifndef XP_CPLUSPLUS
# error This is a C++ header file.
#endif
/**
* BiDi is a C++ wrapper class for UBiDi.
* You need one BiDi object in place of one UBiDi object.
* For details on the API and implementation of the
* Unicode BiDi algorithm, see ubidi.h.
*
* @see UBiDi
*/
class U_COMMON_API BiDi {
public:
    /** @memo Default constructor; opens the wrapped UBiDi with ubidi_open(). */
    BiDi();

    /** @memo Constructor; opens the wrapped UBiDi with ubidi_open() and takes an error code. */
    BiDi(UErrorCode &rErrorCode);

    /** @memo Preallocating constructor; opens the wrapped UBiDi with ubidi_openSized(). */
    BiDi(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode &rErrorCode);

    /**
     * @memo Destructor; closes the wrapped UBiDi with ubidi_close().
     *
     * NOTE(review): this class declares no copy constructor or assignment
     * operator, so a copied BiDi would share pBiDi with its source and both
     * destructors would pass the same pointer to ubidi_close().
     * Confirm that callers never copy BiDi objects.
     */
    ~BiDi();

    /** @memo Prepare this object for the text of one paragraph. */
    BiDi &setPara(const UChar *text, UTextOffset length,
                  UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
                  UErrorCode &rErrorCode);

    /** @memo Prepare this object for one line of the text held by a paragraph object. */
    BiDi &setLine(const BiDi &rParaBiDi,
                  UTextOffset start, UTextOffset limit,
                  UErrorCode &rErrorCode);

    /** @memo Directionality of the text. */
    UBiDiDirection getDirection() const;

    /** @memo Length of the text. */
    UTextOffset getLength() const;

    /** @memo Paragraph level of the text. */
    UBiDiLevel getParaLevel() const;

    /** @memo Level of a single character. */
    UBiDiLevel getLevelAt(UTextOffset charIndex) const;

    /** @memo Array holding the level of every character. */
    const UBiDiLevel *getLevels(UErrorCode &rErrorCode);

    /** @memo Retrieve one logical run. */
    void getLogicalRun(UTextOffset logicalStart,
                       UTextOffset &rLogicalLimit, UBiDiLevel &rLevel) const;

    /** @memo Number of runs. */
    UTextOffset countRuns(UErrorCode &rErrorCode);

    /**
     * @memo Retrieve the logical start, the length, and the directionality
     * of one run; the directionality can be 0 for LTR or 1 for RTL.
     */
    UBiDiDirection getVisualRun(UTextOffset runIndex,
                                UTextOffset &rLogicalStart, UTextOffset &rLength);

    /** @memo Map a logical text position to its visual position. */
    UTextOffset getVisualIndex(UTextOffset logicalIndex, UErrorCode &rErrorCode);

    /** @memo Map a visual position to its logical text position. */
    UTextOffset getLogicalIndex(UTextOffset visualIndex, UErrorCode &rErrorCode);

    /**
     * @memo Fill in a logical-to-visual index map (array) for the characters
     * of the UBiDi (paragraph or line) object.
     */
    void getLogicalMap(UTextOffset *indexMap, UErrorCode &rErrorCode);

    /**
     * @memo Fill in a visual-to-logical index map (array) for the characters
     * of the UBiDi (paragraph or line) object.
     */
    void getVisualMap(UTextOffset *indexMap, UErrorCode &rErrorCode);

    /** @memo Equivalent to ubidi_reorderLogical(). */
    static void reorderLogical(const UBiDiLevel *levels, UTextOffset length,
                               UTextOffset *indexMap);

    /** @memo Equivalent to ubidi_reorderVisual(). */
    static void reorderVisual(const UBiDiLevel *levels, UTextOffset length,
                              UTextOffset *indexMap);

    /** @memo Equivalent to ubidi_invertMap(). */
    static void invertMap(const UTextOffset *srcMap, UTextOffset *destMap,
                          UTextOffset length);

protected:
    /** The wrapped UBiDi object; set by the constructors and released by the destructor. */
    UBiDi *pBiDi;
};
/* Inline implementations. -------------------------------------------------- */
/** Default constructor: stores whatever ubidi_open() returns; no error is reported here. */
inline BiDi::BiDi()
    : pBiDi(ubidi_open()) {
}
/**
 * Error-aware constructor.
 * pBiDi starts out null. ubidi_open() is called only when rErrorCode
 * indicates success on entry; a null result from ubidi_open() is then
 * reported as U_MEMORY_ALLOCATION_ERROR.
 */
inline BiDi::BiDi(UErrorCode &rErrorCode)
    : pBiDi(0) {
    if(U_SUCCESS(rErrorCode)) {
        pBiDi=ubidi_open();
        if(pBiDi==0) {
            rErrorCode=U_MEMORY_ALLOCATION_ERROR;
        }
    }
}
/**
 * Preallocating constructor: stores the result of ubidi_openSized(),
 * which receives both size arguments and the address of rErrorCode.
 */
inline BiDi::BiDi(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode &rErrorCode)
    : pBiDi(ubidi_openSized(maxLength, maxRunCount, &rErrorCode)) {
}
/** Destructor: clears pBiDi and passes the wrapped UBiDi to ubidi_close(). */
inline BiDi::~BiDi() {
    UBiDi *handle=pBiDi;
    pBiDi=0;
    ubidi_close(handle);
}
/**
 * Forwards the paragraph text, its length, the paragraph level and the
 * embedding levels to ubidi_setPara() for the wrapped UBiDi.
 * rErrorCode is handed to ubidi_setPara() by address.
 * Returns *this.
 */
inline BiDi &
BiDi::setPara(const UChar *text, UTextOffset length,
UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
UErrorCode &rErrorCode) {
ubidi_setPara(pBiDi, text, length, paraLevel, embeddingLevels, &rErrorCode);
return *this;
}
/**
 * Forwards to ubidi_setLine(): the UBiDi wrapped by rParaBiDi is passed as
 * the first argument and this object's own UBiDi as the fourth.
 * rErrorCode is handed to ubidi_setLine() by address.
 * Returns *this.
 */
inline BiDi &
BiDi::setLine(const BiDi &rParaBiDi,
UTextOffset start, UTextOffset limit,
UErrorCode &rErrorCode) {
ubidi_setLine(rParaBiDi.pBiDi, start, limit, pBiDi, &rErrorCode);
return *this;
}
/** Returns the value of ubidi_getDirection() for the wrapped UBiDi. */
inline UBiDiDirection BiDi::getDirection() const {
    const UBiDiDirection direction=ubidi_getDirection(pBiDi);
    return direction;
}
/** Returns the value of ubidi_getLength() for the wrapped UBiDi. */
inline UTextOffset BiDi::getLength() const {
    const UTextOffset textLength=ubidi_getLength(pBiDi);
    return textLength;
}
/** Returns the value of ubidi_getParaLevel() for the wrapped UBiDi. */
inline UBiDiLevel BiDi::getParaLevel() const {
    const UBiDiLevel level=ubidi_getParaLevel(pBiDi);
    return level;
}
/** Returns the value of ubidi_getLevelAt() for charIndex in the wrapped UBiDi. */
inline UBiDiLevel BiDi::getLevelAt(UTextOffset charIndex) const {
    const UBiDiLevel level=ubidi_getLevelAt(pBiDi, charIndex);
    return level;
}
/**
 * Returns the pointer produced by ubidi_getLevels() for the wrapped UBiDi;
 * rErrorCode is handed to ubidi_getLevels() by address.
 */
inline const UBiDiLevel *BiDi::getLevels(UErrorCode &rErrorCode) {
    const UBiDiLevel *levels=ubidi_getLevels(pBiDi, &rErrorCode);
    return levels;
}
/**
 * Forwards to ubidi_getLogicalRun() for the wrapped UBiDi.
 * rLogicalLimit and rLevel are passed by address, so the C function
 * writes its outputs directly into the caller's variables.
 */
inline void
BiDi::getLogicalRun(UTextOffset logicalStart,
UTextOffset &rLogicalLimit, UBiDiLevel &rLevel) const {
ubidi_getLogicalRun(pBiDi, logicalStart, &rLogicalLimit, &rLevel);
}
/**
 * Returns the value of ubidi_countRuns() for the wrapped UBiDi;
 * rErrorCode is handed to ubidi_countRuns() by address.
 */
inline UTextOffset BiDi::countRuns(UErrorCode &rErrorCode) {
    const UTextOffset runCount=ubidi_countRuns(pBiDi, &rErrorCode);
    return runCount;
}
/**
 * Returns the value of ubidi_getVisualRun() for run runIndex of the wrapped
 * UBiDi; rLogicalStart and rLength are passed by address as outputs.
 */
inline UBiDiDirection BiDi::getVisualRun(UTextOffset runIndex,
                                         UTextOffset &rLogicalStart,
                                         UTextOffset &rLength) {
    const UBiDiDirection runDirection=
        ubidi_getVisualRun(pBiDi, runIndex, &rLogicalStart, &rLength);
    return runDirection;
}
/**
 * Returns the value of ubidi_getVisualIndex() for logicalIndex in the wrapped
 * UBiDi; rErrorCode is handed to the C function by address.
 */
inline UTextOffset BiDi::getVisualIndex(UTextOffset logicalIndex, UErrorCode &rErrorCode) {
    const UTextOffset visualPosition=ubidi_getVisualIndex(pBiDi, logicalIndex, &rErrorCode);
    return visualPosition;
}
/**
 * Returns the value of ubidi_getLogicalIndex() for visualIndex in the wrapped
 * UBiDi; rErrorCode is handed to the C function by address.
 */
inline UTextOffset BiDi::getLogicalIndex(UTextOffset visualIndex, UErrorCode &rErrorCode) {
    const UTextOffset logicalPosition=ubidi_getLogicalIndex(pBiDi, visualIndex, &rErrorCode);
    return logicalPosition;
}
/**
 * Forwards indexMap to ubidi_getLogicalMap() for the wrapped UBiDi;
 * rErrorCode is handed to the C function by address.
 */
inline void
BiDi::getLogicalMap(UTextOffset *indexMap, UErrorCode &rErrorCode) {
ubidi_getLogicalMap(pBiDi, indexMap, &rErrorCode);
}
/**
 * Forwards indexMap to ubidi_getVisualMap() for the wrapped UBiDi;
 * rErrorCode is handed to the C function by address.
 */
inline void
BiDi::getVisualMap(UTextOffset *indexMap, UErrorCode &rErrorCode) {
ubidi_getVisualMap(pBiDi, indexMap, &rErrorCode);
}
/**
 * Static pass-through to ubidi_reorderLogical(); does not touch any
 * BiDi object, all three arguments are forwarded unchanged.
 */
inline void
BiDi::reorderLogical(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap) {
ubidi_reorderLogical(levels, length, indexMap);
}
/**
 * Static pass-through to ubidi_reorderVisual(); does not touch any
 * BiDi object, all three arguments are forwarded unchanged.
 */
inline void
BiDi::reorderVisual(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap) {
ubidi_reorderVisual(levels, length, indexMap);
}
/**
 * Static pass-through to ubidi_invertMap(); does not touch any
 * BiDi object, all three arguments are forwarded unchanged.
 */
inline void
BiDi::invertMap(const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length) {
ubidi_invertMap(srcMap, destMap, length);
}
#endif

View file

@ -6,7 +6,7 @@
**********************************************************************
*/
#include "chariter.h"
#include "unicode/chariter.h"
// Out-of-class definition of the static sentinel declared in CharacterIterator:
// the value its functions return once the iterator has reached the limits of
// its iteration.
const UChar CharacterIterator::DONE = 0xffff;

View file

@ -1,194 +1 @@
/*
********************************************************************
*
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************
*/
#ifndef CHARITER_H
#define CHARITER_H
#include "utypes.h"
#include "unistr.h"
/**
* Abstract class defining a protocol for accessing characters in a text-storage object.
<P>Examples:<P>
Function processing characters, in this example simple output
<pre>
. void processChar( UChar c )
. {
. cout &lt;&lt; " " &lt;&lt; c;
. }
</pre>
Traverse the text from start to finish
<pre>
. void traverseForward(CharacterIterator& iter)
. {
. for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
. processChar(c);
. }
. }
</pre>
Traverse the text backwards, from end to start
<pre>
. void traverseBackward(CharacterIterator& iter)
. {
. for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
. processChar(c);
. }
. }
</pre>
Traverse both forward and backward from a given position in the text.
Calls to notBoundary() in this example represents some additional stopping criteria.
<pre>
. void traverseOut(CharacterIterator& iter, UTextOffset pos)
. {
. UChar c;
. for (c = iter.setIndex(pos);
. c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
. c = iter.next()) {}
. UTextOffset end = iter.getIndex();
. for (c = iter.setIndex(pos);
. c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
. c = iter.previous()) {}
. UTextOffset start = iter.getIndex() + 1;
.
. cout &lt;&lt; "start: " &lt;&lt; start &lt;&lt; " end: " &lt;&lt; end &lt;&lt; endl;
. for (c = iter.setIndex(start); iter.getIndex() &lt; end; c = iter.next() ) {
. processChar(c);
. }
. }
</pre>
Creating a StringCharacterIterator and calling the test functions
<pre>
. void CharacterIterator_Example( void )
. {
. cout &lt;&lt; endl &lt;&lt; "===== CharacterIterator_Example: =====" &lt;&lt; endl;
. UnicodeString text("Ein kleiner Satz.");
. StringCharacterIterator iterator(text);
. cout &lt;&lt; "----- traverseForward: -----------" &lt;&lt; endl;
. traverseForward( iterator );
. cout &lt;&lt; endl &lt;&lt; endl &lt;&lt; "----- traverseBackward: ----------" &lt;&lt; endl;
. traverseBackward( iterator );
. cout &lt;&lt; endl &lt;&lt; endl &lt;&lt; "----- traverseOut: ---------------" &lt;&lt; endl;
. traverseOut( iterator, 7 );
. cout &lt;&lt; endl &lt;&lt; endl &lt;&lt; "-----" &lt;&lt; endl;
. }
</pre>
*/
class U_COMMON_API CharacterIterator
{
public:
    /**
     * Sentinel value that most of CharacterIterator's functions return
     * once the iterator has reached the limits of its iteration.
     */
    static const UChar DONE;

    /** Destructor. */
    virtual ~CharacterIterator();

    /**
     * Equality: true when both iterators refer to the same character
     * in the same character-storage object.
     */
    virtual bool_t operator==(const CharacterIterator& that) const = 0;

    /**
     * Inequality: true when the iterators refer to different text-storage
     * objects, or to different characters in the same text-storage object.
     * Implemented as the negation of operator==.
     */
    bool_t operator!=(const CharacterIterator& that) const
    {
        return !this->operator==(that);
    }

    /**
     * Creates a new CharacterIterator of the same concrete class as this
     * one, referring to the same character in the same text-storage object.
     * The caller owns the returned clone and must delete it.
     */
    virtual CharacterIterator* clone(void) const = 0;

    /** Produces a hash code for this iterator. */
    virtual int32_t hashCode(void) const = 0;

    /**
     * Moves the iterator to the first character in its iteration range
     * and returns that character.
     */
    virtual UChar first(void) = 0;

    /**
     * Moves the iterator to the last character in its iteration range
     * and returns that character.
     */
    virtual UChar last(void) = 0;

    /**
     * Moves the iterator to the "position"-th character of the text-storage
     * object it refers to and returns that character.
     */
    virtual UChar setIndex(UTextOffset position) = 0;

    /** Returns the character the iterator currently refers to. */
    virtual UChar current(void) const = 0;

    /**
     * Steps to the next character in the iteration range (toward last())
     * and returns it. Returns DONE when no more characters are left.
     */
    virtual UChar next(void) = 0;

    /**
     * Steps to the previous character in the iteration range (toward
     * first()) and returns it. Returns DONE when no more characters are left.
     */
    virtual UChar previous(void) = 0;

    /**
     * Returns the numeric index, in the underlying text-storage object, of
     * the character returned by first(). Because an iterator may cover only
     * part of a text-storage object, this number is not necessarily 0.
     */
    virtual UTextOffset startIndex(void) const = 0;

    /**
     * Returns the numeric index, in the underlying text-storage object, of
     * the position immediately BEYOND the character returned by last().
     */
    virtual UTextOffset endIndex(void) const = 0;

    /**
     * Returns the numeric index, in the underlying text-storage object, of
     * the character the iterator currently refers to (i.e., the character
     * returned by current()).
     */
    virtual UTextOffset getIndex(void) const = 0;

    /**
     * Copies the text under iteration into the UnicodeString referred to
     * by "result".
     * @param result Receives a copy of the text under iteration.
     */
    virtual void getText(UnicodeString& result) = 0;

    /**
     * Returns a UClassID for this CharacterIterator ("poor man's RTTI").<P>
     * Although this function is public,
     * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
     */
    virtual UClassID getDynamicClassID(void) const = 0;

protected:
    /* Construction, copying and assignment are reserved for subclasses;
       the base class has no state of its own to initialize or copy. */
    CharacterIterator() {}
    CharacterIterator(const CharacterIterator&) {}
    CharacterIterator& operator=(const CharacterIterator&)
    {
        return *this;
    }
};
#endif
#error Please include unicode/chariter.h instead

View file

@ -27,12 +27,12 @@
#include <stdlib.h>
#include <string.h>
/* icu_-prefixed memory macros: the names used before the private icu_
   functions were renamed to uprv_. Each expands directly to the C runtime
   function of the same base name. */
#define icu_malloc(size) malloc(size)
#define icu_realloc(buffer, size) realloc(buffer, size)
#define icu_free(buffer) free(buffer)
#define icu_memcpy(dst, src, size) memcpy(dst, src, size)
#define icu_memmove(dst, src, size) memmove(dst, src, size)
#define icu_memset(buffer, mark, size) memset(buffer, mark, size)
#define icu_memcmp(buffer1, buffer2, size) memcmp(buffer1, buffer2,size)
/* uprv_-prefixed memory macros: the renamed equivalents of the icu_ macros.
   Each expands directly to the C runtime function of the same base name,
   so arguments and return values are exactly those of malloc(), realloc(),
   free(), memcpy(), memmove(), memset() and memcmp(). */
#define uprv_malloc(size) malloc(size)
#define uprv_realloc(buffer, size) realloc(buffer, size)
#define uprv_free(buffer) free(buffer)
#define uprv_memcpy(dst, src, size) memcpy(dst, src, size)
#define uprv_memmove(dst, src, size) memmove(dst, src, size)
#define uprv_memset(buffer, mark, size) memset(buffer, mark, size)
#define uprv_memcmp(buffer1, buffer2, size) memcmp(buffer1, buffer2,size)
#endif

View file

@ -277,25 +277,25 @@ SOURCE=.\uvector.cpp
# PROP Default_Filter "h;hpp;hxx;hm;inl"
# Begin Source File
SOURCE=.\bidi.h
SOURCE=.\unicode\bidi.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\bidi.h
InputPath=.\unicode\bidi.h
"..\..\include\bidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy bidi.h ..\..\include
"..\..\include\unicode\bidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\bidi.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\bidi.h
InputPath=.\unicode\bidi.h
"..\..\include\bidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy bidi.h ..\..\include
"..\..\include\unicode\bidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\bidi.h ..\..\include\unicode
# End Custom Build
@ -304,25 +304,25 @@ InputPath=.\bidi.h
# End Source File
# Begin Source File
SOURCE=.\chariter.h
SOURCE=.\unicode\chariter.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\chariter.h
InputPath=.\unicode\chariter.h
"..\..\include\chariter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy chariter.h ..\..\include
"..\..\include\unicode\chariter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\chariter.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\chariter.h
InputPath=.\unicode\chariter.h
"..\..\include\chariter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy chariter.h ..\..\include
"..\..\include\unicode\chariter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\chariter.h ..\..\include\unicode
# End Custom Build
@ -343,25 +343,25 @@ SOURCE=.\compitr.h
# End Source File
# Begin Source File
SOURCE=.\convert.h
SOURCE=.\unicode\convert.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\convert.h
InputPath=.\unicode\convert.h
"..\..\include\convert.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy convert.h ..\..\include
"..\..\include\unicode\convert.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\convert.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\convert.h
InputPath=.\unicode\convert.h
"..\..\include\convert.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy convert.h ..\..\include
"..\..\include\unicode\convert.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\convert.h ..\..\include\unicode
# End Custom Build
@ -390,25 +390,25 @@ SOURCE=.\filestrm.h
# End Source File
# Begin Source File
SOURCE=.\locid.h
SOURCE=.\unicode\locid.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\locid.h
InputPath=.\unicode\locid.h
"..\..\include\locid.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy locid.h ..\..\include
"..\..\include\unicode\locid.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\locid.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\locid.h
InputPath=.\unicode\locid.h
"..\..\include\locid.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy locid.h ..\..\include
"..\..\include\unicode\locid.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\locid.h ..\..\include\unicode
# End Custom Build
@ -425,25 +425,25 @@ SOURCE=.\mutex.h
# End Source File
# Begin Source File
SOURCE=.\normlzr.h
SOURCE=.\unicode\normlzr.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\normlzr.h
InputPath=.\unicode\normlzr.h
"..\..\include\normlzr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy normlzr.h ..\..\include
"..\..\include\unicode\normlzr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\normlzr.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\normlzr.h
InputPath=.\unicode\normlzr.h
"..\..\include\normlzr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy normlzr.h ..\..\include
"..\..\include\unicode\normlzr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\normlzr.h ..\..\include\unicode
# End Custom Build
@ -452,25 +452,25 @@ InputPath=.\normlzr.h
# End Source File
# Begin Source File
SOURCE=.\putil.h
SOURCE=.\unicode\putil.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\putil.h
InputPath=.\unicode\putil.h
"..\..\include\putil.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy putil.h ..\..\include
"..\..\include\unicode\putil.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\putil.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\putil.h
InputPath=.\unicode\putil.h
"..\..\include\putil.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy putil.h ..\..\include
"..\..\include\unicode\putil.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\putil.h ..\..\include\unicode
# End Custom Build
@ -479,25 +479,25 @@ InputPath=.\putil.h
# End Source File
# Begin Source File
SOURCE=.\pwin32.h
SOURCE=.\unicode\pwin32.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\pwin32.h
InputPath=.\unicode\pwin32.h
"..\..\include\pwin32.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy pwin32.h ..\..\include
"..\..\include\unicode\pwin32.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\pwin32.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\pwin32.h
InputPath=.\unicode\pwin32.h
"..\..\include\pwin32.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy pwin32.h ..\..\include
"..\..\include\unicode\pwin32.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\pwin32.h ..\..\include\unicode
# End Custom Build
@ -518,25 +518,25 @@ SOURCE=.\rbread.h
# End Source File
# Begin Source File
SOURCE=.\rep.h
SOURCE=.\unicode\rep.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\rep.h
InputPath=.\unicode\rep.h
"..\..\include\rep.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy rep.h ..\..\include
"..\..\include\unicode\rep.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\rep.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\rep.h
InputPath=.\unicode\rep.h
"..\..\include\rep.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy rep.h ..\..\include
"..\..\include\unicode\rep.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\rep.h ..\..\include\unicode
# End Custom Build
@ -545,25 +545,25 @@ InputPath=.\rep.h
# End Source File
# Begin Source File
SOURCE=.\resbund.h
SOURCE=.\unicode\resbund.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\resbund.h
InputPath=.\unicode\resbund.h
"..\..\include\resbund.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy resbund.h ..\..\include
"..\..\include\unicode\resbund.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\resbund.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\resbund.h
InputPath=.\unicode\resbund.h
"..\..\include\resbund.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy resbund.h ..\..\include
"..\..\include\unicode\resbund.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\resbund.h ..\..\include\unicode
# End Custom Build
@ -572,25 +572,25 @@ InputPath=.\resbund.h
# End Source File
# Begin Source File
SOURCE=.\schriter.h
SOURCE=.\unicode\schriter.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\schriter.h
InputPath=.\unicode\schriter.h
"..\..\include\schriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy schriter.h ..\..\include
"..\..\include\unicode\schriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\schriter.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\schriter.h
InputPath=.\unicode\schriter.h
"..\..\include\schriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy schriter.h ..\..\include
"..\..\include\unicode\schriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\schriter.h ..\..\include\unicode
# End Custom Build
@ -599,25 +599,25 @@ InputPath=.\schriter.h
# End Source File
# Begin Source File
SOURCE=.\scsu.h
SOURCE=.\unicode\scsu.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\scsu.h
InputPath=.\unicode\scsu.h
"..\..\include\scsu.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy scsu.h ..\..\include
"..\..\include\unicode\scsu.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\scsu.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\scsu.h
InputPath=.\unicode\scsu.h
"..\..\include\scsu.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy scsu.h ..\..\include
"..\..\include\unicode\scsu.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\scsu.h ..\..\include\unicode
# End Custom Build
@ -626,25 +626,25 @@ InputPath=.\scsu.h
# End Source File
# Begin Source File
SOURCE=.\ubidi.h
SOURCE=.\unicode\ubidi.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\ubidi.h
InputPath=.\unicode\ubidi.h
"..\..\include\ubidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ubidi.h ..\..\include
"..\..\include\unicode\ubidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ubidi.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\ubidi.h
InputPath=.\unicode\ubidi.h
"..\..\include\ubidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ubidi.h ..\..\include
"..\..\include\unicode\ubidi.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ubidi.h ..\..\include\unicode
# End Custom Build
@ -657,25 +657,25 @@ SOURCE=.\ubidiimp.h
# End Source File
# Begin Source File
SOURCE=.\uchar.h
SOURCE=.\unicode\uchar.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\uchar.h
InputPath=.\unicode\uchar.h
"..\..\include\uchar.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy uchar.h ..\..\include
"..\..\include\unicode\uchar.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\uchar.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\uchar.h
InputPath=.\unicode\uchar.h
"..\..\include\uchar.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy uchar.h ..\..\include
"..\..\include\unicode\uchar.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\uchar.h ..\..\include\unicode
# End Custom Build
@ -684,25 +684,25 @@ InputPath=.\uchar.h
# End Source File
# Begin Source File
SOURCE=.\uchriter.h
SOURCE=.\unicode\uchriter.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\uchriter.h
InputPath=.\unicode\uchriter.h
"..\..\include\uchriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy uchriter.h ..\..\include
"..\..\include\unicode\uchriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\uchriter.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\uchriter.h
InputPath=.\unicode\uchriter.h
"..\..\include\uchriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy uchriter.h ..\..\include
"..\..\include\unicode\uchriter.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\uchriter.h ..\..\include\unicode
# End Custom Build
@ -723,25 +723,25 @@ SOURCE=.\ucmp8.h
# End Source File
# Begin Source File
SOURCE=.\ucnv.h
SOURCE=.\unicode\ucnv.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\ucnv.h
InputPath=.\unicode\ucnv.h
"..\..\include\ucnv.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ucnv.h ..\..\include
"..\..\include\unicode\ucnv.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ucnv.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\ucnv.h
InputPath=.\unicode\ucnv.h
"..\..\include\ucnv.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ucnv.h ..\..\include
"..\..\include\unicode\ucnv.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ucnv.h ..\..\include\unicode
# End Custom Build
@ -750,25 +750,25 @@ InputPath=.\ucnv.h
# End Source File
# Begin Source File
SOURCE=.\ucnv_bld.h
SOURCE=.\unicode\ucnv_bld.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\ucnv_bld.h
InputPath=.\unicode\ucnv_bld.h
"..\..\include\ucnv_bld.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ucnv_bld.h ..\..\include
"..\..\include\unicode\ucnv_bld.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ucnv_bld.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\ucnv_bld.h
InputPath=.\unicode\ucnv_bld.h
"..\..\include\ucnv_bld.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ucnv_bld.h ..\..\include
"..\..\include\unicode\ucnv_bld.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ucnv_bld.h ..\..\include\unicode
# End Custom Build
@ -781,25 +781,25 @@ SOURCE=.\ucnv_cnv.h
# End Source File
# Begin Source File
SOURCE=.\ucnv_err.h
SOURCE=.\unicode\ucnv_err.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\ucnv_err.h
InputPath=.\unicode\ucnv_err.h
"..\..\include\ucnv_err.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ucnv_err.h ..\..\include
"..\..\include\unicode\ucnv_err.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ucnv_err.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\ucnv_err.h
InputPath=.\unicode\ucnv_err.h
"..\..\include\ucnv_err.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ucnv_err.h ..\..\include
"..\..\include\unicode\ucnv_err.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ucnv_err.h ..\..\include\unicode
# End Custom Build
@ -816,25 +816,25 @@ SOURCE=.\ucnv_io.h
# End Source File
# Begin Source File
SOURCE=.\udata.h
SOURCE=.\unicode\udata.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\udata.h
InputPath=.\unicode\udata.h
"..\..\include\udata.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy udata.h ..\..\include
"..\..\include\unicode\udata.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\udata.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\udata.h
InputPath=.\unicode\udata.h
"..\..\include\udata.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy udata.h ..\..\include
"..\..\include\unicode\udata.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\udata.h ..\..\include\unicode
# End Custom Build
@ -847,25 +847,25 @@ SOURCE=.\uhash.h
# End Source File
# Begin Source File
SOURCE=.\uloc.h
SOURCE=.\unicode\uloc.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\uloc.h
InputPath=.\unicode\uloc.h
"..\..\include\uloc.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy uloc.h ..\..\include
"..\..\include\unicode\uloc.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\uloc.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\uloc.h
InputPath=.\unicode\uloc.h
"..\..\include\uloc.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy uloc.h ..\..\include
"..\..\include\unicode\uloc.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\uloc.h ..\..\include\unicode
# End Custom Build
@ -874,16 +874,16 @@ InputPath=.\uloc.h
# End Source File
# Begin Source File
SOURCE=.\umisc.h
SOURCE=.\unicode\umisc.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\umisc.h
InputPath=.\unicode\umisc.h
InputName=umisc
"..\..\include\$(InputName).h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputName).h ..\..\include
"..\..\include\unicode\$(InputName).h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\$(InputName).h ..\..\include\unicode
echo $(InputName)
# End Custom Build
@ -891,11 +891,11 @@ InputName=umisc
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\umisc.h
InputPath=.\unicode\umisc.h
InputName=umisc
"..\..\include\$(InputName).h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy $(InputName).h ..\..\include
"..\..\include\unicode\$(InputName).h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\$(InputName).h ..\..\include\unicode
echo $(InputName)
# End Custom Build
@ -909,25 +909,25 @@ SOURCE=.\umutex.h
# End Source File
# Begin Source File
SOURCE=.\unicode.h
SOURCE=.\unicode\unicode.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\unicode.h
InputPath=.\unicode\unicode.h
"..\..\include\unicode.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode.h ..\..\include
"..\..\include\unicode\unicode.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\unicode.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\unicode.h
InputPath=.\unicode\unicode.h
"..\..\include\unicode.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode.h ..\..\include
"..\..\include\unicode\unicode.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\unicode.h ..\..\include\unicode
# End Custom Build
@ -936,25 +936,25 @@ InputPath=.\unicode.h
# End Source File
# Begin Source File
SOURCE=.\unistr.h
SOURCE=.\unicode\unistr.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\unistr.h
InputPath=.\unicode\unistr.h
"..\..\include\unistr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unistr.h ..\..\include
"..\..\include\unicode\unistr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\unistr.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\unistr.h
InputPath=.\unicode\unistr.h
"..\..\include\unistr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unistr.h ..\..\include
"..\..\include\unicode\unistr.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\unistr.h ..\..\include\unicode
# End Custom Build
@ -967,25 +967,25 @@ SOURCE=.\unistrm.h
# End Source File
# Begin Source File
SOURCE=.\ures.h
SOURCE=.\unicode\ures.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\ures.h
InputPath=.\unicode\ures.h
"..\..\include\ures.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ures.h ..\..\include
"..\..\include\unicode\ures.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ures.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\ures.h
InputPath=.\unicode\ures.h
"..\..\include\ures.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ures.h ..\..\include
"..\..\include\unicode\ures.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ures.h ..\..\include\unicode
# End Custom Build
@ -998,25 +998,25 @@ SOURCE=.\uresdata.h
# End Source File
# Begin Source File
SOURCE=.\ustring.h
SOURCE=.\unicode\ustring.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\ustring.h
InputPath=.\unicode\ustring.h
"..\..\include\ustring.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ustring.h ..\..\include
"..\..\include\unicode\ustring.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ustring.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\ustring.h
InputPath=.\unicode\ustring.h
"..\..\include\ustring.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy ustring.h ..\..\include
"..\..\include\unicode\ustring.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\ustring.h ..\..\include\unicode
# End Custom Build
@ -1025,25 +1025,25 @@ InputPath=.\ustring.h
# End Source File
# Begin Source File
SOURCE=.\utypes.h
SOURCE=.\unicode\utypes.h
!IF "$(CFG)" == "common - Win32 Release"
# Begin Custom Build
InputPath=.\utypes.h
InputPath=.\unicode\utypes.h
"..\..\include\utypes.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy utypes.h ..\..\include
"..\..\include\unicode\utypes.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\utypes.h ..\..\include\unicode
# End Custom Build
!ELSEIF "$(CFG)" == "common - Win32 Debug"
# Begin Custom Build
InputPath=.\utypes.h
InputPath=.\unicode\utypes.h
"..\..\include\utypes.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy utypes.h ..\..\include
"..\..\include\unicode\utypes.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
copy unicode\utypes.h ..\..\include\unicode
# End Custom Build

View file

@ -4,7 +4,7 @@
*/
#include "utypes.h"
#include "unicode/utypes.h"
#include "ucmp8.h"
#include "ucmp16.h"

View file

@ -9,7 +9,7 @@
#include "compitr.h"
#include "normlzr.h"
#include "unicode/normlzr.h"
/**
* Constant that indicates the iteration has completed.

View file

@ -11,8 +11,8 @@
#define COMPITR_H
#include "utypes.h"
#include "unistr.h"
#include "unicode/utypes.h"
#include "unicode/unistr.h"
/**

View file

@ -10,16 +10,16 @@ class Locale;
class UnicodeString;
class Mutex;
#include "utypes.h"
#include "resbund.h"
#include "unicode/utypes.h"
#include "unicode/resbund.h"
#include "cmemory.h"
#include "mutex.h"
extern "C" {
#include "ucnv_io.h"
#include "ucnv_bld.h"
#include "ucnv.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv.h"
}
#include "convert.h"
#include "unicode/convert.h"
/* list of converter and alias names */
const char **UnicodeConverterCPP::availableConverterNames=NULL;
@ -75,11 +75,11 @@ bool_t UnicodeConverterCPP::operator==(const UnicodeConverterCPP& that) const
(myUnicodeConverter->fromCharErrorBehaviour == that.myUnicodeConverter->fromCharErrorBehaviour) &&
(myUnicodeConverter->toUnicodeStatus == that.myUnicodeConverter->toUnicodeStatus) &&
(myUnicodeConverter->subCharLen == that.myUnicodeConverter->subCharLen) &&
(icu_memcmp(myUnicodeConverter->subChar, that.myUnicodeConverter->subChar, myUnicodeConverter->subCharLen) == 0) &&
(uprv_memcmp(myUnicodeConverter->subChar, that.myUnicodeConverter->subChar, myUnicodeConverter->subCharLen) == 0) &&
(myUnicodeConverter->UCharErrorBufferLength == that.myUnicodeConverter->UCharErrorBufferLength) &&
(myUnicodeConverter->charErrorBufferLength == that.myUnicodeConverter->charErrorBufferLength) &&
(icu_memcmp(myUnicodeConverter->UCharErrorBuffer, that.myUnicodeConverter->UCharErrorBuffer, myUnicodeConverter->UCharErrorBufferLength) == 0) &&
(icu_memcmp(myUnicodeConverter->charErrorBuffer, that.myUnicodeConverter->charErrorBuffer, myUnicodeConverter->charErrorBufferLength) == 0) &&
(uprv_memcmp(myUnicodeConverter->UCharErrorBuffer, that.myUnicodeConverter->UCharErrorBuffer, myUnicodeConverter->UCharErrorBufferLength) == 0) &&
(uprv_memcmp(myUnicodeConverter->charErrorBuffer, that.myUnicodeConverter->charErrorBuffer, myUnicodeConverter->charErrorBufferLength) == 0) &&
(myUnicodeConverter->fromUCharErrorBehaviour == that.myUnicodeConverter->fromUCharErrorBehaviour))
return TRUE;
else return FALSE;
@ -180,7 +180,7 @@ UnicodeConverterCPP::toUnicodeString(UnicodeString& target,
*on a "normal" call, only one iteration will be necessary.
*/
myTargetUChars =
(UChar*)icu_malloc(sizeof(UChar)*(myTargetUCharsLength = (sourceSize/(int32_t)getMinBytesPerChar())));
(UChar*)uprv_malloc(sizeof(UChar)*(myTargetUCharsLength = (sourceSize/(int32_t)getMinBytesPerChar())));
if (myTargetUChars == NULL)
{
@ -216,7 +216,7 @@ UnicodeConverterCPP::toUnicodeString(UnicodeString& target,
} while (err == U_INDEX_OUTOFBOUNDS_ERROR);
icu_free(myTargetUChars);
uprv_free(myTargetUChars);
return;
}

View file

@ -1,325 +1 @@
/*******************************************************************************
*
* Copyright (C) 1998-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************/
#ifndef CONVERT_H
#define CONVERT_H
#include "unistr.h"
#include "ucnv.h"
/**
* C++ wrapper class around a UConverter handle (myUnicodeConverter). Its
* methods convert between Unicode text and codepage-encoded bytes and expose
* the converter's properties (name, codepage, substitution characters, ...).
*/
class U_COMMON_API UnicodeConverterCPP
{
private:
/* Internal data representation of the converter */
UConverter* myUnicodeConverter;
/* Debug method */
void printRef(void) const;
/* List of converter and alias names, and the number of entries in it */
static const char **availableConverterNames;
static int32_t availableConverterNamesCount;
public:
// Constructors and a destructor
/**
* Creates a Unicode conversion object that defaults to the LATIN1 encoding.
* (A constructor has no return value and this overload takes no error code.)
*/
UnicodeConverterCPP();
/**
* Creates a Unicode conversion object by specifying the codepage name. The name
* string is in ASCII format.
* @param name the pointer to a char[] object containing a codepage name. (I)
* @param err Error status (I/O). U_ILLEGAL_ARGUMENT_ERROR will be returned if the string is empty.
* If the internal program does not work correctly, for example, if there's no such codepage,
* U_INTERNAL_PROGRAM_ERROR will be returned.
* A constructor has no return value; check err to detect a failed creation.
*/
UnicodeConverterCPP(const char* name,
UErrorCode& err);
/**
* Creates a UnicodeConverter object with the name specified as a Unicode string. The name should be limited to
* the ASCII-7 alphanumerics. Dash and underscore characters are allowed for readability, but are ignored in the
* search.
* @param name name of the uconv table in Unicode string (I)
* @param err error status (I/O). U_ILLEGAL_ARGUMENT_ERROR will be returned if the string is empty. If the internal
* program does not work correctly, for example, if there's no such codepage, U_INTERNAL_PROGRAM_ERROR will be
* returned.
* A constructor has no return value; check err to detect a failed creation.
*/
UnicodeConverterCPP(const UnicodeString& name,
UErrorCode& err);
/**
* Creates a Unicode conversion object using the codepage ID number.
* @param codepageNumber a codepage # (I)
* @param platform the platform that the codepage number refers to.
* NOTE(review): semantics of UConverterPlatform are not visible here - confirm.
* @param err Error status (I/O). If the internal program does not work correctly, for example,
* if there's no such codepage, U_INTERNAL_PROGRAM_ERROR will be returned.
* NOTE(review): the previous text also listed U_ILLEGAL_ARGUMENT_ERROR "if the string is empty",
* but this overload takes no string - confirm when that code is reported.
* A constructor has no return value; check err to detect a failed creation.
*/
UnicodeConverterCPP(int32_t codepageNumber,
UConverterPlatform platform,
UErrorCode& err);
/** Destructor. */
~UnicodeConverterCPP();
/**
* Transcodes the source UnicodeString to the target string in a codepage encoding
* with the specified Unicode converter. For example, if a Unicode to/from JIS
* converter is specified, the source string in Unicode will be transcoded to JIS
* encoding. The result will be stored in JIS encoding.
*
* @param target the target string in codepage encoding
* @param targetSize Input: the number of bytes available in the "target" buffer.
* Output: the number of bytes copied to it.
* @param source the source Unicode string
* @param err the error status code. U_MEMORY_ALLOCATION_ERROR will be returned if
* the internal process buffer cannot be allocated for transcoding. U_ILLEGAL_ARGUMENT_ERROR
* is returned if the converter is null or the source or target string is empty.
*/
void fromUnicodeString(char* target,
int32_t& targetSize,
const UnicodeString& source,
UErrorCode& err) const;
/**
* Transcodes the source string in codepage encoding to the target string in
* Unicode encoding. For example, if a Unicode to/from JIS
* converter is specified, the source string in JIS encoding will be transcoded
* to Unicode encoding. The result will be stored in Unicode encoding.
* @param target the target string in Unicode encoding
* @param source the source string in codepage encoding
* @param sourceSize the length of source (input only, passed by value) -
* presumably counted in bytes; confirm against the implementation.
* @param err the error status code. U_MEMORY_ALLOCATION_ERROR will be returned if
* the internal process buffer cannot be allocated for transcoding. U_ILLEGAL_ARGUMENT_ERROR
* is returned if the converter is null or the source or target string is empty.
*/
void toUnicodeString(UnicodeString& target,
const char* source,
int32_t sourceSize,
UErrorCode& err) const;
/**
* Transcodes an array of Unicode characters to an array of codepage characters.
* The source pointer is an I/O parameter: it starts out pointing at the place
* to begin translating, and ends up pointing after the first sequence of bytes
* that it encounters that is semantically invalid.
* If T_UnicodeConverter_setMissingCharAction is called with an action other than STOP
* before a call is made to this API, consumed and source should point to the same place
* (unless target ends with an incomplete sequence of bytes and flush is FALSE).
* @param target I/O parameter. Input: points to the beginning of the buffer to copy
* codepage characters to. Output: points to after the last codepage character copied
* to target.
* @param targetLimit the pointer to the end of the target array
* @param source the source Unicode character array
* @param sourceLimit the pointer to the end of the source array
* @param offsets NOTE(review): not documented in this header - presumably an optional
* array receiving source offsets for the converted output; confirm.
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
* in this call, FALSE otherwise. (future feature pending)
* @param err the error status. U_ILLEGAL_ARGUMENT_ERROR will be returned if the
* converter is null.
*/
void fromUnicode(char*& target,
const char* targetLimit,
const UChar*& source,
const UChar* sourceLimit,
int32_t * offsets,
bool_t flush,
UErrorCode& err);
/**
* Converts an array of codepage characters into an array of Unicode characters.
* The source pointer is an I/O parameter: it starts out pointing at the place
* to begin translating, and ends up pointing after the first sequence of bytes
* that it encounters that is semantically invalid.
* If T_UnicodeConverter_setMissingUnicodeAction is called with an action other than STOP
* before a call is made to this API, consumed and source should point to the same place
* (unless target ends with an incomplete sequence of bytes and flush is FALSE).
* @param target I/O parameter. Input: points to the beginning of the buffer to copy
* Unicode characters to. Output: points to after the last UChar copied to target.
* @param targetLimit the pointer to the end of the target array
* @param source the source codepage character array
* @param sourceLimit the pointer to the end of the source array
* @param offsets NOTE(review): not documented in this header - presumably an optional
* array receiving source offsets for the converted output; confirm.
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
* in this call, FALSE otherwise. (future feature pending)
* @param err the error code status. U_ILLEGAL_ARGUMENT_ERROR will be returned if the
* converter is null, targetLimit < target, sourceLimit < source
*/
void toUnicode(UChar*& target,
const UChar* targetLimit,
const char*& source,
const char* sourceLimit,
int32_t * offsets,
bool_t flush,
UErrorCode& err);
/**
* Returns the maximum length in bytes used by a character. This varies between 1 and 4.
* @return the max number of bytes per codepage character
*/
int8_t getMaxBytesPerChar(void) const;
/**
* Returns the minimum byte length for characters in this codepage. This is either
* 1 or 2 for all supported codepages.
* @return the minimum number of bytes per codepage character
*/
int8_t getMinBytesPerChar(void) const;
/**
* Gets the type of conversion associated with the converter
* e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, EBCDIC_STATEFUL, LATIN_1
* @return the type of the converter
*/
UConverterType getType(void) const;
/**
* Gets the "starter" bytes for converters of type MBCS.
* Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the converter
* is not MBCS.
* Fills in an array of booleans, with the value of the byte as offset into the array.
* On return, if TRUE is found at offset 0x20, it means that the byte 0x20 is a starter byte
* in this converter.
* @param starters an array of size 256 to be filled in
* @param err the error status code (see above for U_ILLEGAL_ARGUMENT_ERROR)
* @see ucnv_getType
*/
void getStarters(bool_t starters[256],
UErrorCode& err) const;
/**
* Fills in the output parameter, subChars, with the substitution characters
* as multiple bytes.
* @param subChars the substitution characters
* @param len the number of bytes of the substitution character array
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR will be returned if
* the converter is null. If the substitution character array is too small, an
* U_INDEX_OUTOFBOUNDS_ERROR will be returned.
*/
void getSubstitutionChars(char* subChars,
int8_t& len,
UErrorCode& err) const;
/**
* Sets the substitution chars when converting from Unicode to a codepage. The
* substitution is specified as a string of 1-4 bytes, and may contain a null byte.
* The fill-in parameter err will get the error status on return.
* @param subChars the substitution character array to be set with
* @param len the number of bytes of the substitution character array
* (passed by value, so it is not updated on return)
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR if the converter is
* null, or if the number of bytes provided is not in the codepage's range (e.g. length 1 for ucs-2)
*/
void setSubstitutionChars(const char* subChars,
int8_t len,
UErrorCode& err);
/**
* Resets the state of stateful conversion to the default state. This is used
* in the case of error to restart a conversion from a known default state.
*/
void resetState(void);
/**
* Gets the name of the converter (zero-terminated).
* The name will be the internal name of the converter.
* @param err the error status code.
* NOTE(review): the previous text mentioned U_INDEX_OUTOFBOUNDS_ERROR for a too-small
* "converterNameLen", but this method takes no length argument - confirm which errors apply.
* @return the converter name
*/
const char* getName( UErrorCode& err) const;
/**
* Gets a codepage number associated with the converter. This is not guaranteed
* to be the one used to create the converter. Some converters do not represent
* IBM registered codepages and return zero for the codepage number.
* The error code fill-in parameter indicates if the codepage number is available.
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR will be returned if
* the converter is null or if the converter's data table is null.
* @return the codepage number. If any error occurs, a null value is documented as the
* result (presumably 0 for this int32_t return type; confirm).
*/
int32_t getCodepage(UErrorCode& err) const;
/**
* Returns the current setting action taken when a character from a codepage
* is missing. (Currently STOP or SUBSTITUTE).
* @return the action constant when a Unicode character cannot be converted to a
* codepage equivalent
*/
UConverterToUCallback getMissingCharAction(void) const;
/**
* Returns the current setting action taken when a Unicode character is missing.
* (Currently STOP or SUBSTITUTE).
* @return the action constant when a codepage character cannot be converted to a
* Unicode equivalent
*/
UConverterFromUCallback getMissingUnicodeAction(void) const;
/**
* Sets the current setting action taken when a character from a codepage is
* missing. (Currently STOP or SUBSTITUTE).
* @param action the action constant if an equivalent codepage character is missing
* @param err the error status code
*/
void setMissingCharAction(UConverterToUCallback action,
UErrorCode& err);
/**
* Sets the current setting action taken when a Unicode character is missing.
* (Currently T_UnicodeConverter_MissingUnicodeAction is either STOP or SUBSTITUTE;
* SKIP, CLOSEST_MATCH, ESCAPE_SEQ may be added in the future).
* @param action the action constant if an equivalent Unicode character is missing
* @param err the error status code
*/
void setMissingUnicodeAction(UConverterFromUCallback action,
UErrorCode& err);
/**
* Returns the localized name of the UnicodeConverter. If for any reason it is
* unavailable, the internal name will be returned instead.
* @param displayLocale the valid Locale, from which we want to localize
* @param displayName a UnicodeString that is going to be filled in.
*/
void getDisplayName(const Locale& displayLocale,
UnicodeString& displayName) const;
/**
* Returns the platform (UConverterPlatform, an ICU defined enum) of this UnicodeConverter.
* @param err the error code status
* @return the codepage's platform
*/
UConverterPlatform getCodepagePlatform(UErrorCode& err) const;
/* Assignment, equality/inequality comparison, and copy construction */
UnicodeConverterCPP& operator=(const UnicodeConverterCPP& that);
bool_t operator==(const UnicodeConverterCPP& that) const;
bool_t operator!=(const UnicodeConverterCPP& that) const;
UnicodeConverterCPP(const UnicodeConverterCPP& that);
/**
* Returns the available names. Lazily evaluated; the library owns the storage.
* @param num the number of available converters
* @param err the error code status
* @return the name array
*/
static const char* const* getAvailableNames(int32_t& num,
UErrorCode& err);
/**
* Iterates through every cached converter and frees all the unused ones.
* @return the number of cached converters successfully deleted
*/
static int32_t flushCache(void);
};
#endif
#error Please include unicode/convert.h instead

View file

@ -1,7 +1,7 @@
#define EXTENDED_FUNCTIONALITY
#include "cpputils.h"
#include "cstring.h"
#include "ustring.h"
#include "unicode/ustring.h"
/**********************************************************************
* Copyright (C) 1999, International Business Machines
@ -21,7 +21,7 @@ void T_fillOutputParams(const UnicodeString* temp,
const int32_t actual = temp->length();
const bool_t overflowed = actual >= resultLength;
const int32_t returnedSize = icu_min(actual, resultLength-1);
const int32_t returnedSize = uprv_min(actual, resultLength-1);
if ((temp->length() < resultLength) && (result != temp->getUChars()) && (returnedSize > 0)) {
u_strcpy(result, temp->getUChars());
}

View file

@ -14,49 +14,49 @@
#ifndef CPPUTILS_H
#define CPPUTILS_H
#include "utypes.h"
#include "unicode/utypes.h"
#ifdef XP_CPLUSPLUS
#include "cmemory.h"
#include "unistr.h"
#include "unicode/unistr.h"
/*===========================================================================*/
/* Array copy utility functions */
/*===========================================================================*/
inline void icu_arrayCopy(const double* src, double* dst, int32_t count)
{ icu_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
inline void uprv_arrayCopy(const double* src, double* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
inline void icu_arrayCopy(const double* src, int32_t srcStart,
inline void uprv_arrayCopy(const double* src, int32_t srcStart,
double* dst, int32_t dstStart, int32_t count)
{ icu_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
inline void icu_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
{ icu_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
inline void icu_arrayCopy(const int8_t* src, int32_t srcStart,
inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart,
int8_t* dst, int32_t dstStart, int32_t count)
{ icu_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
inline void icu_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
{ icu_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
inline void icu_arrayCopy(const int16_t* src, int32_t srcStart,
inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart,
int16_t* dst, int32_t dstStart, int32_t count)
{ icu_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
inline void icu_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
{ icu_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count)
{ uprv_memcpy(dst, src, (size_t)(count * sizeof(*src))); }
inline void icu_arrayCopy(const int32_t* src, int32_t srcStart,
inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart,
int32_t* dst, int32_t dstStart, int32_t count)
{ icu_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
inline void
icu_arrayCopy(const UChar *src, int32_t srcStart,
uprv_arrayCopy(const UChar *src, int32_t srcStart,
UChar *dst, int32_t dstStart, int32_t count)
{ icu_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)(count * sizeof(*src))); }
/******************************************************
* Simple utility to set output buffer parameters

View file

@ -28,8 +28,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "utypes.h"
#include "putil.h"
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "cstring.h"
char*
@ -121,7 +121,7 @@ T_CString_stricmp(const char *str1, const char *str2) {
return 1;
} else {
/* compare non-zero characters with lowercase */
rc=(int)(unsigned char)icu_tolower(c1)-(int)(unsigned char)icu_tolower(c2);
rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
if(rc!=0) {
return rc;
}

View file

@ -28,38 +28,38 @@
#include <string.h>
#include <ctype.h>
#include "utypes.h"
#include "unicode/utypes.h"
#define icu_strcpy(dst, src) strcpy(dst, src)
#define icu_strcpyWithSize(dst, src, size) strncpy(dst, src, size)
#define icu_strncpy(dst, src, size) strncpy(dst, src, size)
#define icu_strlen(str) strlen(str)
#define icu_strcmp(s1, s2) strcmp(s1, s2)
#define icu_strncmp(s1, s2, n) strncmp(s1, s2, n)
#define icu_strcat(dst, src) strcat(dst, src)
#define icu_strncat(dst, src, n) strncat(dst, src, n)
#define icu_strchr(s, c) strchr(s, c)
#define icu_strstr(s, c) strstr(s, c)
#define icu_strrchr(s, c) strrchr(s, c)
#define icu_toupper(c) toupper(c)
#define icu_tolower(c) tolower(c)
#define icu_strtoul(str, end, base) strtoul(str, end, base)
#define uprv_strcpy(dst, src) strcpy(dst, src)
#define uprv_strcpyWithSize(dst, src, size) strncpy(dst, src, size)
#define uprv_strncpy(dst, src, size) strncpy(dst, src, size)
#define uprv_strlen(str) strlen(str)
#define uprv_strcmp(s1, s2) strcmp(s1, s2)
#define uprv_strncmp(s1, s2, n) strncmp(s1, s2, n)
#define uprv_strcat(dst, src) strcat(dst, src)
#define uprv_strncat(dst, src, n) strncat(dst, src, n)
#define uprv_strchr(s, c) strchr(s, c)
#define uprv_strstr(s, c) strstr(s, c)
#define uprv_strrchr(s, c) strrchr(s, c)
#define uprv_toupper(c) toupper(c)
#define uprv_tolower(c) tolower(c)
#define uprv_strtoul(str, end, base) strtoul(str, end, base)
#ifdef WIN32
# define icu_stricmp(str1, str2) _stricmp(str1, str2)
# define uprv_stricmp(str1, str2) _stricmp(str1, str2)
#elif defined(POSIX)
# define icu_stricmp(str1, str2) strcasecmp(str1, str2)
# define uprv_stricmp(str1, str2) strcasecmp(str1, str2)
#else
# define icu_stricmp(str1, str2) T_CString_stricmp(str1, str2)
# define uprv_stricmp(str1, str2) T_CString_stricmp(str1, str2)
#endif
/*===========================================================================*/
/* Wide-character functions */
/*===========================================================================*/
#define icu_wcscat(dst, src) wcscat(dst, src)
#define icu_wcscpy(dst, src) wcscpy(dst, src)
#define icu_wcslen(src) wcslen(src)
#define icu_wcstombs(mbstr, wcstr, count) wcstombs(mbstr, wcstr, count)
#define icu_mbstowcs(wcstr, mbstr, count) mbstowcs(wcstr, mbstr, count)
#define uprv_wcscat(dst, src) wcscat(dst, src)
#define uprv_wcscpy(dst, src) wcscpy(dst, src)
#define uprv_wcslen(src) wcslen(src)
#define uprv_wcstombs(mbstr, wcstr, count) wcstombs(mbstr, wcstr, count)
#define uprv_mbstowcs(wcstr, mbstr, count) mbstowcs(wcstr, mbstr, count)
U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char* str);

View file

@ -4,7 +4,7 @@
*/
#include "utypes.h"
#include "unicode/utypes.h"
#include "ucmp8.h"
#include "ucmp16.h"

View file

@ -24,7 +24,7 @@
#ifndef DIGITLST_H
#define DIGITLST_H
#include "utypes.h"
#include "unicode/utypes.h"
#include <float.h>
// Decimal digits in a 32-bit int

View file

@ -57,18 +57,18 @@ T_FileStream_wopen(const wchar_t* filename, const wchar_t* mode)
/* convert from wchar_t to char */
fnMbsSize = wcstombs(NULL, filename, ((size_t)-1) >> 1);
fn = (char*)icu_malloc(fnMbsSize+2);
fn = (char*)uprv_malloc(fnMbsSize+2);
wcstombs(fn, filename, fnMbsSize);
fn[fnMbsSize] = 0;
mdMbsSize = wcstombs(NULL, mode, ((size_t)-1) >> 1);
md = (char*)icu_malloc(mdMbsSize+2);
md = (char*)uprv_malloc(mdMbsSize+2);
wcstombs(md, mode, mdMbsSize);
md[mdMbsSize] = 0;
result = fopen(fn, md);
icu_free(fn);
icu_free(md);
uprv_free(fn);
uprv_free(md);
return (FileStream*)result;
#endif
}

View file

@ -26,7 +26,7 @@
#define FILESTRM_H
#ifndef _UTYPES
#include "utypes.h"
#include "unicode/utypes.h"
#endif
#include <wchar.h>

View file

@ -30,11 +30,11 @@
#include "uhash.h"
#include "locid.h"
#include "uloc.h"
#include "resbund.h"
#include "unicode/locid.h"
#include "unicode/uloc.h"
#include "unicode/resbund.h"
#include "mutex.h"
#include "unicode.h"
#include "unicode/unicode.h"
#include "cmemory.h"
#include "cstring.h"
@ -212,17 +212,17 @@ Locale::Locale(const Locale& other)
{
int j;
/*Copy the language and country fields*/
icu_strcpy(language, other.language);
icu_strcpy(country, other.country);
uprv_strcpy(language, other.language);
uprv_strcpy(country, other.country);
/*make fullName point to the heap if necessary*/
if ((j=icu_strlen(other.fullName)) > ULOC_FULLNAME_CAPACITY)
if ((j=uprv_strlen(other.fullName)) > ULOC_FULLNAME_CAPACITY)
{
fullName = new char[j+1];
}
else fullName = fullNameBuffer;
icu_strcpy(fullName, other.fullName);
uprv_strcpy(fullName, other.fullName);
/*Make the variant point to the same offset as the copied*/
variant = fullName + (other.variant - other.fullName) ;
@ -232,11 +232,11 @@ Locale::Locale(const Locale& other)
bool_t
Locale::operator==( const Locale& other) const
{
if (icu_strcmp(other.language, language) == 0)
if (uprv_strcmp(other.language, language) == 0)
{
if (icu_strcmp(other.country, country) == 0)
if (uprv_strcmp(other.country, country) == 0)
{
if (icu_strcmp(other.variant, variant) == 0) return TRUE;
if (uprv_strcmp(other.variant, variant) == 0) return TRUE;
}
}
@ -265,13 +265,13 @@ Locale& Locale::init(const char* localeID)
/*Go to heap for the fullName if necessary*/
int j;
if ((j=icu_strlen(localeID)) > ULOC_FULLNAME_CAPACITY)
if ((j=uprv_strlen(localeID)) > ULOC_FULLNAME_CAPACITY)
{
this->fullName = new char[j+1];
}
else this->fullName = this->fullNameBuffer;
icu_strcpy(this->fullName, localeID);
uprv_strcpy(this->fullName, localeID);
/*Setting up the variant:
-point to the zero terminator of fullName if there is none
@ -293,17 +293,17 @@ Locale& Locale::init(const char* localeID)
Locale& Locale::operator=(const Locale& other)
{
icu_strcpy(language, other.language);
icu_strcpy(country, other.country);
uprv_strcpy(language, other.language);
uprv_strcpy(country, other.country);
if (other.fullName == other.fullNameBuffer) fullName = fullNameBuffer;
else
{
/*In case the assigner has some of its data on the heap
* we need to do the same*/
if (fullName != fullNameBuffer) delete []fullName;
fullName = new char[(icu_strlen(other.fullName)+1)];
fullName = new char[(uprv_strlen(other.fullName)+1)];
}
icu_strcpy(fullName, other.fullName);
uprv_strcpy(fullName, other.fullName);
/*Make the variant point to the same offset as the assigner*/
variant = fullName + (other.variant - other.fullName) ;

View file

@ -1,570 +1 @@
/*
*****************************************************************************************
*
* Copyright (C) 1996-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*****************************************************************************************
*
* File locid.h
*
* Created by: Helena Shih
*
* Modification History:
*
* Date Name Description
* 02/11/97 aliu Changed gLocPath to fgLocPath and added methods to
* get and set it.
* 04/02/97 aliu Made operator!= inline; fixed return value of getName().
* 04/15/97 aliu Cleanup for AIX/Win32.
* 04/24/97 aliu Numerous changes per code review.
* 08/18/98 stephen Added tokenizeString(),changed getDisplayName()
* 09/08/98 stephen Moved definition of kEmptyString for Mac Port
* 11/09/99 weiv Added const char * getName() const;
*****************************************************************************************
*/
#ifndef LOCID_H
#define LOCID_H
#include "unistr.h"
typedef struct ULocale ULocale;
typedef struct UHashtable UHashtable;
#define ULOC_LANG_CAPACITY 3
#define ULOC_COUNTRY_CAPACITY 3
#define ULOC_FULLNAME_CAPACITY 50
/**
*
* A <code>Locale</code> object represents a specific geographical, political,
* or cultural region. An operation that requires a <code>Locale</code> to perform
* its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
* to tailor information for the user. For example, displaying a number
* is a locale-sensitive operation--the number should be formatted
* according to the customs/conventions of the user's native country,
* region, or culture.
*
* <P>
* You create a <code>Locale</code> object using one of the three constructors in
* this class:
* <blockquote>
* <pre>
* . Locale( const UnicodeString& newLanguage);
* .
* . Locale( const UnicodeString& language,
* . const UnicodeString& country);
* .
* . Locale( const UnicodeString& language,
* . const UnicodeString& country,
* . const UnicodeString& variant);
* </pre>
* </blockquote>
* The first argument to the constructors is a valid <STRONG>ISO
* Language Code.</STRONG> These codes are the lower-case two-letter
* codes as defined by ISO-639.
* You can find a full list of these codes at a number of sites, such as:
* <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
* <code>http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</code></a>
*
* <P>
* The second argument to the constructors is a valid <STRONG>ISO Country
* Code.</STRONG> These codes are the upper-case two-letter codes
* as defined by ISO-3166.
* You can find a full list of these codes at a number of sites, such as:
* <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
* <code>http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</code></a>
*
* <P>
* The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
* The Variant codes are vendor and browser-specific.
* For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
* Where there are two variants, separate them with an underscore, and
* put the most important one first. For
* example, a Traditional Spanish collation might be referenced, with
* "es", "ES", "Traditional_WIN".
*
* <P>
* Because a <code>Locale</code> object is just an identifier for a region,
* no validity check is performed when you construct a <code>Locale</code>.
* If you want to see whether particular resources are available for the
* <code>Locale</code> you construct, you must query those resources. For
* example, ask the <code>NumberFormat</code> for the locales it supports
* using its <code>getAvailableLocales</code> method.
* <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
* locale, you get back the best available match, not necessarily
* precisely what you asked for. For more information, look at
* <a href="java.util.ResourceBundle.html"><code>ResourceBundle</code></a>.
*
* <P>
* The <code>Locale</code> class provides a number of convenient constants
* that you can use to create <code>Locale</code> objects for commonly used
* locales. For example, the following refers to a <code>Locale</code> object
* for the United States:
* <blockquote>
* <pre>
* . Locale::US
* </pre>
* </blockquote>
*
* <P>
* Once you've created a <code>Locale</code> you can query it for information about
* itself. Use <code>getCountry</code> to get the ISO Country Code and
* <code>getLanguage</code> to get the ISO Language Code. You can
* use <code>getDisplayCountry</code> to get the
* name of the country suitable for displaying to the user. Similarly,
* you can use <code>getDisplayLanguage</code> to get the name of
* the language suitable for displaying to the user. Interestingly,
* the <code>getDisplayXXX</code> methods are themselves locale-sensitive
* and have two versions: one that uses the default locale and one
* that takes a locale as an argument and displays the name or country in
* a language appropriate to that locale.
*
* <P>
* The TIFC provides a number of classes that perform locale-sensitive
* operations. For example, the <code>NumberFormat</code> class formats
* numbers, currency, or percentages in a locale-sensitive manner. Classes
* such as <code>NumberFormat</code> have a number of convenience methods
* for creating a default object of that type. For example, the
* <code>NumberFormat</code> class provides these three convenience methods
* for creating a default <code>NumberFormat</code> object:
* <blockquote>
* <pre>
* . UErrorCode success = U_ZERO_ERROR;
* . Locale myLocale;
* . NumberFormat *nf;
* .
* . nf = NumberFormat::createInstance( success ); delete nf;
* . nf = NumberFormat::createCurrencyInstance( success ); delete nf;
* . nf = NumberFormat::createPercentInstance( success ); delete nf;
* </pre>
* </blockquote>
* Each of these methods has two variants: one with an explicit locale
* and one without, the latter using the default locale.
* <blockquote>
* <pre>
* . nf = NumberFormat::createInstance( myLocale, success ); delete nf;
* . nf = NumberFormat::createCurrencyInstance( myLocale, success ); delete nf;
* . nf = NumberFormat::createPercentInstance( myLocale, success ); delete nf;
* </pre>
* </blockquote>
* A <code>Locale</code> is the mechanism for identifying the kind of object
* (<code>NumberFormat</code>) that you would like to get. The locale is
* <STRONG>just</STRONG> a mechanism for identifying objects,
* <STRONG>not</STRONG> a container for the objects themselves.
*
* <P>
* Each class that performs locale-sensitive operations allows you
* to get all the available objects of that type. You can sift
* through these objects by language, country, or variant,
* and use the display names to present a menu to the user.
* For example, you can create a menu of all the collation objects
* suitable for a given language. Such classes implement these
* three class methods:
* <blockquote>
* <pre>
* . static Locale* getAvailableLocales(int32_t& numLocales)
* . static UnicodeString& getDisplayName(const Locale& objectLocale,
* . const Locale& displayLocale,
* . UnicodeString& displayName)
* . static UnicodeString& getDisplayName(const Locale& objectLocale,
* . UnicodeString& displayName)
* </pre>
* </blockquote>
*/
class U_COMMON_API Locale
{
public:
/**
* Useful constants for language.
*/
static const Locale ENGLISH;
static const Locale FRENCH;
static const Locale GERMAN;
static const Locale ITALIAN;
static const Locale JAPANESE;
static const Locale KOREAN;
static const Locale CHINESE;
static const Locale SIMPLIFIED_CHINESE;
static const Locale TRADITIONAL_CHINESE;
/**
* Useful constants for country.
*/
static const Locale FRANCE;
static const Locale GERMANY;
static const Locale ITALY;
static const Locale JAPAN;
static const Locale KOREA;
static const Locale CHINA; // Alias for PRC
static const Locale PRC; // People's Republic of China
static const Locale TAIWAN; // Republic of China
static const Locale UK;
static const Locale US;
static const Locale CANADA;
static const Locale CANADA_FRENCH;
/**
* Construct an empty locale. It's only used when a fill-in parameter is
* needed.
*/
Locale();
/**
* Construct a locale from language, country, variant.
* The two shorter overloads omit the variant, or the variant and the country.
*
* @param language Lowercase two-letter ISO-639 code.
* @param country Uppercase two-letter ISO-3166 code. (optional)
* @param variant Uppercase vendor and browser specific code. See class
* description. (optional)
*/
Locale( const UnicodeString& language,
const UnicodeString& country ,
const UnicodeString& variant );
Locale( const UnicodeString& language,
const UnicodeString& country );
Locale( const UnicodeString& language);
/**
* Initializes a Locale object from another Locale object.
*
* @param other The Locale object being copied in.
*/
Locale(const Locale& other);
/**
* Destructor
*/
~Locale() ;
/**
* Replaces the entire contents of *this with the specified value.
*
* @param other The Locale object being copied in.
* @return *this
*/
Locale& operator=(const Locale& other);
/**
* Checks if two locale keys are the same.
*
* @param other The locale key object to be compared with this.
* @return True if the two locale keys are the same, false otherwise.
*/
bool_t operator==(const Locale& other) const;
/**
* Checks if two locale keys are not the same.
*
* @param other The locale key object to be compared with this.
* @return True if the two locale keys are not the same, false
* otherwise.
*/
bool_t operator!=(const Locale& other) const;
/**
* Common methods of getting the current default Locale. Used for the
* presentation: menus, dialogs, etc. Generally set once when your applet or
* application is initialized, then never reset. (If you do reset the
* default locale, you probably want to reload your GUI, so that the change
* is reflected in your interface.)
*
* More advanced programs will allow users to use different locales for
* different fields, e.g. in a spreadsheet.
*
* Note that the initial setting will match the host system.
*/
static Locale& getDefault(void);
/**
* Sets the default. Normally set once at the beginning of applet or
* application, then never reset. setDefault does NOT reset the host locale.
*
* @param newLocale Locale to set to.
* @param success Error code in/out parameter.
*/
static void setDefault(const Locale& newLocale,
UErrorCode& success);
/**
* Fills in "lang" with the locale's two-letter ISO-639 language code.
* @param lang Receives the language code.
* @return A reference to "lang".
*/
UnicodeString& getLanguage( UnicodeString& lang) const;
/**
* Fills in "cntry" with the locale's two-letter ISO-3166 country code.
* @param cntry Receives the country code.
* @return A reference to "cntry".
*/
UnicodeString& getCountry( UnicodeString& cntry) const;
/**
* Fills in "var" with the locale's variant code.
* @param var Receives the variant code.
* @return A reference to "var".
*/
UnicodeString& getVariant( UnicodeString& var) const;
/**
* Fills in "name" with the programmatic name of the entire locale, with the language,
* country and variant separated by underbars. If a field is missing, at
* most one underbar will occur. Example: "en", "de_DE", "en_US_WIN",
* "de_POSIX", "fr_MAC"
* @param name Receives the programmatic locale name.
* @return A reference to "name".
*/
UnicodeString& getName( UnicodeString& name) const;
/**
* Returns the programmatic name of the entire locale, with the language,
* country and variant separated by underbars. If a field is missing, at
* most one underbar will occur. Example: "en", "de_DE", "en_US_WIN",
* "de_POSIX", "fr_MAC"
* @return A pointer to "name".
*/
const char * getName() const;
/**
* Fills in "name" with the locale's three-letter language code, as specified
* in ISO draft standard ISO-639-2.
* @param name Receives the three-letter language code.
* @param status An UErrorCode to receive any MISSING_RESOURCE_ERRORs
* @return A reference to "name".
*/
UnicodeString& getISO3Language(UnicodeString& name, UErrorCode& status) const;
// this version is deprecated, use getISO3Language(UnicodeString&, UErrorCode&)
UnicodeString& getISO3Language(UnicodeString& name) const;
/**
* Fills in "name" with the locale's three-letter ISO-3166 country code.
* @param name Receives the three-letter country code.
* @param status An UErrorCode to receive any MISSING_RESOURCE_ERRORs
* @return A reference to "name".
*/
UnicodeString& getISO3Country( UnicodeString& name, UErrorCode& status) const;
// this version is deprecated, use getISO3Country(UnicodeString&, UErrorCode&);
UnicodeString& getISO3Country( UnicodeString& name) const;
/**
* Returns the Windows LCID value corresponding to this locale.
* This value is stored in the resource data for the locale as a one-to-four-digit
* hexadecimal number. If the resource is missing, in the wrong format, or
* there is no Windows LCID value that corresponds to this locale, returns 0.
*/
uint32_t getLCID(void) const;
/**
* Fills in "dispLang" with the name of this locale's language in a format suitable for
* user display in the default locale. For example, if the locale's language code is
* "fr" and the default locale's language code is "en", this function would set
* dispLang to "French".
* @param dispLang Receives the language's display name.
* @return A reference to "dispLang".
*/
UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const;
/**
* Fills in "dispLang" with the name of this locale's language in a format suitable for
* user display in the locale specified by "inLocale". For example, if the locale's
* language code is "en" and inLocale's language code is "fr", this function would set
* dispLang to "Anglais".
* @param inLocale Specifies the locale to be used to display the name. In other words,
* if the locale's language code is "en", passing Locale::FRENCH for
* inLocale would result in "Anglais", while passing Locale::GERMAN
* for inLocale would result in "Englisch".
* @param dispLang Receives the language's display name.
* @return A reference to "dispLang".
*/
UnicodeString& getDisplayLanguage( const Locale& inLocale,
UnicodeString& dispLang) const;
/**
* Fills in "dispCountry" with the name of this locale's country in a format suitable
* for user display in the default locale. For example, if the locale's country code
* is "FR" and the default locale's language code is "en", this function would set
* dispCountry to "France".
* @param dispCountry Receives the country's display name.
* @return A reference to "dispCountry".
*/
UnicodeString& getDisplayCountry( UnicodeString& dispCountry) const;
/**
* Fills in "dispCountry" with the name of this locale's country in a format suitable
* for user display in the locale specified by "inLocale". For example, if the locale's
* country code is "US" and inLocale's language code is "fr", this function would set
* dispCountry to "Etats-Unis".
* @param inLocale Specifies the locale to be used to display the name. In other
* words, if the locale's country code is "US", passing
* Locale::FRENCH for inLocale would result in "États-Unis", while
* passing Locale::GERMAN for inLocale would result in
* "Vereinigte Staaten".
* @param dispCountry Receives the country's display name.
* @return A reference to "dispCountry".
*/
UnicodeString& getDisplayCountry( const Locale& inLocale,
UnicodeString& dispCountry) const;
/**
* Fills in "dispVar" with the name of this locale's variant code in a format suitable
* for user display in the default locale.
* @param dispVar Receives the variant's name.
* @return A reference to "dispVar".
*/
UnicodeString& getDisplayVariant( UnicodeString& dispVar) const;
/**
* Fills in "dispVar" with the name of this locale's variant code in a format
* suitable for user display in the locale specified by "inLocale".
* @param inLocale Specifies the locale to be used to display the name.
* @param dispVar Receives the variant's display name.
* @return A reference to "dispVar".
*/
UnicodeString& getDisplayVariant( const Locale& inLocale,
UnicodeString& dispVar) const;
/**
* Fills in "name" with the name of this locale in a format suitable for user display
* in the default locale. This function uses getDisplayLanguage(), getDisplayCountry(),
* and getDisplayVariant() to do its work, and outputs the display name in the format
* "language (country[,variant])". For example, if the default locale is en_US, then
* fr_FR's display name would be "French (France)", and es_MX_Traditional's display name
* would be "Spanish (Mexico,Traditional)".
* @param name Receives the locale's display name.
* @return A reference to "name".
*/
UnicodeString& getDisplayName( UnicodeString& name) const;
/**
* Fills in "name" with the name of this locale in a format suitable for user display
* in the locale specified by "inLocale". This function uses getDisplayLanguage(),
* getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
* name in the format "language (country[,variant])". For example, if inLocale is
* fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's
* display name would be "norvégien (Norvège,NY)".
* @param inLocale Specifies the locale to be used to display the name.
* @param name Receives the locale's display name.
* @return A reference to "name".
*/
UnicodeString& getDisplayName( const Locale& inLocale,
UnicodeString& name) const;
/**
* Generates a hash code for the locale. Since Locales are often used in hashtables,
* caches the value for speed.
*/
int32_t hashCode(void) const;
/**
* Returns a list of all installed locales.
* @param count Receives the number of locales in the list.
* @return A pointer to an array of Locale objects. This array is the list
* of all locales with installed resource files. The caller does NOT
* get ownership of this list, and must NOT delete it.
*/
static const Locale* getAvailableLocales(int32_t& count);
/**
* Returns a list of all 2-letter country codes defined in ISO 3166.
* Can be used to create Locales.
* @param count Receives the number of countries in the list.
* @return A pointer to an array of UnicodeString objects. The caller does NOT
* get ownership of this list, and must NOT delete it.
*/
static const UnicodeString* getISOCountries(int32_t& count);
/**
* Returns a list of all 2-letter language codes defined in ISO 639.
* Can be used to create Locales.
* [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed.
* The list this function returns includes both the new and the old codes for the
* languages whose codes have changed.]
* @param count Receives the number of languages in the list.
* @return A pointer to an array of UnicodeString objects. The caller does NOT
* get ownership of this list, and must NOT delete it.
*/
static const UnicodeString* getISOLanguages(int32_t& count);
/**
* Deprecated 1999dec14 - Get the path to the ResourceBundle locale files. This path will be a
* platform-specific path name ending in a directory separator, so that file
* names may be concatenated to it. This path may be changed by calling
* setDataDirectory(). If setDataDirectory() has not been called yet,
* getDataDirectory() will return a platform-dependent default path as
* specified by TPlatformUtilities::getDefaultDataDirectory().
*
* @return Current data path.
*/
static const char* getDataDirectory(void);
/**
* Deprecated 1999dec14 - Set the path to the ResourceBundle locale files. After making this call,
* all objects in the Unicode Analytics package will read ResourceBundle
* data files in the specified directory in order to obtain locale data.
*
* @param path The new data path to be set to.
*/
static void setDataDirectory(const char* path);
/**
* Initializes this Locale from a C-string locale ID.
* NOTE(review): the implementation appears to copy the ID into fullName and
* then set up the variant pointer; presumably returns *this -- confirm
* against the definition in locid.cpp.
* @param cLocaleID The locale ID string.
*/
Locale& init(const char* cLocaleID);
protected: // only protected for testing purposes. DO NOT USE.
void setFromPOSIXID(const UnicodeString& posixID); // set it from a single string.
void setFromPOSIXID(const char *posixID); // set it from a single string.
/**
* Given an ISO country code, returns an array of Strings containing the ISO
* codes of the languages spoken in that country. Official languages are listed
* in the returned table before unofficial languages, but other than that, the
* order of the returned list is indeterminate. If the value the user passes in
* for "country" is not a valid ISO 3166 country code, or if we don't have language
* information for the specified country, this function returns an empty array.
*
* [This function is not currently part of Locale's API, but is needed in the
* implementation. We hope to add it to the API in a future release.]
* @param country The ISO 2-letter country code of the desired country
* @param count Receives the number of languages in the list.
* @return A pointer to an array of UnicodeString objects. The caller does NOT
* get ownership of this list, and must NOT delete it.
*/
static const UnicodeString* getLanguagesForCountry( const UnicodeString& country,
int32_t& count);
private:
/**
* Initializes a Locale object from a ULocale struct, which is the C locale object,
* and where the actual implementation is.
*
* NOTE(review): this description does not match the declaration below
* (setHashCode takes no ULocale); it looks like a stale comment left over
* from a removed method -- confirm and replace.
*/
void setHashCode(void);
char language[ULOC_LANG_CAPACITY];
char country[ULOC_COUNTRY_CAPACITY];
// Points into fullName; operator= preserves its offset relative to fullName.
char* variant;
// Either fullNameBuffer or a new[]-allocated array (operator= deletes the
// old array with delete[] when it is not fullNameBuffer).
char* fullName;
// Inline storage that fullName points at when no heap array is in use.
char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
// Presumably the cached value returned by hashCode() -- TODO confirm.
int32_t khashCode;
static Locale *localeList;
static int32_t localeListCount;
static UnicodeString *isoLanguages;
static int32_t isoLanguagesCount;
static UnicodeString *isoCountries;
static int32_t isoCountriesCount;
static UHashtable *ctry2LangMapping;
static const UnicodeString compressedCtry2LangMapping;
static Locale fgDefaultLocale;
};
/**
* Inequality is simply the logical negation of operator==, so the two
* comparisons can never disagree.
*/
inline bool_t
Locale::operator!=(const Locale& other) const
{
return !(*this == other);
}
#endif
#error Please include unicode/locid.h instead

View file

@ -4,7 +4,7 @@
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
// $Revision: 1.4 $
// $Revision: 1.5 $
//
// Provides functionality for mapping between
// LCID and Posix IDs.
@ -29,7 +29,7 @@
#include <math.h>
#include "locmap.h"
#include "locid.h"
#include "unicode/locid.h"
#include "mutex.h"
#include "cmemory.h"
#include "cstring.h"
@ -244,7 +244,7 @@ IGlobalLocales::convertToLCID(const char* posixID)
mid = (low + high) / 2;
int32_t compVal = icu_strcmp(langID, fgPosixIDmap[mid].posixLangID());
int32_t compVal = uprv_strcmp(langID, fgPosixIDmap[mid].posixLangID());
if (mid == 0) // not found
break;
@ -446,7 +446,7 @@ ILcidPosixMap::hostID(const char* posixID) const
mid = (low + high) / 2;
int32_t compVal = icu_strcmp(posixID, fRegionMaps[mid].fPosixID);
int32_t compVal = uprv_strcmp(posixID, fRegionMaps[mid].fPosixID);
if (compVal < 0)
high = mid - 1;

View file

@ -6,7 +6,7 @@
*
*****************************************************************************************
*/
// $Revision: 1.4 $
// $Revision: 1.5 $
//===============================================================================
//
// File locmap.hpp : Locale Mapping Classes
@ -24,7 +24,7 @@
#ifndef LOCMAP_H
#define LOCMAP_H
#include "utypes.h"
#include "unicode/utypes.h"
#ifdef XP_CPLUSPLUS
class Locale;
/////////////////////////////////////////////////

View file

@ -19,7 +19,7 @@
//------------------------------------------------------------------------------
#ifndef MUTEX_H
#define MUTEX_H
#include "utypes.h"
#include "unicode/utypes.h"
#include "umutex.h"

View file

@ -10,12 +10,12 @@
#include "dcmpdata.h"
#include "compdata.h"
#include "normlzr.h"
#include "utypes.h"
#include "unistr.h"
#include "chariter.h"
#include "schriter.h"
#include "unicode.h"
#include "unicode/normlzr.h"
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/schriter.h"
#include "unicode/unicode.h"
#include "mutex.h"

View file

@ -1,717 +1 @@
/*
********************************************************************
* COPYRIGHT:
* Copyright (c) 1996-1999, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
*/
#ifndef NORMLZR_H
#define NORMLZR_H
#include "utypes.h"
#include "unistr.h"
#include "chariter.h"
/**
* <tt>Normalizer</tt> transforms Unicode text into an equivalent composed or
* decomposed form, allowing for easier sorting and searching of text.
* <tt>Normalizer</tt> supports the standard normalization forms described in
* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
* Unicode Technical Report #15</a>.
* <p>
* Characters with accents or other adornments can be encoded in
* several different ways in Unicode. For example, take the character "Á"
* (A-acute). In Unicode, this can be encoded as a single character (the
* "composed" form):
* <pre>
* 00C1 LATIN CAPITAL LETTER A WITH ACUTE</pre>
* or as two separate characters (the "decomposed" form):
* <pre>
* 0041 LATIN CAPITAL LETTER A
* 0301 COMBINING ACUTE ACCENT</pre>
* <p>
* To a user of your program, however, both of these sequences should be
* treated as the same "user-level" character "Á". When you are searching or
* comparing text, you must ensure that these two sequences are treated
* equivalently. In addition, you must handle characters with more than one
* accent. Sometimes the order of a character's combining accents is
* significant, while in other cases accent sequences in different orders are
* really equivalent.
* <p>
* Similarly, the string "ffi" can be encoded as three separate letters:
* <pre>
* 0066 LATIN SMALL LETTER F
* 0066 LATIN SMALL LETTER F
* 0069 LATIN SMALL LETTER I</pre>
* or as the single character
* <pre>
* FB03 LATIN SMALL LIGATURE FFI</pre>
* <p>
* The ffi ligature is not a distinct semantic character, and strictly speaking
* it shouldn't be in Unicode at all, but it was included for compatibility
* with existing character sets that already provided it. The Unicode standard
* identifies such characters by giving them "compatibility" decompositions
* into the corresponding semantic characters. When sorting and searching, you
* will often want to use these mappings.
* <p>
* <tt>Normalizer</tt> helps solve these problems by transforming text into the
* canonical composed and decomposed forms as shown in the first example above.
* In addition, you can have it perform compatibility decompositions so that
* you can treat compatibility characters the same as their equivalents.
* Finally, <tt>Normalizer</tt> rearranges accents into the proper canonical
* order, so that you do not have to worry about accent rearrangement on your
* own.
* <p>
* <tt>Normalizer</tt> adds one optional behavior, {@link #IGNORE_HANGUL},
* that differs from
* the standard Unicode Normalization Forms. This option can be passed
* to the {@link #Normalizer constructors} and to the static
* {@link #compose compose} and {@link #decompose decompose} methods. This
* option, and any that are added in the future, will be turned off by default.
* <p>
* There are three common usage models for <tt>Normalizer</tt>. In the first,
* the static {@link #normalize normalize()} method is used to process an
* entire input string at once. Second, you can create a <tt>Normalizer</tt>
* object and use it to iterate through the normalized form of a string by
* calling {@link #first} and {@link #next}. Finally, you can use the
* {@link #setIndex setIndex()} and {@link #getIndex} methods to perform
* random-access iteration, which is very useful for searching.
* <p>
* <b>Note:</b> <tt>Normalizer</tt> objects behave like iterators and have
* methods such as <tt>setIndex</tt>, <tt>next</tt>, <tt>previous</tt>, etc.
* You should note that while the <tt>setIndex</tt> and <tt>getIndex</tt> refer
* to indices in the underlying <em>input</em> text being processed, the
* <tt>next</tt> and <tt>previous</tt> methods iterate through characters
* in the normalized <em>output</em>. This means that there is not
* necessarily a one-to-one correspondence between characters returned
* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
* returned from <tt>setIndex</tt> and <tt>getIndex</tt>. It is for this
* reason that <tt>Normalizer</tt> does not implement the
* {@link CharacterIterator} interface.
* <p>
* <b>Note:</b> <tt>Normalizer</tt> is currently based on version 2.1.8
* of the <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.
* It will be updated as later versions of Unicode are released. If you are
* using this class on a JDK that supports an earlier version of Unicode, it
* is possible that <tt>Normalizer</tt> may generate composed or decomposed
* characters for which your JDK's {@link java.lang.Character} class does not
* have any data.
* <p>
* @author Laura Werner, Mark Davis
*/
class U_COMMON_API Normalizer
{
public:
// This tells us what the bits in the "mode" mean.
enum {
COMPAT_BIT = 1,
DECOMP_BIT = 2,
COMPOSE_BIT = 4
};
/** */
static const UChar DONE;
/** The mode of a Normalizer object */
enum EMode {
/**
* Null operation for use with the {@link #Normalizer constructors}
* and the static {@link #normalize normalize} method. This value tells
* the <tt>Normalizer</tt> to do nothing but return unprocessed characters
* from the underlying String or CharacterIterator. If you have code which
* requires raw text at some times and normalized text at others, you can
* use <tt>NO_OP</tt> for the cases where you want raw text, rather
* than having a separate code path that bypasses <tt>Normalizer</tt>
* altogether.
* <p>
* @see #setMode
*/
NO_OP = 0,
/**
* Canonical decomposition followed by canonical composition. Used with
* the {@link #Normalizer constructors} and the static
* {@link #normalize normalize}
* method to determine the operation to be performed.
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>C</b>.
* <p>
* @see #setMode
*/
COMPOSE = COMPOSE_BIT,
/**
* Compatibility decomposition followed by canonical composition.
* Used with the {@link #Normalizer constructors} and the static
* {@link #normalize normalize} method to determine the operation to be
* performed.
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>KC</b>.
* <p>
* @see #setMode
*/
COMPOSE_COMPAT = COMPOSE_BIT | COMPAT_BIT,
/**
* Canonical decomposition. This value is passed to the
* {@link #Normalizer constructors} and the static
* {@link #normalize normalize}
* method to determine the operation to be performed.
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>D</b>.
* <p>
* @see #setMode
*/
DECOMP = DECOMP_BIT,
/**
* Compatibility decomposition. This value is passed to the
* {@link #Normalizer constructors} and the static
* {@link #normalize normalize}
* method to determine the operation to be performed.
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>KD</b>.
* <p>
* @see #setMode
*/
DECOMP_COMPAT = DECOMP_BIT | COMPAT_BIT
};
/** The options for a Normalizer object */
enum {
/**
* Option to disable Hangul/Jamo composition and decomposition.
* This option applies to Korean text,
* which can be represented either in the Jamo alphabet or in Hangul
* characters, which are really just two or three Jamo combined
* into one visual glyph. Since Jamo takes up more storage space than
* Hangul, applications that process only Hangul text may wish to turn
* this option on when decomposing text.
* <p>
* The Unicode standard treates Hangul to Jamo conversion as a
* canonical decomposition, so this option must be turned <b>off</b> if you
* wish to transform strings into one of the standard
* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
* Unicode Normalization Forms</a>.
* <p>
* @see #setOption
*/
IGNORE_HANGUL = 0x001
};
// Constructors
/**
* Creates a new <tt>Normalizer</tt> object for iterating over the
* normalized form of a given string.
* <p>
* @param str The string to be normalized. The normalization
* will start at the beginning of the string.
*
* @param mode The normalization mode.
*/
Normalizer(const UnicodeString& str,
EMode mode);
/**
* Creates a new <tt>Normalizer</tt> object for iterating over the
* normalized form of a given string.
* <p>
* The <tt>opt</tt> parameter specifies which optional
* <tt>Normalizer</tt> features are to be enabled for this object.
* <p>
* @param str The string to be normalized. The normalization
* will start at the beginning of the string.
*
* @param mode The normalization mode.
*
* @param opt Any optional features to be enabled.
* Currently the only available option is {@link #IGNORE_HANGUL}.
* If you want the default behavior corresponding to one of the
* standard Unicode Normalization Forms, use 0 for this argument.
*/
Normalizer(const UnicodeString& str,
EMode mode,
int32_t opt);
/**
* Creates a new <tt>Normalizer</tt> object for iterating over the
* normalized form of a given UChar string.
* <p>
* @param str The string to be normalized. The normalization
* will start at the beginning of the string.
*
* @param length Length of the string.
*
* @param mode The normalization mode.
*/
Normalizer(const UChar* str,
int32_t length,
EMode mode);
/**
* Creates a new <tt>Normalizer</tt> object for iterating over the
* normalized form of the given text.
* <p>
* @param iter The input text to be normalized. The normalization
* will start at the beginning of the string.
*
* @param mode The normalization mode.
*
*/
Normalizer(const CharacterIterator& iter,
EMode mode);
/**
* Creates a new <tt>Normalizer</tt> object for iterating over the
* normalized form of the given text.
* <p>
* @param iter The input text to be normalized. The normalization
* will start at the beginning of the string.
*
* @param mode The normalization mode.
*
* @param opt Any optional features to be enabled.
* Currently the only available option is {@link #IGNORE_HANGUL}.
* If you want the default behavior corresponding to one of the
* standard Unicode Normalization Forms, use 0 for this argument.
*/
Normalizer(const CharacterIterator& iter,
EMode mode,
int32_t opt);
/**
* Copy constructor.
*/
Normalizer(const Normalizer& copy);
/**
* Destructor
*/
~Normalizer();
//-------------------------------------------------------------------------
// Static utility methods
//-------------------------------------------------------------------------
/**
* Normalizes a <tt>String</tt> using the given normalization operation.
* <p>
* The <tt>options</tt> parameter specifies which optional
* <tt>Normalizer</tt> features are to be enabled for this operation.
* Currently the only available option is {@link #IGNORE_HANGUL}.
* If you want the default behavior corresponding to one of the standard
* Unicode Normalization Forms, use 0 for this argument.
* <p>
* @param source the input string to be normalized.
*
* @param mode the normalization mode
*
* @param options the optional features to be enabled.
*
* @param result The normalized string (on output).
*
* @param status The error code.
*/
static void normalize(const UnicodeString& source,
EMode mode,
int32_t options,
UnicodeString& result,
UErrorCode &status);
/**
* Compose a <tt>String</tt>.
* <p>
* The <tt>options</tt> parameter specifies which optional
* <tt>Normalizer</tt> features are to be enabled for this operation.
* Currently the only available option is {@link #IGNORE_HANGUL}.
* If you want the default behavior corresponding
* to Unicode Normalization Form <b>C</b> or <b>KC</b>,
* use 0 for this argument.
* <p>
* @param source the string to be composed.
*
* @param compat Perform compatibility decomposition before composition.
* If this argument is <tt>false</tt>, only canonical
* decomposition will be performed.
*
* @param options the optional features to be enabled.
*
* @param result The composed string (on output).
*
* @param status The error code.
*/
static void compose(const UnicodeString& source,
bool_t compat,
int32_t options,
UnicodeString& result,
UErrorCode &status);
/**
* Static method to decompose a <tt>String</tt>.
* <p>
* The <tt>options</tt> parameter specifies which optional
* <tt>Normalizer</tt> features are to be enabled for this operation.
* Currently the only available option is {@link #IGNORE_HANGUL}.
* The desired options should be OR'ed together to determine the value
* of this argument. If you want the default behavior corresponding
* to Unicode Normalization Form <b>D</b> or <b>KD</b>,
* use 0 for this argument.
* <p>
* The decomposed text is delivered through <tt>result</tt>; the method
* itself returns nothing.
* <p>
* @param source the string to be decomposed.
*
* @param compat Perform compatibility decomposition.
* If this argument is <tt>false</tt>, only canonical
* decomposition will be performed.
*
* @param options the optional features to be enabled.
*
* @param result The decomposed string (on output).
*
* @param status The error code.
*/
static void decompose(const UnicodeString& source,
bool_t compat,
int32_t options,
UnicodeString& result,
UErrorCode &status);
//-------------------------------------------------------------------------
// CharacterIterator overrides
//-------------------------------------------------------------------------
/**
* Return the current character in the normalized text.
*/
UChar current(void) const;
/**
* Return the first character in the normalized text. This resets
* the <tt>Normalizer's</tt> position to the beginning of the text.
*/
UChar first(void);
/**
* Return the last character in the normalized text. This resets
* the <tt>Normalizer's</tt> position to be just before the
* input text corresponding to that normalized character.
*/
UChar last(void);
/**
* Return the next character in the normalized text and advance
* the iteration position by one. If the end
* of the text has already been reached, {@link #DONE} is returned.
*/
UChar next(void);
/**
* Return the previous character in the normalized text and decrement
* the iteration position by one. If the beginning
* of the text has already been reached, {@link #DONE} is returned.
*/
UChar previous(void);
/**
* Set the iteration position in the input text that is being normalized
* and return the first normalized character at that position.
* <p>
* <b>Note:</b> This method sets the position in the <em>input</em> text,
* while {@link #next} and {@link #previous} iterate through characters
* in the normalized <em>output</em>. This means that there is not
* necessarily a one-to-one correspondence between characters returned
* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
* returned from <tt>setIndex</tt> and {@link #getIndex}.
* <p>
* @param index the desired index in the input text.
*
* @return the first normalized character that is the result of iterating
* forward starting at the given index.
*
* @throws IllegalArgumentException if the given index is less than
* {@link #getBeginIndex} or greater than {@link #getEndIndex}.
* <p>
* NOTE(review): the declaration below carries no exception specification
* and no error-code parameter, and the index accessors declared next to
* it are startIndex() and endIndex(). The <tt>@throws</tt> clause above
* looks carried over from another API -- confirm what actually happens
* for an out-of-range index.
*/
UChar setIndex(UTextOffset index);
/**
* Reset the iterator so that it is in the same state that it was just after
* it was constructed. A subsequent call to <tt>next</tt> will return the first
* character in the normalized text. In contrast, calling <tt>setIndex(0)</tt> followed
* by <tt>next</tt> will return the <em>second</em> character in the normalized text,
* because <tt>setIndex</tt> itself returns the first character.
*/
void reset(void);
/**
* Retrieve the current iteration position in the input text that is
* being normalized. This method is useful in applications such as
* searching, where you need to be able to determine the position in
* the input text that corresponds to a given normalized output character.
* <p>
* <b>Note:</b> This method returns the position in the <em>input</em>, while
* {@link #next} and {@link #previous} iterate through characters in the
* <em>output</em>. This means that there is not necessarily a one-to-one
* correspondence between characters returned by <tt>next</tt> and
* <tt>previous</tt> and the indices passed to and returned from
* <tt>setIndex</tt> and {@link #getIndex}.
*
* @return the current position in the input text.
*/
UTextOffset getIndex(void) const;
/**
* Retrieve the index of the start of the input text. This is the begin index
* of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
* over which this <tt>Normalizer</tt> is iterating.
*
* @see #endIndex
*/
UTextOffset startIndex(void) const;
/**
* Retrieve the index of the end of the input text. This is the end index
* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
* over which this <tt>Normalizer</tt> is iterating.
*
* @see #startIndex
*/
UTextOffset endIndex(void) const;
/**
* Returns true when both iterators refer to the same character in the same
* character-storage object.
*/
// The CharacterIterator overload is disabled; only Normalizer-to-Normalizer
// comparison is declared.
// virtual bool_t operator==(const CharacterIterator& that) const;
bool_t operator==(const Normalizer& that) const;
/**
* Inequality. Defined inline after the class as the logical negation of
* operator==.
*/
inline bool_t operator!=(const Normalizer& that) const;
/**
* Returns a pointer to a new Normalizer that is a clone of this one.
* The caller is responsible for deleting the new clone.
*/
Normalizer* clone(void) const;
/**
* Generates a hash code for this iterator.
*/
int32_t hashCode(void) const;
//-------------------------------------------------------------------------
// Property access methods
//-------------------------------------------------------------------------
/**
* Set the normalization mode for this object.
* <p>
* <b>Note:</b> If the normalization mode is changed while iterating
* over a string, calls to {@link #next} and {@link #previous} may
* return previously buffered characters in the old normalization mode
* until the iteration is able to re-sync at the next base character.
* It is safest to call {@link #setText setText()}, {@link #first},
* {@link #last}, etc. after calling <tt>setMode</tt>.
* <p>
* @param newMode the new mode for this <tt>Normalizer</tt>.
* The supported modes are:
* <ul>
* <li>{@link #COMPOSE} - Unicode canonical decomposition
* followed by canonical composition.
* <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decomposition
* followed by canonical composition.
* <li>{@link #DECOMP} - Unicode canonical decomposition
* <li>{@link #DECOMP_COMPAT} - Unicode compatibility decomposition.
* <li>{@link #NO_OP} - Do nothing but return characters
* from the underlying input text.
* </ul>
*
* @see #getMode
*/
void setMode(EMode newMode);
/**
* Return the basic operation performed by this <tt>Normalizer</tt>
*
* @see #setMode
*/
EMode getMode(void) const;
/**
* Set options that affect this <tt>Normalizer</tt>'s operation.
* Options do not change the basic composition or decomposition operation
* that is being performed, but they control whether
* certain optional portions of the operation are done.
* Currently the only available option is:
* <p>
* <ul>
* <li>{@link #IGNORE_HANGUL} - Do not decompose Hangul syllables into the
* Jamo alphabet and vice-versa. This option is off by default
* (<i>i.e.</i> Hangul processing is enabled) since the Unicode
* standard specifies that Hangul to Jamo is a canonical decomposition.
* For any of the standard Unicode Normalization
* Forms, you should leave this option off.
* </ul>
* <p>
* @param option the option whose value is to be set.
* @param value the new setting for the option. Use <tt>true</tt> to
* turn the option on and <tt>false</tt> to turn it off.
*
* @see #getOption
*/
void setOption(int32_t option,
bool_t value);
/**
* Determine whether an option is turned on or off.
* <p>
* @see #setOption
*/
bool_t getOption(int32_t option) const;
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning.
* <p>
* @param newText The new input text, given as a UnicodeString.
*
* @param status The error code.
*/
void setText(const UnicodeString& newText,
UErrorCode &status);
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning.
* <p>
* @param newText The new input text, given as a CharacterIterator.
*
* @param status The error code.
*/
void setText(const CharacterIterator& newText,
UErrorCode &status);
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning.
* <p>
* @param newText The new input text, given as a UChar string.
*
* @param length Length of the string.
*
* @param status The error code.
*/
void setText(const UChar* newText,
int32_t length,
UErrorCode &status);
/**
* Copies the text under iteration into the UnicodeString referred to by
* "result".
* @param result Receives a copy of the text under iteration.
*/
void getText(UnicodeString& result);
private:
// Private utility methods for iteration
// For documentation, see the source code
UChar nextCompose(void);
UChar prevCompose(void);
UChar nextDecomp(void);
UChar prevDecomp(void);
UChar curForward(void);
UChar curBackward(void);
void init(CharacterIterator* iter,
EMode mode,
int32_t option);
void initBuffer(void);
void clearBuffer(void);
// Utilities used by Compose
static void bubbleAppend(UnicodeString& target,
UChar ch,
uint32_t cclass);
static uint32_t getComposeClass(UChar ch);
static uint16_t composeLookup(UChar ch);
static uint16_t composeAction(uint16_t baseIndex,
uint16_t comIndex);
static void explode(UnicodeString& target,
uint16_t index);
static UChar pairExplode(UnicodeString& target,
uint16_t action);
// Utilities used by Decompose
static void fixCanonical(UnicodeString& result); // Reorders combining marks
static uint8_t getClass(UChar ch); // Gets char's combining class
// Other static utility methods
static void doAppend(const UChar source[],
uint16_t offset,
UnicodeString& dest);
static void doInsert(const UChar source[],
uint16_t offset,
UnicodeString& dest,
UTextOffset pos);
static void hangulToJamo(UChar ch,
UnicodeString& result,
uint16_t decompLimit);
static void jamoAppend(UChar ch,
uint16_t decompLimit,
UnicodeString& dest);
static void jamoToHangul(UnicodeString& buffer,
UTextOffset start);
//-------------------------------------------------------------------------
// Private data
//-------------------------------------------------------------------------
EMode fMode;
int32_t fOptions;
int16_t minDecomp;
// The input text and our position in it
CharacterIterator* text;
// A buffer for holding intermediate results
UnicodeString buffer;
UTextOffset bufferPos;
UTextOffset bufferLimit;
UChar currentChar;
// Another buffer for use during iterative composition
UnicodeString explodeBuf;
enum {
EMPTY = -1,
STR_INDEX_SHIFT = 2, //Must agree with the constants used in NormalizerBuilder
STR_LENGTH_MASK = 0x0003
};
static const UChar HANGUL_BASE;
static const UChar HANGUL_LIMIT;
static const UChar JAMO_LBASE;
static const UChar JAMO_VBASE;
static const UChar JAMO_TBASE;
static const int16_t JAMO_LCOUNT;
static const int16_t JAMO_VCOUNT;
static const int16_t JAMO_TCOUNT;
static const int16_t JAMO_NCOUNT;
friend class ComposedCharIter;
};
/**
* Inequality test: yields the logical negation of operator== applied to
* the same operand.
*/
inline bool_t
Normalizer::operator!= (const Normalizer& other) const
{
    const bool_t isEqual = this->operator==(other);
    return ! isEqual;
}
#endif // _NORMLZR
#error Please include unicode/normlzr.h instead

View file

@ -1,90 +1 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* FILE NAME : platform.h
*
* Date Name Description
* 05/13/98 nos Creation (content moved here from ptypes.h).
* 03/02/99 stephen Added AS400 support.
* 03/30/99 stephen Added Linux support.
* 04/13/99 stephen Reworked for autoconf.
*******************************************************************************
*/
/* Define the platform we're on. */
#ifndef OS2
#define OS2 1
#endif
/* Define whether inttypes.h is available */
#define HAVE_INTTYPES_H 0
/* Determines whether specific types are available */
#define HAVE_INT8_T 0
#define HAVE_UINT8_T 0
#define HAVE_INT16_T 0
#define HAVE_UINT16_T 0
#define HAVE_INT32_T 0
#define HAVE_UINT32_T 0
#define HAVE_BOOL_T 0
/* Determines the endianness of the platform */
#define U_IS_BIG_ENDIAN 0
/*===========================================================================*/
/* Generic data types */
/*===========================================================================*/
/* If your platform does not have the <inttypes.h> header, you may
need to edit the typedefs below. */
/* HAVE_INTTYPES_H and every HAVE_*_T flag are defined as 0 earlier in this
header, so each fallback typedef in the #else branch is active. */
/* NOTE(review): int32_t/uint32_t are mapped to long and T_INT32_MAX to
LONG_MAX, which assumes long is 32 bits wide with this platform's
compiler -- confirm. */
#if HAVE_INTTYPES_H
#include <inttypes.h>
#else
#if ! HAVE_INT8_T
typedef signed char int8_t;
#endif
#if ! HAVE_UINT8_T
typedef unsigned char uint8_t;
#endif
#if ! HAVE_INT16_T
typedef signed short int16_t;
#endif
#if ! HAVE_UINT16_T
typedef unsigned short uint16_t;
#endif
#if ! HAVE_INT32_T
typedef signed long int32_t;
#endif
#if ! HAVE_UINT32_T
typedef unsigned long uint32_t;
#endif
#endif
#include <limits.h>
/* Largest int32_t value; tied to the long-based int32_t typedef above. */
#define T_INT32_MAX (LONG_MAX)
/*===========================================================================*/
/* Character data types */
/*===========================================================================*/
#define U_SIZEOF_WCHAR_T 2
/*===========================================================================*/
/* Symbol import-export control */
/*===========================================================================*/
#define U_EXPORT
#define U_EXPORT2
#define U_IMPORT
#error Please include unicode/pos2.h instead

View file

@ -1,92 +1 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* FILE NAME : platform.h
*
* Date Name Description
* 05/13/98 nos Creation (content moved here from ptypes.h).
* 03/02/99 stephen Added AS400 support.
* 03/30/99 stephen Added Linux support.
* 04/13/99 stephen Reworked for autoconf.
* 09/21/99 barry Created new for OS/400 platform.
*******************************************************************************
*/
/* Define the platform we're on. */
#ifndef OS400
#define OS400
#endif
/* Define whether inttypes.h is available */
#define HAVE_INTTYPES_H 0
/* Determines whether specific types are available */
#define HAVE_INT8_T 0
#define HAVE_UINT8_T 0
#define HAVE_INT16_T 0
#define HAVE_UINT16_T 0
#define HAVE_INT32_T 0
#define HAVE_UINT32_T 0
#define HAVE_BOOL_T 0
/* Determines the endianness of the platform */
#define U_IS_BIG_ENDIAN 1
/*===========================================================================*/
/* Generic data types */
/*===========================================================================*/
/* If your platform does not have the <inttypes.h> header, you may
need to edit the typedefs below. */
/* HAVE_INTTYPES_H and every HAVE_*_T flag are defined as 0 earlier in this
header, so each fallback typedef in the #else branch is active. */
/* NOTE(review): int32_t/uint32_t are mapped to long and T_INT32_MAX to
LONG_MAX, which assumes long is 32 bits wide with this platform's
compiler -- confirm. */
#if HAVE_INTTYPES_H
#include <inttypes.h>
#else
#if ! HAVE_INT8_T
typedef signed char int8_t;
#endif
#if ! HAVE_UINT8_T
typedef unsigned char uint8_t;
#endif
#if ! HAVE_INT16_T
typedef signed short int16_t;
#endif
#if ! HAVE_UINT16_T
typedef unsigned short uint16_t;
#endif
#if ! HAVE_INT32_T
typedef signed long int32_t;
#endif
#if ! HAVE_UINT32_T
typedef unsigned long uint32_t;
#endif
#endif
#include <limits.h>
/* Largest int32_t value; tied to the long-based int32_t typedef above. */
#define T_INT32_MAX (LONG_MAX)
/*===========================================================================*/
/* Character data types */
/*===========================================================================*/
#define U_CHARSET_FAMILY 1
#define U_SIZEOF_WCHAR_T 2
/*===========================================================================*/
/* Symbol import-export control */
/*===========================================================================*/
#define U_EXPORT
#define U_EXPORT2
#define U_IMPORT
#error Please include unicode/pos400.h instead

View file

@ -48,7 +48,7 @@
#include <locale.h>
/* include ICU headers */
#include "utypes.h"
#include "unicode/utypes.h"
#include "umutex.h"
#include "cmemory.h"
#include "cstring.h"
@ -132,7 +132,7 @@ static char* u_bottomNBytesOfDouble(double* d, int n);
/* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70.*/
int32_t
icu_getUTCtime()
uprv_getUTCtime()
{
#ifdef XP_MAC
time_t t, t1, t2;
@ -166,7 +166,7 @@ icu_getUTCtime()
---------------------------------------------------------------------------*/
bool_t
icu_isNaN(double number)
uprv_isNaN(double number)
{
#ifdef IEEE_754
/* This should work in theory, but it doesn't, so we resort to the more*/
@ -209,7 +209,7 @@ icu_isNaN(double number)
}
bool_t
icu_isInfinite(double number)
uprv_isInfinite(double number)
{
#ifdef IEEE_754
/* We know the top bit is the sign bit, so we mask that off in a copy of */
@ -218,7 +218,7 @@ icu_isInfinite(double number)
/* scrutinize the pattern itself. */
/* double a = number; */
/* *(int8_t*)u_topNBytesOfDouble(&a, 1) &= 0x7F;*/
/* return a == icu_getInfinity();*/
/* return a == uprv_getInfinity();*/
/* Instead, We want to see either:*/
/* 7FF0 0000 0000 0000*/
@ -239,32 +239,32 @@ icu_isInfinite(double number)
}
bool_t
icu_isPositiveInfinity(double number)
uprv_isPositiveInfinity(double number)
{
#ifdef IEEE_754
return (number > 0 && icu_isInfinite(number));
return (number > 0 && uprv_isInfinite(number));
#else
return icu_isInfinite(number);
return uprv_isInfinite(number);
#endif
}
bool_t
icu_isNegativeInfinity(double number)
uprv_isNegativeInfinity(double number)
{
#ifdef IEEE_754
return (number < 0 && icu_isInfinite(number));
return (number < 0 && uprv_isInfinite(number));
#else
#ifdef OS390
uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number,
sizeof(uint32_t));
return((highBits & SIGN) && icu_isInfinite(number));
return((highBits & SIGN) && uprv_isInfinite(number));
#endif
return icu_isInfinite(number);
return uprv_isInfinite(number);
#endif
}
double
icu_getNaN()
uprv_getNaN()
{
#if defined(IEEE_754) || defined(OS390)
if( ! fgNaNInitialized) {
@ -289,7 +289,7 @@ icu_getNaN()
}
double
icu_getInfinity()
uprv_getInfinity()
{
#ifdef IEEE_754
if (!fgInfInitialized)
@ -311,37 +311,37 @@ icu_getInfinity()
}
double
icu_floor(double x)
uprv_floor(double x)
{
return floor(x);
}
double
icu_ceil(double x)
uprv_ceil(double x)
{
return ceil(x);
}
double
icu_fabs(double x)
uprv_fabs(double x)
{
return fabs(x);
}
double
icu_modf(double x, double* y)
uprv_modf(double x, double* y)
{
return modf(x, y);
}
double
icu_fmod(double x, double y)
uprv_fmod(double x, double y)
{
return fmod(x, y);
}
double
icu_pow10(int32_t x)
uprv_pow10(int32_t x)
{
#ifdef XP_MAC
return pow(10.0, (double)x);
@ -351,7 +351,7 @@ icu_pow10(int32_t x)
}
double
icu_IEEEremainder(double x, double p)
uprv_IEEEremainder(double x, double p)
{
#ifdef IEEE_754
int32_t hx, hp;
@ -379,11 +379,11 @@ icu_IEEEremainder(double x, double p)
if(hp <= 0x7fdfffff)
x = icu_fmod(x, p + p); /* now x < 2p */
x = uprv_fmod(x, p + p); /* now x < 2p */
if(((hx-hp)|(lx-lp)) == 0)
return 0.0 * x;
x = icu_fabs(x);
p = icu_fabs(p);
x = uprv_fabs(x);
p = uprv_fabs(p);
if (hp < 0x00200000) {
if(x + x > p) {
x -= p;
@ -405,19 +405,19 @@ icu_IEEEremainder(double x, double p)
return x;
#else
/* {sfb} need to fix this*/
return icu_fmod(x, p);
return uprv_fmod(x, p);
#endif
}
double
icu_fmax(double x, double y)
uprv_fmax(double x, double y)
{
#ifdef IEEE_754
int32_t lowBits;
/* first handle NaN*/
if(icu_isNaN(x) || icu_isNaN(y))
return icu_getNaN();
if(uprv_isNaN(x) || uprv_isNaN(y))
return uprv_getNaN();
/* check for -0 and 0*/
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&x, sizeof(uint32_t));
@ -437,20 +437,20 @@ icu_fmax(double x, double y)
}
int32_t
icu_max(int32_t x, int32_t y)
uprv_max(int32_t x, int32_t y)
{
return (x > y ? x : y);
}
double
icu_fmin(double x, double y)
uprv_fmin(double x, double y)
{
#ifdef IEEE_754
int32_t lowBits;
/* first handle NaN*/
if(icu_isNaN(x) || icu_isNaN(y))
return icu_getNaN();
if(uprv_isNaN(x) || uprv_isNaN(y))
return uprv_getNaN();
/* check for -0 and 0*/
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&y, sizeof(uint32_t));
@ -471,7 +471,7 @@ icu_fmin(double x, double y)
}
int32_t
icu_min(int32_t x, int32_t y)
uprv_min(int32_t x, int32_t y)
{
return (x > y ? y : x);
}
@ -484,15 +484,15 @@ icu_min(int32_t x, int32_t y)
* ceil(3.3) = 4, ceil(-3.3) = -3
*/
double
icu_trunc(double d)
uprv_trunc(double d)
{
#ifdef IEEE_754
int32_t lowBits;
/* handle error cases*/
if(icu_isNaN(d)) return icu_getNaN();
if(icu_isInfinite(d)) return icu_getInfinity();
if(uprv_isNaN(d)) return uprv_getNaN();
if(uprv_isInfinite(d)) return uprv_getInfinity();
lowBits = *(uint32_t*) u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
if( (d == 0.0 && (lowBits & SIGN)) || d < 0)
@ -505,7 +505,7 @@ icu_trunc(double d)
}
void
icu_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo)
uprv_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo)
{
*hi = *(int32_t*)u_topNBytesOfDouble(&d, sizeof(int32_t));
*lo = *(uint32_t*)u_bottomNBytesOfDouble(&d, sizeof(uint32_t));
@ -520,7 +520,7 @@ icu_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo)
* (Thanks to Alan Liu for supplying this function.)
*/
int16_t
icu_log10(double d)
uprv_log10(double d)
{
/* The reason this routine is needed is that simply taking the*/
/* log and dividing by log10 yields a result which may be off*/
@ -541,7 +541,7 @@ icu_log10(double d)
}
int32_t
icu_digitsAfterDecimal(double x)
uprv_digitsAfterDecimal(double x)
{
char buffer[20];
int16_t numDigits;
@ -555,7 +555,7 @@ icu_digitsAfterDecimal(double x)
/* (it handles mathematical inaccuracy better than we can), then find out */
/* how many characters are to the right of the decimal point */
sprintf(buffer, "%.9g", x);
p = icu_strchr(buffer, '.');
p = uprv_strchr(buffer, '.');
if (p == 0)
return 0;
@ -565,7 +565,7 @@ icu_digitsAfterDecimal(double x)
/* if the number's string representation is in scientific notation, find */
/* the exponent and take it into account*/
exponent = 0;
p = icu_strchr(buffer, 'e');
p = uprv_strchr(buffer, 'e');
if (p != 0) {
int16_t expPos = p - buffer;
numDigits -= strlen(buffer) - expPos;
@ -592,7 +592,7 @@ icu_digitsAfterDecimal(double x)
/* Time zone utilities */
void
icu_tzset()
uprv_tzset()
{
#ifdef POSIX
tzset();
@ -608,7 +608,7 @@ icu_tzset()
}
int32_t
icu_timezone()
uprv_timezone()
{
#ifdef POSIX
#ifdef OS390
@ -643,7 +643,7 @@ icu_timezone()
}
char*
icu_tzname(int index)
uprv_tzname(int index)
{
#ifdef POSIX
return tzname[index];
@ -675,14 +675,14 @@ gDataDirectory[1024];
U_CAPI void U_EXPORT2
u_setDataDirectory(const char *directory) {
if(directory!=NULL) {
int length=icu_strlen(directory);
int length=uprv_strlen(directory);
if(length<sizeof(gDataDirectory)-1) {
umtx_lock(NULL);
if(length==0) {
*gDataDirectory=0;
} else {
icu_memcpy(gDataDirectory, directory, length);
uprv_memcpy(gDataDirectory, directory, length);
/* terminate the directory with a separator (/ or \) */
if(gDataDirectory[length-1]!=U_FILE_SEP_CHAR) {
@ -715,7 +715,7 @@ getSystemPath(char *path, int size) {
int length=(uint8_t)volName[0];
if(length>0) {
/* convert the Pascal string to a C string */
icu_memmove(path, path+1, length);
uprv_memmove(path, path+1, length);
path[length]=0;
}
return length;
@ -754,7 +754,7 @@ getLibraryPath(char *path, int size) {
if(mod!=NULL) {
if(GetModuleFileName(mod, path, size)>0) {
/* remove the basename and the last file separator */
char *lastSep=icu_strrchr(path, U_FILE_SEP_CHAR);
char *lastSep=uprv_strrchr(path, U_FILE_SEP_CHAR);
if(lastSep!=NULL) {
*lastSep=0;
return lastSep-path;
@ -768,7 +768,7 @@ getLibraryPath(char *path, int size) {
rc=DosQueryModuleName(mod, (LONG)size, path);
if(rc==NO_ERROR) {
/* remove the basename and the last file separator */
char *lastSep=icu_strrchr(path, U_FILE_SEP_CHAR);
char *lastSep=uprv_strrchr(path, U_FILE_SEP_CHAR);
if(lastSep!=NULL) {
*lastSep=0;
return lastSep-path;
@ -790,13 +790,13 @@ getLibraryPath(char *path, int size) {
if(rc>=0) {
/* search for the list item for the library itself */
while(p!=NULL) {
s=icu_strstr(p->l_name, U_COMMON_LIBNAME); /* "libicu-uc.so" */
s=uprv_strstr(p->l_name, U_COMMON_LIBNAME); /* "libicu-uc.so" */
if(s!=NULL) {
if(s>p->l_name) {
/* copy the path, without the basename and the last separator */
length=(s-p->l_name)-1;
if(0<length && length<size) {
icu_memcpy(path, p->l_name, length);
uprv_memcpy(path, p->l_name, length);
path[length]=0;
} else {
length=0;
@ -831,13 +831,13 @@ getLibraryPath(char *path, int size) {
}
p=(struct ld_info *)((uint8_t *)p+p->ldinfo_next);
s=icu_strstr(p->ldinfo_filename, U_COMMON_LIBNAME); /* "libicuuc.a" */
s=uprv_strstr(p->ldinfo_filename, U_COMMON_LIBNAME); /* "libicuuc.a" */
if(s!=NULL) {
if(s>p->ldinfo_filename) {
/* copy the path, without the basename and the last separator */
length=(s-p->ldinfo_filename)-1;
if(0<length && length<size) {
icu_memcpy(path, p->ldinfo_filename, length);
uprv_memcpy(path, p->ldinfo_filename, length);
path[length]=0;
} else {
length=0;
@ -865,13 +865,13 @@ getLibraryPath(char *path, int size) {
break;
}
s=icu_strstr(p->filename, U_COMMON_LIBNAME);
s=uprv_strstr(p->filename, U_COMMON_LIBNAME);
if(s!=NULL) {
if(s>p->filename) {
/* copy the path, without the basename and the last separator */
length=(s-p->filename)-1;
if(0<length && length<size) {
icu_memcpy(path, p->filename, length);
uprv_memcpy(path, p->filename, length);
path[length]=0;
} else {
length=0;
@ -948,8 +948,8 @@ findLibraryPath(char *path, int size) {
}
/* copy the path and add the library filename */
icu_memcpy(path, libPath, length);
icu_strcpy(path+length, U_FILE_SEP_STRING LIB_FILENAME);
uprv_memcpy(path, libPath, length);
uprv_strcpy(path+length, U_FILE_SEP_STRING LIB_FILENAME);
/* does this file exist in this path? */
f=T_FileStream_open(path, "rb");
@ -1016,7 +1016,7 @@ u_getDataDirectory(void) {
char temporaryPath[1024];
/* copy the path with variables to the temporary one */
icu_memcpy(temporaryPath, pathBuffer, size);
uprv_memcpy(temporaryPath, pathBuffer, size);
/* do the replacement and store it in the pathBuffer */
size=ExpandEnvironmentStrings(temporaryPath, pathBuffer, sizeof(pathBuffer));
@ -1036,7 +1036,7 @@ u_getDataDirectory(void) {
if(path==NULL || *path==0) {
length=getLibraryPath(pathBuffer, sizeof(pathBuffer));
if(length>0) {
icu_strcpy(pathBuffer+length, U_FILE_SEP_STRING ".." FALLBACK_PATH);
uprv_strcpy(pathBuffer+length, U_FILE_SEP_STRING ".." FALLBACK_PATH);
path=pathBuffer;
}
}
@ -1045,7 +1045,7 @@ u_getDataDirectory(void) {
if(path==NULL || *path==0) {
length=findLibraryPath(pathBuffer, sizeof(pathBuffer));
if(length>0) {
icu_strcpy(pathBuffer+length, U_FILE_SEP_STRING ".." FALLBACK_PATH);
uprv_strcpy(pathBuffer+length, U_FILE_SEP_STRING ".." FALLBACK_PATH);
path=pathBuffer;
}
}
@ -1058,7 +1058,7 @@ u_getDataDirectory(void) {
# else
length=getSystemPath(pathBuffer, sizeof(pathBuffer));
if(length>0) {
icu_strcpy(pathBuffer+length, FALLBACK_PATH);
uprv_strcpy(pathBuffer+length, FALLBACK_PATH);
path=pathBuffer;
} else {
path=FALLBACK_PATH;
@ -1178,13 +1178,13 @@ mac_lc_rec mac_lc_recs[] = {
#endif
const char*
icu_getDefaultLocaleID()
uprv_getDefaultLocaleID()
{
#ifdef POSIX
char* posixID = getenv("LC_ALL");
if (posixID == 0) posixID = getenv("LANG");
if (posixID == 0) posixID = setlocale(LC_ALL, NULL);
if (icu_strcmp("C", posixID) == 0) posixID = "en_US";
if (uprv_strcmp("C", posixID) == 0) posixID = "en_US";
return posixID;
#endif
@ -1251,7 +1251,7 @@ icu_getDefaultLocaleID()
/* end of platform-specific implementation */
double
icu_nextDouble(double d, bool_t next)
uprv_nextDouble(double d, bool_t next)
{
#ifdef IEEE_754
int32_t highBits;
@ -1263,7 +1263,7 @@ icu_nextDouble(double d, bool_t next)
uint32_t signBit;
/* filter out NaN's */
if (icu_isNaN(d)) {
if (uprv_isNaN(d)) {
return d;
}
@ -1356,7 +1356,7 @@ static char* u_bottomNBytesOfDouble(double* d, int n)
return U_IS_BIG_ENDIAN ? (char*)(d + 1) - n : (char*)d;
}
const char* icu_getDefaultCodepage()
const char* uprv_getDefaultCodepage()
{
#if defined(OS400)
return "ibm-37";
@ -1366,7 +1366,7 @@ const char* icu_getDefaultCodepage()
/* TBD */
#elif defined(WIN32)
static char codepage[12]={ "cp" };
icu_strcpy(codepage+2, _itoa(GetACP(), tempString, 10));
uprv_strcpy(codepage+2, _itoa(GetACP(), tempString, 10));
return codepage;
#elif defined(POSIX)
return "LATIN_1";

View file

@ -1,215 +1 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* FILE NAME : putil.h
*
* Date Name Description
* 05/14/98 nos Creation (content moved here from utypes.h).
* 06/17/99 erm Added IEEE_754
* 07/22/98 stephen Added IEEEremainder, max, min, trunc
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
* 08/24/98 stephen Added longBitsFromDouble
* 03/02/99 stephen Removed openFile(). Added AS400 support.
* 04/15/99 stephen Converted to C
* 11/15/99 helena Integrated S/390 changes for IEEE support.
*******************************************************************************
*/
#ifndef PUTIL_H
#define PUTIL_H
#include "utypes.h"
/* Define this if your platform supports IEEE 754 floating point */
#define IEEE_754
/*===========================================================================*/
/* Platform utilities */
/*===========================================================================*/
/**
* Platform utilities isolate the platform dependencies of the
* library. For each platform which this code is ported to, these
* functions may have to be re-implemented. */
/* Floating point utilities */
U_CAPI bool_t U_EXPORT2 icu_isNaN(double);
U_CAPI bool_t U_EXPORT2 icu_isInfinite(double);
U_CAPI bool_t U_EXPORT2 icu_isPositiveInfinity(double);
U_CAPI bool_t U_EXPORT2 icu_isNegativeInfinity(double);
U_CAPI double U_EXPORT2 icu_getNaN(void);
U_CAPI double U_EXPORT2 icu_getInfinity(void);
U_CAPI double U_EXPORT2 icu_floor(double x);
U_CAPI double U_EXPORT2 icu_ceil(double x);
U_CAPI double U_EXPORT2 icu_fabs(double x);
U_CAPI double U_EXPORT2 icu_modf(double x, double* y);
U_CAPI double U_EXPORT2 icu_fmod(double x, double y);
U_CAPI double U_EXPORT2 icu_pow10(int32_t x);
U_CAPI double U_EXPORT2 icu_IEEEremainder(double x, double y);
U_CAPI double U_EXPORT2 icu_fmax(double x, double y);
U_CAPI double U_EXPORT2 icu_fmin(double x, double y);
U_CAPI int32_t U_EXPORT2 icu_max(int32_t x, int32_t y);
U_CAPI int32_t U_EXPORT2 icu_min(int32_t x, int32_t y);
U_CAPI double U_EXPORT2 icu_trunc(double d);
U_CAPI void U_EXPORT2 icu_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo);
/*
 * icu_isNegative(number): test the sign of a value by inspecting a single
 * byte of its in-memory representation instead of comparing it with zero.
 * The macro takes the address of its argument, so `number` must be an
 * lvalue. It reads the first byte of the object on big-endian platforms and
 * the last byte (offset sizeof(number)-1) otherwise, interprets that byte
 * as a signed char and reports whether it is less than zero.
 * NOTE(review): presumably this is the byte that holds the sign bit of a
 * double on the supported platforms - confirm for any new port.
 */
#if U_IS_BIG_ENDIAN
# define icu_isNegative(number) (*((signed char *)&(number))<0)
#else
# define icu_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
#endif
/*
 * Conversion from a digit value to the character code of that digit, for
 * radix bases from 2 to 19: values 0-9 yield the codes of '0'-'9' and
 * values from 10 upward yield the codes starting at 'A'.
 *
 * Both the argument and the whole expansion are parenthesized so that the
 * macro can be used with compound arguments and inside larger expressions
 * (e.g. 1 + T_CString_itosOffset(d)) without the surrounding operators
 * being absorbed into the conditional. The argument is still evaluated
 * more than once, so it must not have side effects.
 */
#ifndef OS390
#define T_CString_itosOffset(a) ((a)<=9?(0x30+(a)):(0x30+(a)+7))
#else
#define T_CString_itosOffset(a) ((a)<=9?(0xF0+(a)):(0xC1+(a)-10)) /* C1 is EBCDIC 'A' */
#endif
/*
* Return the floor of the log base 10 of a given double.
* This method compensates for inaccuracies which arise naturally when
* computing logs, and always gives the correct value. The parameter
* must be positive and finite.
* (Thanks to Alan Liu for supplying this function.)
*/
/**
* Returns the common log of the double value d.
*
* @param d the double value to apply the common log function for.
* @return the log of value d.
*/
U_CAPI int16_t U_EXPORT2 icu_log10(double d);
/**
* Returns the number of digits after the decimal point in a double number x.
*
* @param x the double number
*/
U_CAPI int32_t U_EXPORT2 icu_digitsAfterDecimal(double x);
/**
* Time zone utilities
*
* Wrappers for C runtime library functions relating to timezones.
* The icu_tzset() function (similar to tzset) uses the current setting
* of the environment variable TZ to assign values to three global
* variables: daylight, timezone, and tzname. These variables have the
* following meanings, and are declared in &lt;time.h>.
*
* daylight Nonzero if daylight-saving-time zone (DST) is specified
* in TZ; otherwise, 0. Default value is 1.
* timezone Difference in seconds between coordinated universal
* time and local time. E.g., -28,800 for PST (GMT-8hrs)
* tzname(0) Three-letter time-zone name derived from TZ environment
* variable. E.g., "PST".
* tzname(1) Three-letter DST zone name derived from TZ environment
* variable. E.g., "PDT". If DST zone is omitted from TZ,
* tzname(1) is an empty string.
*
* Notes: For example, to set the TZ environment variable to correspond
* to the current time zone in Germany, you can use one of the
* following statements:
*
* set TZ=GST1GDT
* set TZ=GST+1GDT
*
* If the TZ value is not set, icu_tzset() attempts to use the time zone
* information specified by the operating system. Under Windows NT
* and Windows 95, this information is specified in the Control Panel's
* Date/Time application.
*/
U_CAPI void U_EXPORT2 icu_tzset(void);
U_CAPI int32_t U_EXPORT2 icu_timezone(void);
U_CAPI char* U_EXPORT2 icu_tzname(int index);
/* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70. */
U_CAPI int32_t U_EXPORT2 icu_getUTCtime(void);
/* Return the data directory for this platform. */
U_CAPI const char* U_EXPORT2 u_getDataDirectory(void);
/* Set the data directory. */
U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory);
/* Return the default codepage for this platform and locale */
U_CAPI const char* U_EXPORT2 icu_getDefaultCodepage(void);
/* Return the default locale ID string by querying the system, or
zero if one cannot be found. */
U_CAPI const char* U_EXPORT2 icu_getDefaultLocaleID(void);
/*
* Finds the least double greater than d (if positive == true),
* or the greatest double less than d (if positive == false).
*
* This is a special purpose function defined by the ChoiceFormat API
* documentation.
* It is not a general purpose function and not defined for NaN or Infinity
*/
U_CAPI double U_EXPORT2 icu_nextDouble(double d, bool_t positive);
/**
* Filesystem file and path separator characters.
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
*/
#ifdef XP_MAC
# define U_FILE_SEP_CHAR ':'
# define U_PATH_SEP_CHAR ';'
# define U_FILE_SEP_STRING ":"
# define U_PATH_SEP_STRING ";"
#elif defined(WIN32) || defined(OS2)
# define U_FILE_SEP_CHAR '\\'
# define U_PATH_SEP_CHAR ';'
# define U_FILE_SEP_STRING "\\"
# define U_PATH_SEP_STRING ";"
#else
# define U_FILE_SEP_CHAR '/'
# define U_PATH_SEP_CHAR ':'
# define U_FILE_SEP_STRING "/"
# define U_PATH_SEP_STRING ":"
#endif
/**
* Convert char characters to UChar characters.
* This utility function is useful only for "invariant characters"
* that are encoded in the platform default encoding.
* They are a small, constant subset of the encoding and include
* just the latin letters, digits, and some punctuation.
* For details, see utypes.h .
*
* @param cs Input string, points to <code>length</code>
* character bytes from a subset of the platform encoding.
* @param us Output string, points to memory for <code>length</code>
* Unicode characters.
* @param length The number of characters to convert; this may
* include the terminating <code>NUL</code>.
*/
U_CAPI void U_EXPORT2
u_charsToUChars(const char *cs, UChar *us, UTextOffset length);
/**
* Convert UChar characters to char characters.
* This utility function is useful only for "invariant characters"
* that can be encoded in the platform default encoding.
* They are a small, constant subset of the encoding and include
* just the latin letters, digits, and some punctuation.
* For details, see utypes.h .
*
* @param us Input string, points to <code>length</code>
* Unicode characters that can be encoded with the
* codepage-invariant subset of the platform encoding.
* @param cs Output string, points to memory for <code>length</code>
* character bytes.
* @param length The number of characters to convert; this may
* include the terminating <code>NUL</code>.
*/
U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar *us, char *cs, UTextOffset length);
#endif
#error Please include unicode/putil.h instead

View file

@ -1,90 +1 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* FILE NAME : platform.h
*
* Date Name Description
* 05/13/98 nos Creation (content moved here from ptypes.h).
* 03/02/99 stephen Added AS400 support.
* 03/30/99 stephen Added Linux support.
* 04/13/99 stephen Reworked for autoconf.
*******************************************************************************
*/
/* Define the platform we're on. */
#ifndef WIN32
#define WIN32
#endif
/* Define whether inttypes.h is available */
#define HAVE_INTTYPES_H 0
/* Determines whether specific types are available */
#define HAVE_INT8_T 0
#define HAVE_UINT8_T 0
#define HAVE_INT16_T 0
#define HAVE_UINT16_T 0
#define HAVE_INT32_T 0
#define HAVE_UINT32_T 0
#define HAVE_BOOL_T 0
/* Determines the endianness of the platform */
#define U_IS_BIG_ENDIAN 0
/*===========================================================================*/
/* Generic data types */
/*===========================================================================*/
/* If your platform does not have the <inttypes.h> header, you may
need to edit the typedefs below. */
#if HAVE_INTTYPES_H
#include <inttypes.h>
#else
#if ! HAVE_INT8_T
typedef signed char int8_t;
#endif
#if ! HAVE_UINT8_T
typedef unsigned char uint8_t;
#endif
#if ! HAVE_INT16_T
typedef signed short int16_t;
#endif
#if ! HAVE_UINT16_T
typedef unsigned short uint16_t;
#endif
#if ! HAVE_INT32_T
typedef signed long int32_t;
#endif
#if ! HAVE_UINT32_T
typedef unsigned long uint32_t;
#endif
#endif
#include <limits.h>
#define T_INT32_MAX (LONG_MAX)
/*===========================================================================*/
/* Character data types */
/*===========================================================================*/
#define U_SIZEOF_WCHAR_T 2
/*===========================================================================*/
/* Symbol import-export control */
/*===========================================================================*/
#define U_EXPORT __declspec(dllexport)
#define U_EXPORT2
#define U_IMPORT __declspec(dllimport)
#error Please include unicode/pwin32.h instead

View file

@ -18,7 +18,7 @@
*/
#include "uhash.h"
#include "unistr.h"
#include "unicode/unistr.h"
/**
* A class which represents an ordinary Hashtable which deletes its contents when it

View file

@ -18,9 +18,9 @@
#ifndef RBDATA_H
#define RBDATA_H 1
#include "utypes.h"
#include "unicode/utypes.h"
#include "uhash.h"
#include "unistr.h"
#include "unicode/unistr.h"
/**
* Abstract base class for data stored in resource bundles. These

View file

@ -19,10 +19,10 @@
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "ustring.h"
#include "unicode/ustring.h"
#include "rbdata.h"
#include "unistr.h"
#include "unicode/unistr.h"
#include "rbdata.h"
@ -62,8 +62,8 @@ read_ustring(FileStream *rb,
while(remain != 0) {
/* Read the next chunk of data */
readLen = icu_min(BUF_SIZE, remain);
icu_memset(buf, 0, readLen*sizeof(UChar));
readLen = uprv_min(BUF_SIZE, remain);
uprv_memset(buf, 0, readLen*sizeof(UChar));
T_FileStream_read(rb, buf, sizeof(UChar) * readLen);
/* Append the chunk to the string */

View file

@ -18,10 +18,10 @@
#ifndef RBREAD_H
#define RBREAD_H 1
#include "utypes.h"
#include "unicode/utypes.h"
#include "filestrm.h"
#include "uhash.h"
#include "unistr.h"
#include "unicode/unistr.h"
/* Byte order mark for compiled resource bundle files */
static const int32_t sBOM = 0x021C;

View file

@ -1,123 +1 @@
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines Corporation and
* others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/17/99 aliu Creation. Ported from java. Modified to
* match current UnicodeString API. Forced
* to use name "handleReplaceBetween" because
* of existing methods in UnicodeString.
**********************************************************************
*/
#ifndef REP_H
#define REP_H
#include "utypes.h"
class UnicodeString;
/**
* <code>Replaceable</code> is an abstract base class representing a
* string of characters that supports the replacement of a range of
* itself with a new string of characters. It is used by APIs that
* change a piece of text while retaining style attributes. In other
* words, an implicit aspect of the <code>Replaceable</code> API is
* that during a replace operation, new characters take on the
* attributes, if any, of the old characters. For example, if the
* string "the <b>bold</b> font" has range (4, 8) replaced with
* "strong", then it becomes "the <b>strong</b> font".
*
* <p><code>Replaceable</code> specifies ranges using an initial
* offset and a limit offset. The range of characters thus specified
* includes the characters at offset initial..limit-1. That is, the
* start offset is inclusive, and the limit offset is exclusive.
*
* <p><code>Replaceable</code> also includes API to access characters
* in the string: <code>length()</code>, <code>charAt()</code>, and
* <code>extractBetween()</code>.
*
* @author Alan Liu
*/
class U_COMMON_API Replaceable {
public:
/**
* Destructor.
*/
virtual ~Replaceable();
/**
* Return the number of characters in the text.
* @return number of characters in text
*/
virtual int32_t length() const = 0;
/**
* Return the character at the given offset into the text.
* @param offset an integer between 0 and <code>length()</code>-1
* inclusive
* @return character of text at given offset
*/
virtual UChar charAt(UTextOffset offset) const = 0;
/**
* Copy characters from this object into the destination character
* array. The first character to be copied is at index
* <code>srcStart</code>; the last character to be copied is at
* index <code>srcLimit-1</code> (thus the total number of
* characters to be copied is <code>srcLimit-srcStart</code>). The
* characters are copied into the subarray of <code>dst</code>
* starting at index <code>dstStart</code> and ending at index
* <code>dstStart + (srcLimit-srcStart) - 1</code>.
*
* @param srcStart the beginning index to copy, inclusive; <code>0
* <= srcStart <= srcLimit</code>.
* @param srcLimit the ending index to copy, exclusive;
* <code>srcStart <= srcLimit <= length()</code>.
* @param dst the destination array.
* @param dstStart the start offset in the destination array. */
virtual void extractBetween(UTextOffset srcStart,
UTextOffset srcLimit,
UChar* dst,
UTextOffset dstStart = 0) const = 0;
/**
* Replace a substring of this object with the given text. If the
* characters being replaced have attributes, the new characters
* that replace them should be given the same attributes.
*
* @param start the beginning index, inclusive; <code>0 <= start
* <= limit</code>.
* @param limit the ending index, exclusive; <code>start <= limit
* <= length()</code>.
* @param text the text to replace characters <code>start</code>
* to <code>limit - 1</code> */
virtual void handleReplaceBetween(UTextOffset start,
UTextOffset limit,
const UnicodeString& text) = 0;
// Note: All other methods in this class take the names of
// existing UnicodeString methods. This method is the exception.
// It is named differently because all replace methods of
// UnicodeString return a UnicodeString&. The 'between' is
// required in order to conform to the UnicodeString naming
// convention; API taking start/length are named <operation>, and
// those taking start/limit are named <operationBetween>. The
// 'handle' is added because 'replaceBetween' and
// 'doReplaceBetween' are already taken.
protected:
/**
* Default constructor.
*/
Replaceable();
};
// Empty inline definitions of the constructor and destructor: the class
// declares no data members of its own, so neither has any work to do.
inline Replaceable::Replaceable() {}
inline Replaceable::~Replaceable() {}
#endif
#error Please include unicode/rep.h instead

View file

@ -47,7 +47,7 @@
#include "rbcache.h"
#include "resbund.h"
#include "unicode/resbund.h"
#include "mutex.h"
#include "unistrm.h"
@ -342,9 +342,9 @@ ResourceBundle::ResourceBundle(const wchar_t* path,
: fgCache(fgUserCache),
fgVisitedFiles(fgUserVisitedFiles)
{
int32_t wideNameLen = icu_mbstowcs(NULL, kDefaultSuffix, kDefaultSuffixLen);
int32_t wideNameLen = uprv_mbstowcs(NULL, kDefaultSuffix, kDefaultSuffixLen);
wchar_t* wideName = new wchar_t[wideNameLen + 1];
icu_mbstowcs(wideName, kDefaultSuffix, kDefaultSuffixLen);
uprv_mbstowcs(wideName, kDefaultSuffix, kDefaultSuffixLen);
wideName[wideNameLen] = 0;
constructForLocale(PathInfo(path, wideName), locale, err);
delete [] wideName;
@ -914,25 +914,25 @@ ResourceBundle::getVersionNumber() const
// the length of the major part + the length of the separator
// (==1) + the length of the minor part (+ 1 for the zero byte at
// the end).
int32_t len = icu_strlen(ICU_VERSION);
int32_t len = uprv_strlen(ICU_VERSION);
int32_t minor_len = 0;
if(U_SUCCESS(status) && minor_version.length() > 0)
minor_len = minor_version.length();
len += (minor_len > 0) ? minor_len : 1 /*==icu_strlen(kDefaultMinorVersion)*/;
len += (minor_len > 0) ? minor_len : 1 /*==uprv_strlen(kDefaultMinorVersion)*/;
++len; // Add length of separator
// Allocate the string, and build it up.
// + 1 for zero byte
((ResourceBundle*)this)->fVersionID = new char[1 + len];
icu_strcpy(fVersionID, ICU_VERSION);
icu_strcat(fVersionID, kVersionSeparator);
uprv_strcpy(fVersionID, ICU_VERSION);
uprv_strcat(fVersionID, kVersionSeparator);
if(minor_len > 0) {
minor_version.extract(0, minor_len, fVersionID + len - minor_len);
fVersionID[len] = 0;
}
else {
icu_strcat(fVersionID, kDefaultMinorVersion);
uprv_strcat(fVersionID, kDefaultMinorVersion);
}
}
return fVersionID;
@ -1148,10 +1148,10 @@ ResourceBundle::PathInfo::PathInfo(const PathInfo& source)
fWPrefix(NULL), fWSuffix(NULL)
{
if(source.fWPrefix) {
fWPrefix = new wchar_t[icu_wcslen(source.fWPrefix)+1];
fWSuffix = new wchar_t[icu_wcslen(source.fWSuffix)+1];
icu_wcscpy(fWPrefix, source.fWPrefix);
icu_wcscpy(fWSuffix, source.fWSuffix);
fWPrefix = new wchar_t[uprv_wcslen(source.fWPrefix)+1];
fWSuffix = new wchar_t[uprv_wcslen(source.fWSuffix)+1];
uprv_wcscpy(fWPrefix, source.fWPrefix);
uprv_wcscpy(fWSuffix, source.fWSuffix);
}
}
@ -1176,10 +1176,10 @@ ResourceBundle::PathInfo::PathInfo(const wchar_t* path,
fWPrefix(NULL),
fWSuffix(NULL)
{
fWPrefix = new wchar_t[icu_wcslen(path)+1];
fWSuffix = new wchar_t[icu_wcslen(suffix)+1];
icu_wcscpy(fWPrefix, path);
icu_wcscpy(fWSuffix, suffix);
fWPrefix = new wchar_t[uprv_wcslen(path)+1];
fWSuffix = new wchar_t[uprv_wcslen(suffix)+1];
uprv_wcscpy(fWPrefix, path);
uprv_wcscpy(fWSuffix, suffix);
}
ResourceBundle::PathInfo::~PathInfo()
@ -1195,10 +1195,10 @@ ResourceBundle::PathInfo::operator=(const PathInfo& source)
wchar_t* tempPref = NULL;
wchar_t* tempSuff = NULL;
if(source.fWPrefix) {
tempPref = new wchar_t[icu_wcslen(source.fWPrefix)+1];
tempSuff = new wchar_t[icu_wcslen(source.fWSuffix)+1];
icu_wcscpy(tempPref, source.fWPrefix);
icu_wcscpy(tempSuff, source.fWSuffix);
tempPref = new wchar_t[uprv_wcslen(source.fWPrefix)+1];
tempSuff = new wchar_t[uprv_wcslen(source.fWSuffix)+1];
uprv_wcscpy(tempPref, source.fWPrefix);
uprv_wcscpy(tempSuff, source.fWSuffix);
}
delete fWPrefix;
fWPrefix = tempPref;
@ -1229,18 +1229,18 @@ ResourceBundle::PathInfo::makeCacheKey(const UnicodeString& name) const
if(fWPrefix) {
UnicodeString key;
size_t prefSize = icu_wcstombs(NULL, fWPrefix, ((size_t)-1) >> 1);
size_t suffSize = icu_wcstombs(NULL, fWSuffix, ((size_t)-1) >> 1);
size_t tempSize = icu_max((int32_t)prefSize, (int32_t)suffSize);
size_t prefSize = uprv_wcstombs(NULL, fWPrefix, ((size_t)-1) >> 1);
size_t suffSize = uprv_wcstombs(NULL, fWSuffix, ((size_t)-1) >> 1);
size_t tempSize = uprv_max((int32_t)prefSize, (int32_t)suffSize);
char *temp = new char[tempSize + 1];
tempSize = icu_wcstombs(temp, fWPrefix, prefSize);
tempSize = uprv_wcstombs(temp, fWPrefix, prefSize);
temp[tempSize] = 0;
key += UnicodeString(temp);
key += name;
tempSize = icu_wcstombs(temp, fWSuffix, suffSize);
tempSize = uprv_wcstombs(temp, fWSuffix, suffSize);
temp[tempSize] = 0;
key += UnicodeString(temp);
@ -1265,18 +1265,18 @@ ResourceBundle::PathInfo::makeHashkey(const UnicodeString& localeName) const
key += kSeparator;
size_t prefSize = icu_wcstombs(NULL, fWPrefix, ((size_t)-1) >> 1);
size_t suffSize = icu_wcstombs(NULL, fWSuffix, ((size_t)-1) >> 1);
size_t tempSize = icu_max((int32_t)prefSize, (int32_t)suffSize);
size_t prefSize = uprv_wcstombs(NULL, fWPrefix, ((size_t)-1) >> 1);
size_t suffSize = uprv_wcstombs(NULL, fWSuffix, ((size_t)-1) >> 1);
size_t tempSize = uprv_max((int32_t)prefSize, (int32_t)suffSize);
char *temp = new char[tempSize + 1];
tempSize = icu_wcstombs(temp, fWSuffix, suffSize);
tempSize = uprv_wcstombs(temp, fWSuffix, suffSize);
temp[tempSize] = 0;
key += UnicodeString(temp);
key += kSeparator;
tempSize = icu_wcstombs(temp, fWPrefix, prefSize);
tempSize = uprv_wcstombs(temp, fWPrefix, prefSize);
temp[tempSize] = 0;
key += UnicodeString(temp);
@ -1303,29 +1303,29 @@ ResourceBundle::PathInfo::openFile(const UnicodeString& localeName) const
char* temp = new char[nameSize + 1];
localeName.extract(0, nameSize, temp);
temp[nameSize] = 0;
int32_t wideNameLen = icu_mbstowcs(NULL, temp, nameSize);
int32_t wideNameLen = uprv_mbstowcs(NULL, temp, nameSize);
wchar_t* wideName = new wchar_t[wideNameLen + 1];
icu_mbstowcs(wideName, temp, nameSize);
uprv_mbstowcs(wideName, temp, nameSize);
wideName[wideNameLen] = 0;
delete [] temp;
size_t prefLen = icu_wcslen(fWPrefix);
size_t suffLen = icu_wcslen(fWSuffix);
size_t prefLen = uprv_wcslen(fWPrefix);
size_t suffLen = uprv_wcslen(fWSuffix);
int32_t destSize = prefLen + suffLen + wideNameLen;
wchar_t* dest = new wchar_t[destSize + 1];
icu_wcscpy(dest, fWPrefix);
uprv_wcscpy(dest, fWPrefix);
dest[prefLen] = 0;
icu_wcscat(dest, wideName);
uprv_wcscat(dest, wideName);
dest[prefLen + wideNameLen] = 0;
icu_wcscat(dest, fWSuffix);
uprv_wcscat(dest, fWSuffix);
dest[destSize] = 0;
int32_t fmodeLen = icu_mbstowcs(NULL, "rb", 2);
int32_t fmodeLen = uprv_mbstowcs(NULL, "rb", 2);
wchar_t* fmode = new wchar_t[fmodeLen + 1];
icu_mbstowcs(fmode, "rb", 2);
uprv_mbstowcs(fmode, "rb", 2);
fmode[fmodeLen] = 0;
FileStream* result = T_FileStream_wopen(dest, fmode);

View file

@ -1,633 +1 @@
/*
*******************************************************************************
*
* Copyright (C) 1996-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* File resbund.h
*
* CREATED BY
* Richard Gillam
*
* Modification History:
*
* Date Name Description
* 2/5/97 aliu Added scanForLocaleInFile. Added
* constructor which attempts to read resource bundle
* from a specific file, without searching other files.
* 2/11/97 aliu Added UErrorCode return values to constructors. Fixed
* infinite loops in scanForFile and scanForLocale.
* Modified getRawResourceData to not delete storage in
* localeData and resourceData which it doesn't own.
* Added Mac compatibility #ifdefs for tellp() and
* ios::nocreate.
* 2/18/97 helena Updated with 100% documentation coverage.
* 3/13/97 aliu Rewrote to load in entire resource bundle and store
* it as a Hashtable of ResourceBundleData objects.
* Added state table to govern parsing of files.
* Modified to load locale index out of new file distinct
* from default.txt.
* 3/25/97 aliu Modified to support 2-d arrays, needed for timezone data.
* Added support for custom file suffixes. Again, needed to
* support timezone data.
* 4/7/97 aliu Cleaned up.
* 03/02/99 stephen Removed dependency on FILE*.
* 03/29/99 helena Merged Bertrand and Stephen's changes.
* 06/11/99 stephen Removed parsing of .txt files.
* Reworked to use new binary format.
* Cleaned up.
* 06/14/99 stephen Removed methods taking a filename suffix.
* 11/09/99 weiv Added getLocale(), fRealLocale, removed fRealLocaleID
*******************************************************************************
*/
#ifndef RESBUND_H
#define RESBUND_H
#include "utypes.h"
#include "unistr.h"
#include "locid.h"
#include <wchar.h>
class RBHashtable;
class ResourceBundleData;
class ResourceBundleCache;
class VisitedFileCache;
#ifndef _FILESTRM
typedef struct _FileStream FileStream;
#endif
/**
* A class representing a collection of resource information pertaining to a given
* locale. A resource bundle provides a way of accessing locale-specific information in
* a data file. You create a resource bundle that manages the resources for a given
* locale and then ask it for individual resources.
* <P>
* The resource bundle file is a text (ASCII or Unicode) file with the format:
* <pre>
* . locale {
* . tag1 {...}
* . tag2 {...}
* . }
* </pre>
* The tags are used to retrieve the data later. You may not have multiple instances of
* the same tag.
* <P>
* Four data types are supported. These are solitary strings, comma-delimited lists of
* strings, 2-dimensional arrays of strings, and tagged lists of strings.
* <P>
* Note that all data is textual. Adjacent strings are merged by the low-level
* tokenizer, so that the following effects occur: foo bar, baz // 2 elements, "foo
* bar", and "baz" "foo" "bar", baz // 2 elements, "foobar", and "baz" Note that a
* single intervening space is added between merged strings, unless they are both double
* quoted. This extends to more than two strings in a row.
* <P>
* Whitespace is ignored, as in a C source file.
* <P>
* Solitary strings have the format:
* <pre>
* . Tag { Data }
* </pre>
* This is indistinguishable from a comma-delimited list with only one element, and in
* fact may be retrieved as such (as an array, or as element 0 of an array).
* <P>
* Comma-delimited lists have the format:
* <pre>
* . Tag { Data, Data, Data }
* </pre>
* Parsing is lenient; a final string, after the last element, is allowed.
* <P>
* Tagged lists have the format:
* <pre>
* . Tag { Subtag { Data } Subtag {Data} }
* </pre>
* Data is retrieved by specifying the subtag.
* <P>
* Two-dimensional arrays have the format:
* <pre>
* . TwoD {
* . { r1c1, r1c2, ..., r1cm },
* . { r2c1, r2c2, ..., r2cm },
* . ...
* . { rnc1, rnc2, ..., rncm }
* . }
* </pre>
* where n is the number of rows, and m is the number of columns. Parsing is lenient (as
* in other data types). A final comma is always allowed after the last element; either
* the last string in a row, or the last row itself. Furthermore, since there is no
* ambiguity, the commas between the rows are entirely optional. (However, if a comma is
* present, there can only be one comma, no more.) It is possible to have zero columns,
* as follows:
* <pre>
* . Odd { {} {} {} } // 3 x 0 array
* </pre>
* But it is impossible to have zero rows. The smallest array is thus a 1 x 0 array,
* which looks like this:
* <pre>
* . Smallest { {} } // 1 x 0 array
* </pre>
* The array must be strictly rectangular; that is, each row must have the same number
* of elements.
* <P>
* This is an example for using a possible custom resource:
* <pre>
* . Locale currentLocale;
* . UErrorCode success = U_ZERO_ERROR;
* . ResourceBundle myResources("MyResources", currentLocale, success );
* .
* . UnicodeString button1Title, button2Title;
* . myResources.getString("OkKey", button1Title, success );
* . myResources.getString("CancelKey", button2Title, success );
* </pre>
*/
class U_COMMON_API ResourceBundle {
public:
/**
* Constructor
*
* @param path This is a full pathname in the platform-specific format for the
* directory containing the resource data files we want to load
* resources from. We use locale IDs to generate filenames, and the
* filenames have this string prepended to them before being passed
* to the C++ I/O functions. Therefore, this string must always end
* with a directory delimiter (whatever that is for the target OS)
* for this class to work correctly.
* @param locale This is the locale this resource bundle is for. To get resources
* for the French locale, for example, you would create a
* ResourceBundle passing Locale::FRENCH for the "locale" parameter,
* and all subsequent calls to that resource bundle will return
* resources that pertain to the French locale. If the caller doesn't
* pass a locale parameter, the default locale for the system (as
* returned by Locale::getDefault()) will be used.
* The UErrorCode& err parameter is used to return status information to the user. To
* check whether the construction succeeded or not, you should check the value of
* U_SUCCESS(err). If you wish more detailed information, you can check for
* informational error results which still indicate success. U_USING_FALLBACK_ERROR
* indicates that a fall back locale was used. For example, 'de_CH' was requested,
* but nothing was found there, so 'de' was used. U_USING_DEFAULT_ERROR indicates that
* the default locale data was used; neither the requested locale nor any of its
* fall back locales could be found.
*/
ResourceBundle( const UnicodeString& path,
const Locale& locale,
UErrorCode& err);
ResourceBundle( const UnicodeString& path,
UErrorCode& err);
ResourceBundle( const wchar_t* path,
const Locale& locale,
UErrorCode& err);
~ResourceBundle();
/**
* Returns the contents of a string resource. Resource data is undifferentiated
* Unicode text. The resource file may contain quoted strings or escape sequences;
* these will be parsed prior to the data's return.
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
*
* @param resourceTag The resource tag of the string resource the caller wants
* @param theString Receives the actual data in the resource
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found.
*/
void getString( const char *resourceTag,
UnicodeString& theString,
UErrorCode& err) const;
/**
* Returns the contents of a string resource. Resource data is undifferentiated
* Unicode text. The resource file may contain quoted strings or escape sequences;
* these will be parsed prior to the data's return.
*
* @param resourceTag The resource tag of the string resource the caller wants
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found.
* @return A pointer to the string from the resource bundle, or NULL if there was
* an error.
*/
const UnicodeString* getString( const char *resourceTag,
UErrorCode& err) const;
/**
* Returns the contents of a string-array resource. This will return the contents of
* a string-array (comma-delimited-list) resource as a C++ array of UnicodeString
* objects. The number of elements in the array is returned in numArrayItems.
* Calling getStringArray on a resource of type string will return an array with one
* element; calling it on a resource of type tagged-array results in a
* U_MISSING_RESOURCE_ERROR error.
*
* @param resourceTag The resource tag of the string-array resource the caller
* wants
* @param numArrayItems Receives the number of items in the array the function
* returns.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found.
* @return The resource requested, as a pointer to an array of
* UnicodeStrings. The caller does not own the storage and
* must not delete it.
*/
const UnicodeString* getStringArray( const char *resourceTag,
int32_t& numArrayItems,
UErrorCode& err) const;
/**
* Returns a single item from a string-array resource. This will return the contents
* of a single item in a resource of string-array (comma-delimited-list) type. If
* the resource is not an array, a U_MISSING_RESOURCE_ERROR will be returned in err.
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
* The replacement overload takes the same resourceTag, index and err arguments and
* returns a const UnicodeString* instead of filling in theArrayItem.
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param index The index (zero-based) of the particular array item the user
* wants to extract from the resource.
* @param theArrayItem Receives the actual text of the desired array item.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found, if the resource is not an
* array, or if the index was out of range.
*/
void getArrayItem( const char *resourceTag,
int32_t index,
UnicodeString& theArrayItem,
UErrorCode& err) const;
/**
* Returns a single item from a string-array resource. This will return the contents
* of a single item in a resource of string-array (comma-delimited-list) type. If
* the resource is not an array, a U_MISSING_RESOURCE_ERROR will be returned in err.
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param index The index (zero-based) of the particular array item the user
* wants to extract from the resource.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found, if the resource is not an
* array, or if the index was out of range.
* @return A pointer to the text of the array item, or NULL if there was an error.
*/
const UnicodeString* getArrayItem( const char *resourceTag,
int32_t index,
UErrorCode& err) const;
/**
* Return the contents of a 2-dimensional array resource. The return value will be a
* UnicodeString** array. (This is really an array of pointers; each pointer is a
* ROW of the data.) The number of rows and columns is returned. If the resource is
* of the wrong type, or not present, U_MISSING_RESOURCE_ERROR is placed in err.
*
* @param resourceTag The resource tag of the 2-dimensional array resource the
* caller wants
* @param rowCount Receives the number of rows in the array the function
* returns.
* @param columnCount Receives the number of columns in the array the function
* returns.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found, or if it is of the wrong
* type.
* @return The resource requested, as a UnicodeString**. The caller
* does not own the storage and must not delete it.
*/
const UnicodeString** get2dArray(const char *resourceTag,
int32_t& rowCount,
int32_t& columnCount,
UErrorCode& err) const;
/**
* Return a single string from a 2-dimensional array resource. If the resource does
* not exist, or if it is not a 2-d array, or if the row or column indices are out
* of bounds, err is set to U_MISSING_RESOURCE_ERROR.
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
* The replacement overload takes the same resourceTag, rowIndex, columnIndex and err
* arguments and returns a const UnicodeString* instead of filling in theArrayItem.
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param rowIndex The row index (zero-based) of the array item the user wants
* to extract from the resource.
* @param columnIndex The column index (zero-based) of the array item the user
* wants to extract from the resource.
* @param theArrayItem Receives the actual text of the desired array item.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found, if the resource data was in
* the wrong format, or if either index is out of bounds.
*/
void get2dArrayItem(const char *resourceTag,
int32_t rowIndex,
int32_t columnIndex,
UnicodeString& theArrayItem,
UErrorCode& err) const;
/**
* Return a single string from a 2-dimensional array resource. If the resource does
* not exist, or if it is not a 2-d array, or if the row or column indices are out
* of bounds, err is set to U_MISSING_RESOURCE_ERROR.
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param rowIndex The row index (zero-based) of the array item the user wants
* to extract from the resource.
* @param columnIndex The column index (zero-based) of the array item the user
* wants to extract from the resource.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found, if the resource data was in
* the wrong format, or if either index is out of bounds.
* @return A pointer to the text of the array item, or NULL if there was an error.
*/
const UnicodeString* get2dArrayItem( const char *resourceTag,
int32_t rowIndex,
int32_t columnIndex,
UErrorCode& err) const;
/**
* Returns a single item from a tagged-array resource. This will return the contents
* of a single item in a resource of type tagged-array. If this function is called
* for a resource that is not of type tagged-array, it will set err to
* U_MISSING_RESOURCE_ERROR.
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
* The replacement overload takes the same resourceTag, itemTag and err arguments and
* returns a const UnicodeString* instead of filling in theArrayItem.
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param itemTag The item tag for the item the caller wants to extract.
* @param theArrayItem Receives the text of the desired array item.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified resource tag couldn't be found, or if an item
* with the specified item tag couldn't be found in the resource.
*/
void getTaggedArrayItem( const char *resourceTag,
const UnicodeString& itemTag,
UnicodeString& theArrayItem,
UErrorCode& err) const;
/**
* Returns a single item from a tagged-array resource. This will return the contents
* of a single item in a resource of type tagged-array. If this function is called
* for a resource that is not of type tagged-array, it will set err to
* U_MISSING_RESOURCE_ERROR.
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param itemTag The item tag for the item the caller wants to extract.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified resource tag couldn't be found, or if an item
* with the specified item tag couldn't be found in the resource.
* @return A pointer to the text of the array item, or NULL if there was an error.
*/
const UnicodeString* getTaggedArrayItem( const char *resourceTag,
const UnicodeString& itemTag,
UErrorCode& err) const;
/**
* Returns a tagged-array resource. The contents of the resource are returned as two
* separate arrays of UnicodeStrings, the addresses of which are placed in "itemTags"
* and "items". After calling this function, the items in the resource will be in the
* list pointed to by "items", and for each items[i], itemTags[i] will be the tag that
* corresponds to it. The total number of entries in each array is returned in
* numItems.
*
* Unlike the other accessors documented in this class, the caller owns the returned
* storage here.
* NOTE(review): both outputs are arrays, so they presumably have to be released with
* delete[] rather than delete -- confirm against the implementation.
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param itemTags Set to point to an array of UnicodeStrings representing the
* tags in the specified resource. The caller DOES own this array,
* and must delete it.
* @param items Set to point to an array of UnicodeStrings containing the
* individual resource items themselves. itemTags[i] will
* contain the tag corresponding to items[i]. The caller DOES
* own this array, and must delete it.
* @param numItems Receives the number of items in the arrays pointed to by
* items and itemTags.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found.
*/
void getTaggedArray( const char *resourceTag,
UnicodeString*& itemTags,
UnicodeString*& items,
int32_t& numItems,
UErrorCode& err) const;
/**
* Return the version number associated with this ResourceBundle. This version
* number is a string of the form MAJOR.MINOR, where MAJOR is the version number of
* the current analytic code package, and MINOR is the version number contained in
* the resource file as the value of the tag "Version". A change in the MINOR
* version indicates an updated data file. A change in the MAJOR version indicates a
* new version of the code which is not binary-compatible with the previous version.
* If no "Version" tag is present in a resource file, the MINOR version "0" is assigned.
*
* For example, if the Collation sort key algorithm changes, the MAJOR version
* increments. If the collation data in a resource file changes, the MINOR version
* for that file increments.
*
* @return A string of the form N.n, where N is the major version number,
* representing the code version, and n is the minor version number,
* representing the resource data file. The caller does not own this
* string and must not free it.
*/
const char* getVersionNumber(void) const;
/**
* Return the Locale associated with this ResourceBundle.
*
* @return a const reference to a Locale object; nothing is handed over to the
* caller.
*/
const Locale &getLocale(void) const ;
private:
/**
* Private nested helper class of ResourceBundle. It stores a prefix and a suffix,
* each as a UnicodeString and as a wchar_t* (see the data members below), and offers
* file-related operations keyed by a locale name.
* NOTE(review): the implementation is not visible in this header; the remarks on the
* individual members are inferred from names and signatures only -- confirm them.
*/
class U_COMMON_API PathInfo {
public:
PathInfo();
PathInfo(const PathInfo& source);
PathInfo(const UnicodeString& path);
PathInfo(const UnicodeString& path, const UnicodeString& suffix);
PathInfo(const wchar_t* path, const wchar_t* suffix);
~PathInfo();
PathInfo& operator=(const PathInfo& source);
// presumably reports whether a resource file for localeName exists under this path -- TODO confirm
bool_t fileExists(const UnicodeString& localeName) const;
// presumably builds the keys under which data for localeName is cached/hashed -- TODO confirm
UnicodeString makeCacheKey(const UnicodeString& localeName) const;
UnicodeString makeHashkey(const UnicodeString& localeName) const;
// presumably opens the resource file for localeName; ownership of the returned
// FileStream is not stated here -- TODO confirm
FileStream* openFile(const UnicodeString& localeName) const;
private:
static const UChar kSeparator;
UnicodeString fPrefix;
UnicodeString fSuffix;
// NOTE(review): raw wchar_t* members next to a user-declared copy constructor,
// destructor and assignment operator suggest these are owned copies -- confirm.
wchar_t* fWPrefix;
wchar_t* fWSuffix;
};
private:
// Classes and free functions that are granted access to the private members of
// ResourceBundle.
friend class Locale;
friend class RuleBasedCollator;
// NOTE(review): judging by the names, the three functions below are the
// implementations behind C-level wrappers -- confirm where they are defined.
friend int32_t T_ResourceBundle_countArrayItemsImplementation(const ResourceBundle* resourceBundle,
const char* resourceKey,
UErrorCode& err) ;
friend const UnicodeString** listInstalledLocalesImplementation(const char* path,
int32_t* numInstalledLocales);
friend void getTaggedArrayUCharsImplementation(
const ResourceBundle* bundle,
const char *resourceTag,
UChar const** itemTags,
UChar const** items,
int32_t maxItems,
int32_t& numItems,
UErrorCode& err);
/**
* This constructor is used by Collation to load a resource bundle from a specific
* file, without trying other files. This is used by the Collation caching
* mechanism. It is private; RuleBasedCollator is declared a friend of this class.
*
* @param path The path to the resource file.
* @param localeName The name of the locale to load.
* @param status Receives the error code of the operation.
* NOTE(review): the parameter descriptions are inferred from the
* parameter names -- confirm against the implementation.
*/
ResourceBundle( const UnicodeString& path,
const UnicodeString& localeName,
UErrorCode& status);
/**
* Return a list of all installed locales. This function returns a list of the IDs
* of all locales represented in the directory specified by this ResourceBundle. It
* depends on that directory having an "Index" tagged-list item in its "index.txt"
* file; it parses that list to determine its return value (therefore, that list
* also has to be up to date). This function is static.
*
* This function is the implementation of the Locale::listInstalledLocales()
* function. It's private because the API for it really belongs in Locale.
*
* @param path The path to the locale data files. The function will
* look here for "index.txt".
* @param numInstalledLocales Receives the number of installed locales, according
* to the Index resource in index.txt.
* @return A list of the installed locales, as a pointer to an
* array of UnicodeStrings. This storage is not owned by
* the caller, who must not delete it. The information
* in this list is derived from the Index resource in
* default.txt, which must be kept up to date.
* NOTE(review): the rest of this comment names
* index.txt, not default.txt -- confirm which file is
* actually read.
*/
static const UnicodeString* listInstalledLocales(const UnicodeString& path,
int32_t& numInstalledLocales);
/**
* Retrieve a ResourceBundle from the cache. Return NULL if not found.
* The value handed back is a pointer to a const UHashtable.
*
* @param path The PathInfo to look up.
* @param localeName The locale name to look up.
* @param someCache The cache to search.
* NOTE(review): parameter roles are inferred from their names -- confirm.
*/
static const UHashtable* getFromCache(const PathInfo& path,
const UnicodeString& localeName,
ResourceBundleCache* someCache);
// NOTE(review): presumably like getFromCache(), but falling back to other locales
// when desiredLocale is not available, with returnedLocale receiving the locale that
// was actually used and error receiving the status -- inferred from the names only,
// confirm against the implementation.
static const UHashtable* getFromCacheWithFallback(const PathInfo& path,
const UnicodeString& desiredLocale,
UnicodeString& returnedLocale,
ResourceBundleCache* someCache,
UErrorCode& error);
/**
* Handlers which are passed to parse() have this signature.
* addToCache() and saveCollationHashtable() are declared in this class with exactly
* this parameter list.
* NOTE(review): context looks like an opaque pointer forwarded from the parse()
* caller to the handler -- confirm.
*/
typedef void (*Handler)(const UnicodeString& localeName,
UHashtable* hashtable,
void* context,
ResourceBundleCache* someCache);
/**
* Parse a file, storing the resource data in the cache.
*
* @param path The PathInfo for the file to parse.
* @param localeName The name of the locale whose file is parsed.
* @param handler A function with the Handler signature.
* @param context Opaque pointer; the Handler signature also takes a void* context.
* @param someCache The cache to store into.
* @param error Receives the error code of the operation.
* NOTE(review): how handler and context are used is not visible in this
* header; the descriptions above are inferred from names -- confirm.
*/
static void parse(const PathInfo& path,
const UnicodeString& localeName,
Handler handler,
void* context,
ResourceBundleCache* someCache,
UErrorCode &error);
/**
* If the given file exists and has not been parsed, then parse it (caching the
* resultant data) and return true.
* NOTE(review): presumably returns false otherwise, and presumably vCache is what
* records which files have already been visited -- confirm against the
* implementation.
*/
static bool_t parseIfUnparsed(const PathInfo& path,
const UnicodeString& locale,
ResourceBundleCache* fCache,
VisitedFileCache* vCache,
UErrorCode& error);
// Two non-const overloads that return a pointer to a const UHashtable for a locale.
// The first additionally has a returnedLocale output parameter.
// NOTE(review): presumably returnedLocale receives the locale whose data was actually
// found -- confirm against the implementation.
const UHashtable* getHashtableForLocale(const UnicodeString& localeName,
UnicodeString& returnedLocale,
UErrorCode& err);
const UHashtable* getHashtableForLocale(const UnicodeString& desiredLocale,
UErrorCode& error);
// NOTE(review): presumably the common lookup behind the public get...() accessors:
// looks up the resource data stored under tag and reports failure through err --
// confirm against the implementation.
const ResourceBundleData* getDataForTag(const char *tag,
UErrorCode& err) const;
// NOTE(review): presumably the shared initialisation routine used by the
// constructors, setting this object up for the given path and locale -- confirm.
void constructForLocale(const PathInfo& path,
const Locale& locale,
UErrorCode& error);
// Has the same parameter list as the Handler typedef, so it can be passed to parse().
// NOTE(review): presumably stores hashtable in someCache under localeName -- confirm.
static void addToCache(const UnicodeString& localeName,
UHashtable* hashtable,
void* context,
ResourceBundleCache* someCache);
// Has the same parameter list as the Handler typedef, so it can be passed to parse().
// NOTE(review): presumably used on the Collation loading path (see the private
// constructor's comment) to keep the parsed hashtable -- confirm.
static void saveCollationHashtable(const UnicodeString& localeName,
UHashtable* hashtable,
void* context,
ResourceBundleCache* cache);
private:
/**
* This internal class iterates over the fallback and/or default locales. It
* progresses as follows:
*
* Specific: language+country+variant
* language+country
* language
* Default: language+country+variant
* language+country
* language
* Root:
*/
class LocaleFallbackIterator
{
public:
LocaleFallbackIterator(const UnicodeString& startingLocale,
const UnicodeString& root,
bool_t useDefaultLocale);
// Returns a reference to the stored fLocale member.
const UnicodeString& getLocale(void) const { return fLocale; }
// NOTE(review): presumably advances to the next locale in the progression above
// and returns whether there was one -- confirm against the implementation.
bool_t nextLocale(UErrorCode& status);
private:
// NOTE(review): presumably strips the last component from fLocale -- confirm.
void chopLocale(void);
UnicodeString fLocale;
UnicodeString fDefaultLocale;
UnicodeString fRoot;
bool_t fUseDefaultLocale;
bool_t fTriedDefaultLocale;
bool_t fTriedRoot;
};
private:
// Class-wide string constants; their values are defined outside this header.
static const char* kDefaultSuffix;
static const int32_t kDefaultSuffixLen;
static const char* kDefaultFilename;
static const char* kDefaultLocaleName;
static const char* kIndexLocaleName;
static const char* kIndexFilename;
static const char* kIndexTag;
static const char* kDefaultMinorVersion;
static const char* kVersionSeparator;
static const char* kVersionTag;
// Class-wide (static) caches.
static ResourceBundleCache* fgUserCache;
static VisitedFileCache* fgUserVisitedFiles;
// NOTE(review): despite the "fg" prefix these two are per-object (non-static)
// members. Presumably they point at the cache this object uses -- confirm.
ResourceBundleCache* fgCache;
VisitedFileCache* fgVisitedFiles;
/**
* Data members. The ResourceBundle object is kept lightweight by having the fData[]
* array entries be non-owned pointers. The cache (fgCache) owns the entries and
* will delete them at static destruction time.
*/
PathInfo fPath;
enum { kDataCount = 4 };
const UHashtable* fData[kDataCount]; // These aren't const if fIsDataOwned is true
bool_t fLoaded[kDataCount];
UErrorCode fDataStatus[kDataCount]; // Returns the appropriate error code for each data table.
// See the comment on fData above: when this flag is true the fData entries are not const.
bool_t fIsDataOwned;
Locale fRealLocale;
LocaleFallbackIterator* fLocaleIterator;
char* fVersionID;
};
#endif
#error Please include unicode/rebund.h instead

View file

@ -13,8 +13,8 @@
*******************************************************************************
*/
#include "chariter.h"
#include "schriter.h"
#include "unicode/chariter.h"
#include "unicode/schriter.h"
UClassID StringCharacterIterator::fgClassID = 0;

View file

@ -1,174 +1 @@
/*
*******************************************************************************
*
* Copyright (C) 1998-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* File schriter.h
*
* Modification History:
*
* Date Name Description
* 05/05/99 stephen Cleaned up.
*******************************************************************************
*/
#ifndef SCHRITER_H
#define SCHRITER_H
#include "utypes.h"
#include "chariter.h"
/**
* A concrete subclass of CharacterIterator that iterates over the
* characters in a UnicodeString. It's possible not only to create an
* iterator that iterates over an entire UnicodeString, but also to
* create one that iterates over only a subrange of a UnicodeString
* (iterators over different subranges of the same UnicodeString don't
* compare equal). */
class U_COMMON_API StringCharacterIterator : public CharacterIterator {
public:
/**
* Create an iterator over the UnicodeString referred to by "text".
* The iteration range is the whole string, and the starting position is 0.
*/
StringCharacterIterator(const UnicodeString& text);
/**
* Create an iterator over the UnicodeString referred to by "text".
* The iteration range is the whole string, and the starting
* position is specified by "pos". If "pos" is outside the valid
* iteration range, the behavior of this object is undefined. */
StringCharacterIterator(const UnicodeString& text,
UTextOffset pos);
/**
* Create an iterator over the UnicodeString referred to by "text".
* The iteration range begins with the character specified by
* "begin" and ends with the character BEFORE the character specified
* by "end". The starting position is specified by "pos". If
* "begin" and "end" don't form a valid range on "text" (i.e., begin
* >= end or either is negative or greater than text.size()), or
* "pos" is outside the range defined by "begin" and "end", the
* behavior of this iterator is undefined. */
StringCharacterIterator(const UnicodeString& text,
UTextOffset begin,
UTextOffset end,
UTextOffset pos);
/**
* Copy constructor. The new iterator iterates over the same range
* of the same string as "that", and its initial position is the
* same as "that"'s current position. */
StringCharacterIterator(const StringCharacterIterator& that);
/**
* Destructor. */
virtual ~StringCharacterIterator();
/**
* Assignment operator. *this is altered to iterate over the same
* range of the same string as "that", and refers to the same
* character within that string as "that" does. */
StringCharacterIterator&
operator=(const StringCharacterIterator& that);
/**
* Returns true if the iterators iterate over the same range of the
* same string and are pointing at the same character. */
virtual bool_t operator==(const CharacterIterator& that) const;
/**
* Generates a hash code for this iterator. */
virtual int32_t hashCode(void) const;
/**
* Returns a new StringCharacterIterator referring to the same
* character in the same range of the same string as this one. The
* caller must delete the new iterator. */
virtual CharacterIterator* clone(void) const;
/**
* Sets the iterator to refer to the first character in its
* iteration range, and returns that character. */
virtual UChar first(void);
/**
* Sets the iterator to refer to the last character in its iteration
* range, and returns that character. */
virtual UChar last(void);
/**
* Sets the iterator to refer to the "position"-th character in the
* UnicodeString the iterator refers to, and returns that character.
* If the index is outside the iterator's iteration range, the
* behavior of the iterator is undefined. */
virtual UChar setIndex(UTextOffset pos);
/**
* Returns the character the iterator currently refers to. */
virtual UChar current(void) const;
/**
* Advances to the next character in the iteration range (toward
* last()), and returns that character. If there are no more
* characters to return, returns DONE. */
virtual UChar next(void);
/**
* Advances to the previous character in the iteration range (toward
* first()), and returns that character. If there are no more
* characters to return, returns DONE. */
virtual UChar previous(void);
/**
* Returns the numeric index of the first character in this
* iterator's iteration range. */
virtual UTextOffset startIndex(void) const;
/**
* Returns the numeric index of the character immediately BEYOND the
* last character in this iterator's iteration range. */
virtual UTextOffset endIndex(void) const;
/**
* Returns the numeric index in the underlying UnicodeString of the
* character the iterator currently refers to (i.e., the character
* returned by current()). */
virtual UTextOffset getIndex(void) const;
/**
* Copies the UnicodeString under iteration into the UnicodeString
* referred to by "result". Even if this iterator iterates across
* only a part of this string, the whole string is copied.
* @param result Receives a copy of the text under iteration. */
virtual void getText(UnicodeString& result);
/**
* Return a class ID for this object (not really public).
* Forwards to getStaticClassID(). */
virtual UClassID getDynamicClassID(void) const
{ return getStaticClassID(); }
/**
* Return a class ID for this class (not really public).
* The ID is the address of the static member fgClassID. */
static UClassID getStaticClassID(void)
{ return (UClassID)(&fgClassID); }
private:
// The default constructor is private, so it is not available to clients.
StringCharacterIterator();
// The text is held by value (a UnicodeString member, not a pointer or reference).
UnicodeString text;
UTextOffset pos;
UTextOffset begin;
UTextOffset end;
// Only the address of this member is used, by getStaticClassID().
static UClassID fgClassID;
};
#endif
#error Please include unicode/schriter.h instead

View file

@ -21,7 +21,7 @@
#include <limits.h>
#include "scsu.h"
#include "unicode/scsu.h"
#include "cmemory.h"
@ -889,7 +889,7 @@ scsu_decompress(UnicodeCompressor *comp,
if(sourceLimit - byteBuffer < newBytes)
newBytes = sourceLimit - byteBuffer;
icu_memcpy(comp->fBuffer + comp->fBufferLength, byteBuffer, newBytes);
uprv_memcpy(comp->fBuffer + comp->fBufferLength, byteBuffer, newBytes);
}
/* reset buffer length to 0 before recursive call */
@ -967,7 +967,7 @@ scsu_decompress(UnicodeCompressor *comp,
if not, save state and break out */
if((unicharBuffer + 1) >= targetLimit) {
--byteBuffer;
icu_memcpy(comp->fBuffer, byteBuffer,
uprv_memcpy(comp->fBuffer, byteBuffer,
sourceLimit - byteBuffer);
comp->fBufferLength = sourceLimit - byteBuffer;
byteBuffer += comp->fBufferLength;
@ -1019,7 +1019,7 @@ scsu_decompress(UnicodeCompressor *comp,
rewind the source stream and break out */
if( (byteBuffer + 1) >= sourceLimit ) {
--byteBuffer;
icu_memcpy(comp->fBuffer, byteBuffer,
uprv_memcpy(comp->fBuffer, byteBuffer,
sourceLimit - byteBuffer);
comp->fBufferLength = sourceLimit - byteBuffer;
byteBuffer += comp->fBufferLength;
@ -1046,7 +1046,7 @@ scsu_decompress(UnicodeCompressor *comp,
not, rewind the source stream and break out*/
if( byteBuffer >= sourceLimit ) {
--byteBuffer;
icu_memcpy(comp->fBuffer, byteBuffer,
uprv_memcpy(comp->fBuffer, byteBuffer,
sourceLimit - byteBuffer);
comp->fBufferLength = sourceLimit - byteBuffer;
byteBuffer += comp->fBufferLength;
@ -1076,7 +1076,7 @@ scsu_decompress(UnicodeCompressor *comp,
not, rewind the source stream and break out*/
if( byteBuffer >= sourceLimit ) {
--byteBuffer;
icu_memcpy(comp->fBuffer, byteBuffer,
uprv_memcpy(comp->fBuffer, byteBuffer,
sourceLimit - byteBuffer);
comp->fBufferLength = sourceLimit - byteBuffer;
byteBuffer += comp->fBufferLength;
@ -1094,7 +1094,7 @@ scsu_decompress(UnicodeCompressor *comp,
rewind the source stream and break out*/
if( (byteBuffer + 1) >= sourceLimit ) {
--byteBuffer;
icu_memcpy(comp->fBuffer, byteBuffer,
uprv_memcpy(comp->fBuffer, byteBuffer,
sourceLimit - byteBuffer);
comp->fBufferLength = sourceLimit - byteBuffer;
byteBuffer += comp->fBufferLength;
@ -1131,7 +1131,7 @@ scsu_decompress(UnicodeCompressor *comp,
rewind the source stream and break out*/
if( byteBuffer >= sourceLimit ) {
--byteBuffer;
icu_memcpy(comp->fBuffer, byteBuffer,
uprv_memcpy(comp->fBuffer, byteBuffer,
sourceLimit - byteBuffer);
comp->fBufferLength = sourceLimit - byteBuffer;
byteBuffer += comp->fBufferLength;
@ -1153,7 +1153,7 @@ scsu_decompress(UnicodeCompressor *comp,
rewind the source stream and break out*/
if( (byteBuffer + 1) >= sourceLimit ) {
--byteBuffer;
icu_memcpy(comp->fBuffer, byteBuffer,
uprv_memcpy(comp->fBuffer, byteBuffer,
sourceLimit - byteBuffer);
comp->fBufferLength = sourceLimit - byteBuffer;
byteBuffer += comp->fBufferLength;
@ -1187,7 +1187,7 @@ scsu_decompress(UnicodeCompressor *comp,
rewind the source stream and break out*/
if( byteBuffer >= sourceLimit - 1) {
--byteBuffer;
icu_memcpy(comp->fBuffer, byteBuffer,
uprv_memcpy(comp->fBuffer, byteBuffer,
sourceLimit - byteBuffer);
comp->fBufferLength = sourceLimit - byteBuffer;
byteBuffer += comp->fBufferLength;
@ -1204,7 +1204,7 @@ scsu_decompress(UnicodeCompressor *comp,
the source stream and break out*/
if( byteBuffer >= sourceLimit ) {
--byteBuffer;
icu_memcpy(comp->fBuffer, byteBuffer,
uprv_memcpy(comp->fBuffer, byteBuffer,
sourceLimit - byteBuffer);
comp->fBufferLength = sourceLimit - byteBuffer;
byteBuffer += comp->fBufferLength;

View file

@ -1,142 +1 @@
/*
*******************************************************************************
*
* Copyright (C) 1998-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* File scsu.h
*
* Modification History:
*
* Date Name Description
* 05/17/99 stephen Creation (ported from java UnicodeCompressor.java)
* 09/21/99 stephen Updated to handle data splits on decompression.
*******************************************************************************
*/
#ifndef SCSU_H
#define SCSU_H 1
#include "utypes.h"
/* Number of windows; sizes the fOffsets and fTimeStamps arrays of UnicodeCompressor. */
#define USCSU_NUM_WINDOWS 8
/* NOTE(review): presumably the number of static (predefined) windows; it is not
   used anywhere in this header -- confirm against scsu.c. */
#define USCSU_NUM_STATIC_WINDOWS 8
/* Maximum value for a window's index; fIndexCount has USCSU_MAX_INDEX + 1 entries. */
#define USCSU_MAX_INDEX 0xFF
/* The size, in uint8_t elements, of the internal buffer (fBuffer) for a UnicodeCompressor. */
#define USCSU_BUFSIZE 3
/**
* The UnicodeCompressor struct.
* The same struct is passed to scsu_compress() and scsu_decompress(); it must be
* initialized with scsu_init() first (both functions ask for a
* previously-initialized object).
*/
struct UnicodeCompressor {
/** Alias to current dynamic window */
int32_t fCurrentWindow;
/** Dynamic compression window offsets */
int32_t fOffsets [ USCSU_NUM_WINDOWS ];
/** Current compression mode */
int32_t fMode;
/** Keeps count of times character indices are encountered */
int32_t fIndexCount [ USCSU_MAX_INDEX + 1 ];
/** The time stamps indicate when a window was last defined */
int32_t fTimeStamps [ USCSU_NUM_WINDOWS ];
/** The current time stamp */
int32_t fTimeStamp;
/** Internal buffer for saving state (an array of USCSU_BUFSIZE bytes) */
uint8_t fBuffer [ USCSU_BUFSIZE ];
/** Number of bytes (uint8_t elements) in our internal buffer */
int32_t fBufferLength;
};
typedef struct UnicodeCompressor UnicodeCompressor;
/**
* Initialize a UnicodeCompressor.
* Sets all windows to their default values.
* @param comp The UnicodeCompressor to initialize.
* @see #reset
*/
U_CAPI void U_EXPORT2 scsu_init(UnicodeCompressor *comp);
/**
* Reset the compressor to its initial state.
* @param comp The UnicodeCompressor to reset.
* @see #init
*/
U_CAPI void U_EXPORT2 scsu_reset(UnicodeCompressor *comp);
/**
* Compress a Unicode character array into a byte array.
*
* This function is not guaranteed to completely fill the output buffer, nor
* is it guaranteed to compress the entire input.
* If the source data is completely compressed, <TT>status</TT> will be set
* to <TT>U_ZERO_ERROR</TT>.
* If the source data is not completely compressed, <TT>status</TT> will be
* set to <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT>. If this occurs, larger buffers
* should be allocated, or data flushed, and the function should be called
* again with the new buffers.
*
* <TT>target</TT> and <TT>source</TT> are passed as pointers to pointers so that
* the function can advance them; on return they mark how far output and input
* have progressed.
*
* @param comp A pointer to a previously-initialized UnicodeCompressor
* @param target I/O parameter. On input, a pointer to a buffer of bytes to
* receive the compressed data. On output, points to the byte following
* the last byte written. This buffer must be at least 4 bytes.
* @param targetLimit A pointer to the end of the array <TT>target</TT>.
* @param source I/O parameter. On input, a pointer to a buffer of
* Unicode characters to be compressed. On output, points to the character
* following the last character compressed.
* @param sourceLimit A pointer to the end of the array <TT>source</TT>.
* @param status A pointer to an UErrorCode to receive any errors.
*
* @see #decompress
*/
U_CAPI void U_EXPORT2 scsu_compress(UnicodeCompressor *comp,
uint8_t **target,
const uint8_t *targetLimit,
const UChar **source,
const UChar *sourceLimit,
UErrorCode *status);
/**
* Decompress a byte array into a Unicode character array.
*
* This function will either completely fill the output buffer, or
* consume the entire input.
* If the source data is completely decompressed, <TT>status</TT> will be set
* to <TT>U_ZERO_ERROR</TT>.
* If the source data is not completely decompressed, <TT>status</TT> will be
* set to <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT>. If this occurs, larger buffers
* should be allocated, or data flushed, and the function should be called
* again with the new buffers.
*
* @param comp A pointer to a previously-initialized UnicodeCompressor
* @param target I/O parameter. On input, a pointer to a buffer of Unicode
* characters to receive the decompressed data. On output, points to the
* character following the last character written. This buffer must be
* at least 2 bytes.
* @param targetLimit A pointer to the end of the array <TT>target</TT>.
* @param source I/O parameter. On input, a pointer to a buffer of
* bytes to be decompressed. On output, points to the byte following the
* last byte decompressed.
* @param sourceLimit A pointer to the end of the array <TT>source</TT>.
* @param status A pointer to an UErrorCode to receive any errors.
*
* NOTE(review): this comment used to carry an "@return The number of Unicode
* characters written to target" line, but the function is declared void. The
* amount written can be derived from how far <TT>*target</TT> has advanced.
*
* @see #compress
*/
U_CAPI void U_EXPORT2 scsu_decompress(UnicodeCompressor *comp,
UChar **target,
const UChar *targetLimit,
const uint8_t **source,
const uint8_t *sourceLimit,
UErrorCode *status);
#endif
#error Please include unicode/scsu.h instead

View file

@ -23,10 +23,10 @@
#endif
#include "cmemory.h"
#include "utypes.h"
#include "ustring.h"
#include "uchar.h"
#include "ubidi.h"
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/uchar.h"
#include "unicode/ubidi.h"
#include "ubidiimp.h"
/*
@ -154,14 +154,14 @@ ubidi_openSized(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode *pErr
}
/* allocate memory for the object */
pBiDi=(UBiDi *)icu_malloc(sizeof(UBiDi));
pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi));
if(pBiDi==NULL) {
*pErrorCode=U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
/* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
icu_memset(pBiDi, 0, sizeof(UBiDi));
uprv_memset(pBiDi, 0, sizeof(UBiDi));
/* allocate memory for arrays as requested */
if(maxLength>0) {
@ -211,7 +211,7 @@ getMemory(void **pMemory, UTextOffset *pSize, bool_t mayAllocate, UTextOffset si
/* check for existing memory */
if(*pMemory==NULL) {
/* we need to allocate memory */
if(mayAllocate && (*pMemory=icu_malloc(sizeNeeded))!=NULL) {
if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) {
*pSize=sizeNeeded;
return TRUE;
} else {
@ -226,7 +226,7 @@ getMemory(void **pMemory, UTextOffset *pSize, bool_t mayAllocate, UTextOffset si
/* we may try to grow or shrink */
void *memory;
if((memory=icu_realloc(*pMemory, sizeNeeded))!=NULL) {
if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) {
*pMemory=memory;
*pSize=sizeNeeded;
return TRUE;
@ -245,15 +245,15 @@ U_CAPI void U_EXPORT2
ubidi_close(UBiDi *pBiDi) {
if(pBiDi!=NULL) {
if(pBiDi->dirPropsMemory!=NULL) {
icu_free(pBiDi->dirPropsMemory);
uprv_free(pBiDi->dirPropsMemory);
}
if(pBiDi->levelsMemory!=NULL) {
icu_free(pBiDi->levelsMemory);
uprv_free(pBiDi->levelsMemory);
}
if(pBiDi->runsMemory!=NULL) {
icu_free(pBiDi->runsMemory);
uprv_free(pBiDi->runsMemory);
}
icu_free(pBiDi);
uprv_free(pBiDi);
}
}

View file

@ -1,892 +1 @@
/*
*******************************************************************************
*
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: ubidi.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999jul27
* created by: Markus W. Scherer
*/
#ifndef UBIDI_H
#define UBIDI_H
#include "utypes.h"
#include "uchar.h"
/*
* javadoc-style comments are intended to be transformed into HTML
* using DOC++ - see
* http://www.zib.de/Visual/software/doc++/index.html .
*
* The HTML documentation is created with
* doc++ -H ubidi.h
*
* The following #define trick allows us to do it all in one file
* and still be able to compile it.
*/
#define DOCXX_TAG
#define BIDI_SAMPLE_CODE
/**
* @name BIDI algorithm for ICU
*
* <h2>BIDI algorithm for ICU</h2>
*
* This is an implementation of the Unicode Bidirectional algorithm.
* The algorithm is defined in the
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
* version 5, also described in The Unicode Standard, Version 3.0 .<p>
*
* <h3>General remarks about the API:</h3>
*
* In functions with an error code parameter,
* the <code>pErrorCode</code> pointer must be valid
* and the value that it points to must not indicate a failure before
* the function call. Otherwise, the function returns immediately.
* After the function call, the value indicates success or failure.<p>
*
* The <quote>limit</quote> of a sequence of characters is the position just after their
* last character, i.e., one more than that position.<p>
*
* Some of the API functions provide access to <quote>runs</quote>.
* Such a <quote>run</quote> is defined as a sequence of characters
* that are at the same embedding level
* after performing the BIDI algorithm.<p>
*
* @author Markus W. Scherer
* @version 1.0
*/
DOCXX_TAG
/*@{*/
/**
* UBiDiLevel is the type of the level values in this
* BiDi implementation.
* It holds an embedding level and indicates the visual direction
* by its bit&nbsp;0 (even/odd value).<p>
*
* It can also hold non-level values for the
* <code>paraLevel</code> and <code>embeddingLevels</code>
* arguments of <code>ubidi_setPara()</code>; there:
* <ul>
* <li>bit&nbsp;7 of an <code>embeddingLevels[]</code>
* value indicates whether the using application is
* specifying the level of a character to <i>override</i> whatever the
* BiDi implementation would resolve it to.</li>
* <li><code>paraLevel</code> can be set to the
* pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
* and <code>UBIDI_DEFAULT_RTL</code>.</li>
* </ul>
*
* @see ubidi_setPara
*
* <p>The related constants are not real, valid level values.
* <code>UBIDI_DEFAULT_XXX</code> can be used to specify
* a default for the paragraph level for
* when the <code>ubidi_setPara()</code> function
* shall determine it but there is no
* strongly typed character in the input.<p>
*
* Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
* and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
* just like with normal LTR and RTL level values -
* these special values are designed that way. Also, the implementation
* assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
*
* @see UBIDI_DEFAULT_LTR
* @see UBIDI_DEFAULT_RTL
* @see UBIDI_LEVEL_OVERRIDE
* @see UBIDI_MAX_EXPLICIT_LEVEL
*/
typedef uint8_t UBiDiLevel;
/** Paragraph level setting.
* If there is no strong character, then set the paragraph level to 0 (left-to-right).
*/
#define UBIDI_DEFAULT_LTR 0xfe
/** Paragraph level setting.
* If there is no strong character, then set the paragraph level to 1 (right-to-left).
*/
#define UBIDI_DEFAULT_RTL 0xff
/**
* Maximum explicit embedding level.
* (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
*
*/
#define UBIDI_MAX_EXPLICIT_LEVEL 61
/** Bit flag for level input.
* Overrides directional properties.
*/
#define UBIDI_LEVEL_OVERRIDE 0x80
/**
* @memo <code>UBiDiDirection</code> values indicate the text direction.
* The numeric values of <code>UBIDI_LTR</code> and <code>UBIDI_RTL</code>
* are spelled out because the documentation below refers to them as 0 and 1.
*/
typedef enum UBiDiDirection {
    /** @memo All left-to-right text. This is a 0 value. */
    UBIDI_LTR=0,
    /** @memo All right-to-left text. This is a 1 value. */
    UBIDI_RTL=1,
    /** @memo Mixed-directional text. */
    UBIDI_MIXED=2
} UBiDiDirection;
/**
* Forward declaration of the <code>UBiDi</code> structure for the declaration of
* the API functions. Its fields are implementation-specific.<p>
* This structure holds information about a paragraph of text
* with BiDi-algorithm-related details, or about one line of
* such a paragraph.<p>
* Reordering can be done on a line, or on a paragraph which is
* then interpreted as one single line.
*/
struct UBiDi;
typedef struct UBiDi UBiDi;
/**
* Allocate a <code>UBiDi</code> structure.
* Such an object is initially empty. It is assigned
* the BiDi properties of a paragraph by <code>ubidi_setPara()</code>
* or the BiDi properties of a line of a paragraph by
* <code>ubidi_setLine()</code>.<p>
* This object can be reused for as long as it is not deallocated
* by calling <code>ubidi_close()</code>.<p>
* <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate
* additional memory for internal structures as necessary.
*
* @return An empty <code>UBiDi</code> object.
*/
U_CAPI UBiDi * U_EXPORT2
ubidi_open(void);
/**
* Allocate a <code>UBiDi</code> structure with preallocated memory
* for internal structures.
* This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
* with no arguments, but it also preallocates memory for internal structures
* according to the sizings supplied by the caller.<p>
* Subsequent functions will not allocate any more memory, and are thus
* guaranteed not to fail because of lack of memory.<p>
* The preallocation can be limited to some of the internal memory
* by setting some values to 0 here. That means that if, e.g.,
* <code>maxRunCount</code> cannot be reasonably predetermined and should not
* be set to <code>maxLength</code> (the only failproof value) to avoid
* wasting memory, then <code>maxRunCount</code> could be set to 0 here
* and the internal structures that are associated with it will be allocated
* on demand, just like with <code>ubidi_open()</code>.
*
* @param maxLength is the maximum paragraph or line length that internal memory
* will be preallocated for. An attempt to associate this object with a
* longer text will fail, unless this value is 0, which leaves the allocation
* up to the implementation.
*
* @param maxRunCount is the maximum anticipated number of same-level runs
* that internal memory will be preallocated for. An attempt to access
* visual runs on an object that was not preallocated for as many runs
* as the text was actually resolved to will fail,
* unless this value is 0, which leaves the allocation up to the implementation.<p>
* The number of runs depends on the actual text and may be anywhere between
* 1 and <code>maxLength</code>. It is typically small.<p>
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @return An empty <code>UBiDi</code> object with preallocated memory.
*/
U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode *pErrorCode);
/**
* <code>ubidi_close()</code> must be called to free the memory
* associated with a UBiDi object.<p>
*
* <strong>Important: </strong>
* If a <code>UBiDi</code> object is the <quote>child</quote>
* of another one (its <quote>parent</quote>), after calling
* <code>ubidi_setLine()</code>, then the child object must
* be destroyed (closed) or reused (by calling
* <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
* before the parent object.
*
* @param pBiDi is a <code>UBiDi</code> object.
*
* @see ubidi_setPara
* @see ubidi_setLine
*/
U_CAPI void U_EXPORT2
ubidi_close(UBiDi *pBiDi);
/**
* Perform the Unicode BiDi algorithm. It is defined in the
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
* version 5,
* also described in The Unicode Standard, Version 3.0 .<p>
*
* This function takes a single plain text paragraph with or without
* externally specified embedding levels from <quote>styled</quote> text
* and computes the left-right-directionality of each character.<p>
*
* If the entire paragraph consists of text of only one direction, then
* the function may not perform all the steps described by the algorithm,
* i.e., some levels may not be the same as if all steps were performed.
* This is not relevant for unidirectional text.<br>
* For example, in pure LTR text with numbers the numbers would get
* a resolved level of 2 higher than the surrounding text according to
* the algorithm. This implementation may set all resolved levels to
* the same value in such a case.<p>
*
* The text must be externally split into separate paragraphs (rule P1).
* Paragraph separators (B) should appear at most at the very end.
*
* @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
* which will be set to contain the reordering information,
* especially the resolved levels for all the characters in <code>text</code>.
*
* @param text is a pointer to the single-paragraph text that the
* BiDi algorithm will be performed on
* (step (P1) of the algorithm is performed externally).
* <strong>The text must be (at least) <code>length</code> long.</strong>
*
* @param length is the length of the text; if <code>length==-1</code> then
* the text must be zero-terminated.
*
* @param paraLevel specifies the default level for the paragraph;
* it is typically 0 (LTR) or 1 (RTL).
* If the function shall determine the paragraph level from the text,
* then <code>paraLevel</code> can be set to
* either <code>UBIDI_DEFAULT_LTR</code>
* or <code>UBIDI_DEFAULT_RTL</code>;
* if there is no strongly typed character, then
* the desired default is used (0 for LTR or 1 for RTL).
* Any other value between 0 and <code>UBIDI_MAX_EXPLICIT_LEVEL</code> is also valid,
* with odd levels indicating RTL.
*
* @param embeddingLevels (in) may be used to preset the embedding and override levels,
* ignoring characters like LRE and PDF in the text.
* A level overrides the directional property of its corresponding
* (same index) character if the level has the
* <code>UBIDI_LEVEL_OVERRIDE</code> bit set.<p>
* Except for that bit, it must be
* <code>paraLevel&lt;=embeddingLevels[]&lt;=UBIDI_MAX_EXPLICIT_LEVEL</code>.<p>
* <strong>Caution: </strong>A copy of this pointer, not of the levels,
* will be stored in the <code>UBiDi</code> object;
* the <code>embeddingLevels</code> array must not be
* deallocated before the <code>UBiDi</code> structure is destroyed or reused,
* and the <code>embeddingLevels</code>
* should not be modified to avoid unexpected results on subsequent BiDi operations.
* However, the <code>ubidi_setPara()</code> and
* <code>ubidi_setLine()</code> functions may modify some or all of the levels.<p>
* After the <code>UBiDi</code> object is reused or destroyed, the caller
* must take care of the deallocation of the <code>embeddingLevels</code> array.<p>
* <strong>The <code>embeddingLevels</code> array must be
* at least <code>length</code> long.</strong>
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*/
U_CAPI void U_EXPORT2
ubidi_setPara(UBiDi *pBiDi, const UChar *text, UTextOffset length,
UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
UErrorCode *pErrorCode);
/**
* <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
* contain the reordering information, especially the resolved levels,
* for all the characters in a line of text. This line of text is
* specified by referring to a <code>UBiDi</code> object representing
* this information for a paragraph of text, and by specifying
* a range of indexes in this paragraph.<p>
* In the new line object, the indexes will range from 0 to <code>limit-start</code>.<p>
*
* This is used after calling <code>ubidi_setPara()</code>
* for a paragraph, and after line-breaking on that paragraph.
* It is not necessary if the paragraph is treated as a single line.<p>
*
* After line-breaking, rules (L1) and (L2) for the treatment of
* trailing WS and for reordering are performed on
* a <code>UBiDi</code> object that represents a line.<p>
*
* <strong>Important: </strong><code>pLineBiDi</code> shares data with
* <code>pParaBiDi</code>.
* You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
* In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
* before the object for its parent paragraph.
*
* @param pParaBiDi is the parent paragraph object.
*
* @param start is the line's first index into the paragraph text.
*
* @param limit is just behind the line's last index into the paragraph text
* (its last index +1).<br>
* It must be <code>0&lt;=start&lt;=limit&lt;=</code>paragraph length.
*
* @param pLineBiDi is the object that will now represent a line of the paragraph.
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @see ubidi_setPara
*/
U_CAPI void U_EXPORT2
ubidi_setLine(const UBiDi *pParaBiDi,
UTextOffset start, UTextOffset limit,
UBiDi *pLineBiDi,
UErrorCode *pErrorCode);
/**
* Get the directionality of the text.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @return A <code>UBIDI_XXX</code> value that indicates if the entire text
* represented by this object is unidirectional,
* and which direction, or if it is mixed-directional.
*
* @see UBiDiDirection
*/
U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi *pBiDi);
/**
* Get the length of the text.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @return The length of the text that the UBiDi object was created for.
*/
U_CAPI UTextOffset U_EXPORT2
ubidi_getLength(const UBiDi *pBiDi);
/**
* Get the paragraph level of the text.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @return The paragraph level.
*
* @see UBiDiLevel
*/
U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi *pBiDi);
/**
* Get the level for one character.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param charIndex the index of a character.
*
* @return The level for the character at charIndex.
*
* @see UBiDiLevel
*/
U_CAPI UBiDiLevel U_EXPORT2
ubidi_getLevelAt(const UBiDi *pBiDi, UTextOffset charIndex);
/**
* Get an array of levels for each character.<p>
*
* Note that this function may allocate memory under some
* circumstances, unlike <code>ubidi_getLevelAt()</code>.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @return The levels array for the text,
* or <code>NULL</code> if an error occurs.
*
* @see UBiDiLevel
*/
U_CAPI const UBiDiLevel * U_EXPORT2
ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
/**
* Get a logical run.
* This function returns information about a run and is used
* to retrieve runs in logical order.<p>
* This is especially useful for line-breaking on a paragraph.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param logicalStart is the first character of the run.
*
* @param pLogicalLimit will receive the limit of the run.
* The l-value that you point to here may be the
* same expression (variable) as the one for
* <code>logicalStart</code>.
* This pointer can be <code>NULL</code> if this
* value is not necessary.
*
* @param pLevel will receive the level of the run.
* This pointer can be <code>NULL</code> if this
* value is not necessary.
*/
U_CAPI void U_EXPORT2
ubidi_getLogicalRun(const UBiDi *pBiDi, UTextOffset logicalStart,
UTextOffset *pLogicalLimit, UBiDiLevel *pLevel);
/**
* Get the number of runs.
* This function may invoke the actual reordering on the
* <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
* may have resolved only the levels of the text. Therefore,
* <code>ubidi_countRuns()</code> may have to allocate memory,
* and may fail doing so.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @return The number of runs.
*/
U_CAPI UTextOffset U_EXPORT2
ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
/**
* Get one run's logical start, length, and directionality,
* which can be 0 for LTR or 1 for RTL.
* In an RTL run, the character at the logical start is
* visually on the right of the displayed run.
* The length is the number of characters in the run.<p>
* <code>ubidi_countRuns()</code> should be called
* before the runs are retrieved.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param runIndex is the number of the run in visual order, in the
* range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
*
* @param pLogicalStart is the first logical character index in the text.
* The pointer may be <code>NULL</code> if this index is not needed.
*
* @param pLength is the number of characters (at least one) in the run.
* The pointer may be <code>NULL</code> if this is not needed.
*
* @return the directionality of the run,
* <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
* never <code>UBIDI_MIXED</code>.
*
* @see ubidi_countRuns
*
* Example:
* <pre>
*&nbsp; UTextOffset i, count=ubidi_countRuns(pBiDi),
*&nbsp; logicalStart, visualIndex=0, length;
*&nbsp; for(i=0; i&lt;count; ++i) {
*&nbsp; if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
*&nbsp; do { // LTR
*&nbsp; show_char(text[logicalStart++], visualIndex++);
*&nbsp; } while(--length>0);
*&nbsp; } else {
*&nbsp; logicalStart+=length; // logicalLimit
*&nbsp; do { // RTL
*&nbsp; show_char(text[--logicalStart], visualIndex++);
*&nbsp; } while(--length>0);
*&nbsp; }
*&nbsp; }
* </pre>
*
* Note that in right-to-left runs, code like this places
* modifier letters before base characters and second surrogates
* before first ones.
*/
U_CAPI UBiDiDirection U_EXPORT2
ubidi_getVisualRun(UBiDi *pBiDi, UTextOffset runIndex,
UTextOffset *pLogicalStart, UTextOffset *pLength);
/**
* Get the visual position from a logical text position.
* If such a mapping is used many times on the same
* <code>UBiDi</code> object, then calling
* <code>ubidi_getLogicalMap()</code> is more efficient.<p>
*
* Note that in right-to-left runs, this mapping places
* modifier letters before base characters and second surrogates
* before first ones.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param logicalIndex is the index of a character in the text.
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @return The visual position of this character.
*
* @see ubidi_getLogicalMap
* @see ubidi_getLogicalIndex
*/
U_CAPI UTextOffset U_EXPORT2
ubidi_getVisualIndex(UBiDi *pBiDi, UTextOffset logicalIndex, UErrorCode *pErrorCode);
/**
* Get the logical text position from a visual position.
* If such a mapping is used many times on the same
* <code>UBiDi</code> object, then calling
* <code>ubidi_getVisualMap()</code> is more efficient.<p>
*
* This is the inverse function to <code>ubidi_getVisualIndex()</code>.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param visualIndex is the visual position of a character.
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @return The index of this character in the text.
*
* @see ubidi_getVisualMap
* @see ubidi_getVisualIndex
*/
U_CAPI UTextOffset U_EXPORT2
ubidi_getLogicalIndex(UBiDi *pBiDi, UTextOffset visualIndex, UErrorCode *pErrorCode);
/**
* Get a logical-to-visual index map (array) for the characters in the UBiDi
* (paragraph or line) object.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param indexMap is a pointer to an array of <code>ubidi_getLength()</code>
* indexes which will reflect the reordering of the characters.
* The array does not need to be initialized.<p>
* The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.<p>
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @see ubidi_getVisualMap
* @see ubidi_getVisualIndex
*/
U_CAPI void U_EXPORT2
ubidi_getLogicalMap(UBiDi *pBiDi, UTextOffset *indexMap, UErrorCode *pErrorCode);
/**
* Get a visual-to-logical index map (array) for the characters in the UBiDi
* (paragraph or line) object.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param indexMap is a pointer to an array of <code>ubidi_getLength()</code>
* indexes which will reflect the reordering of the characters.
* The array does not need to be initialized.<p>
* The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.<p>
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @see ubidi_getLogicalMap
* @see ubidi_getLogicalIndex
*/
U_CAPI void U_EXPORT2
ubidi_getVisualMap(UBiDi *pBiDi, UTextOffset *indexMap, UErrorCode *pErrorCode);
/**
* This is a convenience function that does not use a UBiDi object.
* It is intended to be used for when an application has determined the levels
* of objects (character sequences) and just needs to have them reordered (L2).
* This is equivalent to using <code>ubidi_getLogicalMap</code> on a
* <code>UBiDi</code> object.
*
* @param levels is an array with <code>length</code> levels that have been determined by
* the application.
*
* @param length is the number of levels in the array, or, semantically,
* the number of objects to be reordered.
* It must be <code>length&gt;0</code>.
*
* @param indexMap is a pointer to an array of <code>length</code>
* indexes which will reflect the reordering of the characters.
* The array does not need to be initialized.<p>
* The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
*/
U_CAPI void U_EXPORT2
ubidi_reorderLogical(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
/**
* This is a convenience function that does not use a UBiDi object.
* It is intended to be used for when an application has determined the levels
* of objects (character sequences) and just needs to have them reordered (L2).
* This is equivalent to using <code>ubidi_getVisualMap</code> on a
* <code>UBiDi</code> object.
*
* @param levels is an array with <code>length</code> levels that have been determined by
* the application.
*
* @param length is the number of levels in the array, or, semantically,
* the number of objects to be reordered.
* It must be <code>length&gt;0</code>.
*
* @param indexMap is a pointer to an array of <code>length</code>
* indexes which will reflect the reordering of the characters.
* The array does not need to be initialized.<p>
* The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
*/
U_CAPI void U_EXPORT2
ubidi_reorderVisual(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
/**
* Invert an index map.
* The one-to-one index mapping of the first map is inverted and written to
* the second one.
*
* @param srcMap is an array with <code>length</code> indexes
* which define the original mapping.
*
* @param destMap is an array with <code>length</code> indexes
* which will be filled with the inverse mapping.
*
* @param length is the length of each array.
*/
U_CAPI void U_EXPORT2
ubidi_invertMap(const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length);
/**
* @name Sample code for the ICU BIDI API
*
* <h2>Rendering a paragraph with the ICU BiDi API</h2>
*
* This is (hypothetical) sample code that illustrates
* how the ICU BiDi API could be used to render a paragraph of text.
* Rendering code depends highly on the graphics system,
* therefore this sample code must make a lot of assumptions,
* which may or may not match any existing graphics system's properties.
*
* <p>The basic assumptions are:</p>
* <ul>
* <li>Rendering is done from left to right on a horizontal line.</li>
* <li>A run of single-style, unidirectional text can be rendered at once.</li>
* <li>Such a run of text is passed to the graphics system with
* characters (code units) in logical order.</li>
* <li>The line-breaking algorithm is very complicated
* and Locale-dependent -
* and therefore its implementation omitted from this sample code.</li>
* </ul>
*
* <pre>
*&nbsp; #include "ubidi.h"
*&nbsp;
*&nbsp; typedef enum {
*&nbsp; styleNormal=0, styleSelected=1,
*&nbsp; styleBold=2, styleItalics=4,
*&nbsp; styleSuper=8, styleSub=16
*&nbsp; } Style;
*&nbsp;
*&nbsp; typedef struct { UTextOffset limit; Style style; } StyleRun;
*&nbsp;
*&nbsp; int getTextWidth(const UChar *text, UTextOffset start, UTextOffset limit,
*&nbsp; const StyleRun *styleRuns, int styleRunCount);
*&nbsp;
*&nbsp; // set *pLimit and *pStyleRunLimit for a line
*&nbsp; // from text[start] and from styleRuns[styleRunStart]
*&nbsp; // using ubidi_getLogicalRun(para, ...)
*&nbsp; void getLineBreak(const UChar *text, UTextOffset start, UTextOffset *pLimit,
*&nbsp; UBiDi *para,
*&nbsp; const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
*&nbsp; int *pLineWidth);
*&nbsp;
*&nbsp; // render runs on a line sequentially, always from left to right
*&nbsp;
*&nbsp; // prepare rendering a new line
*&nbsp; void startLine(UBiDiDirection textDirection, int lineWidth);
*&nbsp;
*&nbsp; // render a run of text and advance to the right by the run width
*&nbsp; // the text[start..limit-1] is always in logical order
*&nbsp; void renderRun(const UChar *text, UTextOffset start, UTextOffset limit,
*&nbsp; UBiDiDirection textDirection, Style style);
*&nbsp;
*&nbsp; // We could compute a cross-product
*&nbsp; // from the style runs with the directional runs
*&nbsp; // and then reorder it.
*&nbsp; // Instead, here we iterate over each run type
*&nbsp; // and render the intersections -
*&nbsp; // with shortcuts in simple (and common) cases.
*&nbsp; // renderParagraph() is the main function.
*&nbsp;
*&nbsp; // render a directional run with
*&nbsp; // (possibly) multiple style runs intersecting with it
*&nbsp; void renderDirectionalRun(const UChar *text,
*&nbsp; UTextOffset start, UTextOffset limit,
*&nbsp; UBiDiDirection direction,
*&nbsp; const StyleRun *styleRuns, int styleRunCount) {
*&nbsp; int i;
*&nbsp;
*&nbsp; // iterate over style runs
*&nbsp; if(direction==UBIDI_LTR) {
*&nbsp; int styleLimit;
*&nbsp;
*&nbsp; for(i=0; i&lt;styleRunCount; ++i) {
*&nbsp; styleLimit=styleRuns[i].limit;
*&nbsp; if(start&lt;styleLimit) {
*&nbsp; if(styleLimit>limit) { styleLimit=limit; }
*&nbsp; renderRun(text, start, styleLimit,
*&nbsp; direction, styleRuns[i].style);
*&nbsp; if(styleLimit==limit) { break; }
*&nbsp; start=styleLimit;
*&nbsp; }
*&nbsp; }
*&nbsp; } else {
*&nbsp; int styleStart;
*&nbsp;
*&nbsp; for(i=styleRunCount-1; i>=0; --i) {
*&nbsp; if(i>0) {
*&nbsp; styleStart=styleRuns[i-1].limit;
*&nbsp; } else {
*&nbsp; styleStart=0;
*&nbsp; }
*&nbsp; if(limit>=styleStart) {
*&nbsp; if(styleStart&lt;start) { styleStart=start; }
*&nbsp; renderRun(text, styleStart, limit,
*&nbsp; direction, styleRuns[i].style);
*&nbsp; if(styleStart==start) { break; }
*&nbsp; limit=styleStart;
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp;
*&nbsp; // the line object represents text[start..limit-1]
*&nbsp; void renderLine(UBiDi *line, const UChar *text,
*&nbsp; UTextOffset start, UTextOffset limit,
*&nbsp; const StyleRun *styleRuns, int styleRunCount) {
*&nbsp; UBiDiDirection direction=ubidi_getDirection(line);
*&nbsp; if(direction!=UBIDI_MIXED) {
*&nbsp; // unidirectional
*&nbsp; if(styleRunCount&lt;=1) {
*&nbsp; renderRun(text, start, limit, direction, styleRuns[0].style);
*&nbsp; } else {
*&nbsp; renderDirectionalRun(text, start, limit,
*&nbsp; direction, styleRuns, styleRunCount);
*&nbsp; }
*&nbsp; } else {
*&nbsp; // mixed-directional
*&nbsp; UTextOffset count, i, length;
*&nbsp; UBiDiLevel level;
*&nbsp;
*&nbsp; count=ubidi_countRuns(line, pErrorCode);
*&nbsp; if(U_SUCCESS(*pErrorCode)) {
*&nbsp; if(styleRunCount&lt;=1) {
*&nbsp; Style style=styleRuns[0].style;
*&nbsp;
*&nbsp; // iterate over directional runs
*&nbsp; for(i=0; i&lt;count; ++i) {
*&nbsp; direction=ubidi_getVisualRun(line, i, &start, &length);
*&nbsp; renderRun(text, start, start+length, direction, style);
*&nbsp; }
*&nbsp; } else {
*&nbsp; UTextOffset j;
*&nbsp;
*&nbsp; // iterate over both directional and style runs
*&nbsp; for(i=0; i&lt;count; ++i) {
*&nbsp; direction=ubidi_getVisualRun(line, i, &start, &length);
*&nbsp; renderDirectionalRun(text, start, start+length,
*&nbsp; direction, styleRuns, styleRunCount);
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp;
*&nbsp; void renderParagraph(const UChar *text, UTextOffset length,
*&nbsp; UBiDiDirection textDirection,
*&nbsp; const StyleRun *styleRuns, int styleRunCount,
*&nbsp; int lineWidth,
*&nbsp; UErrorCode *pErrorCode) {
*&nbsp; UBiDi *para;
*&nbsp;
*&nbsp; if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length&lt;=0) {
*&nbsp; return;
*&nbsp; }
*&nbsp;
*&nbsp; para=ubidi_openSized(length, 0, pErrorCode);
*&nbsp; if(para==NULL) { return; }
*&nbsp;
*&nbsp; ubidi_setPara(para, text, length,
*&nbsp; textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
*&nbsp; NULL, pErrorCode);
*&nbsp; if(U_SUCCESS(*pErrorCode)) {
*&nbsp; UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
*&nbsp; StyleRun styleRun={ length, styleNormal };
*&nbsp; int width;
*&nbsp;
*&nbsp; if(styleRuns==NULL || styleRunCount&lt;=0) {
*&nbsp; styleRunCount=1;
*&nbsp; styleRuns=&styleRun;
*&nbsp; }
*&nbsp;
*&nbsp; // assume styleRuns[styleRunCount-1].limit>=length
*&nbsp;
*&nbsp; width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
*&nbsp; if(width&lt;=lineWidth) {
*&nbsp; // everything fits onto one line
*&nbsp;
*&nbsp; // prepare rendering a new line from either left or right
*&nbsp; startLine(paraLevel, width);
*&nbsp;
*&nbsp; renderLine(para, text, 0, length,
*&nbsp; styleRuns, styleRunCount);
*&nbsp; } else {
*&nbsp; UBiDi *line;
*&nbsp;
*&nbsp; // we need to render several lines
*&nbsp; line=ubidi_openSized(length, 0, pErrorCode);
*&nbsp; if(line!=NULL) {
*&nbsp; UTextOffset start=0, limit;
*&nbsp; int styleRunStart=0, styleRunLimit;
*&nbsp;
*&nbsp; for(;;) {
*&nbsp; limit=length;
*&nbsp; styleRunLimit=styleRunCount;
*&nbsp; getLineBreak(text, start, &limit, para,
*&nbsp; styleRuns, styleRunStart, &styleRunLimit,
*&nbsp; &width);
*&nbsp; ubidi_setLine(para, start, limit, line, pErrorCode);
*&nbsp; if(U_SUCCESS(*pErrorCode)) {
*&nbsp; // prepare rendering a new line
*&nbsp; // from either left or right
*&nbsp; startLine(paraLevel, width);
*&nbsp;
*&nbsp; renderLine(line, text, start, limit,
*&nbsp; styleRuns+styleRunStart,
*&nbsp; styleRunLimit-styleRunStart);
*&nbsp; }
*&nbsp; if(limit==length) { break; }
*&nbsp; start=limit;
*&nbsp; styleRunStart=styleRunLimit-1;
*&nbsp; if(start>=styleRuns[styleRunStart].limit) {
*&nbsp; ++styleRunStart;
*&nbsp; }
*&nbsp; }
*&nbsp;
*&nbsp; ubidi_close(line);
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp;
*&nbsp; ubidi_close(para);
*&nbsp; }
* </pre>
*/
BIDI_SAMPLE_CODE
/*@{*/
/*@}*/
/*@}*/
#endif
#error Please include unicode/ubidi.h instead

View file

@ -20,8 +20,8 @@
/* set import/export definitions */
#ifdef U_COMMON_IMPLEMENTATION
#include "utypes.h"
#include "uchar.h"
#include "unicode/utypes.h"
#include "unicode/uchar.h"
/* miscellaneous definitions ------------------------------------------------ */

View file

@ -23,10 +23,10 @@
#endif
#include "cmemory.h"
#include "utypes.h"
#include "ustring.h"
#include "uchar.h"
#include "ubidi.h"
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/uchar.h"
#include "unicode/ubidi.h"
#include "ubidiimp.h"
/*
@ -254,9 +254,9 @@ ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
UBiDiLevel *levels=pBiDi->levelsMemory;
if(start>0 && levels!=pBiDi->levels) {
icu_memcpy(levels, pBiDi->levels, start);
uprv_memcpy(levels, pBiDi->levels, start);
}
icu_memset(levels+start, pBiDi->paraLevel, length-start);
uprv_memset(levels+start, pBiDi->paraLevel, length-start);
/* this new levels array is set for the line and reflects the WS run */
pBiDi->trailingWSStart=length;

View file

@ -23,11 +23,11 @@
* 11/11/1999 weiv added u_isalnum(), cleaned comments
********************************************************************************************
*/
#include "utypes.h"
#include "unicode/utypes.h"
#include "ucmp16.h"
#include "ucmp8.h"
#include "umutex.h"
#include "uchar.h"
#include "unicode/uchar.h"
#include "cmemory.h"
#include <string.h>
@ -5263,7 +5263,7 @@ createDirTables()
const char* u_getVersion()
{
int32_t len=strlen(UNICODE_VERSION) + strlen("Unicode Version ");
_ucdVersion=(char*)icu_realloc(_ucdVersion, len + 1 );
_ucdVersion=(char*)uprv_realloc(_ucdVersion, len + 1 );
strcpy(_ucdVersion, "Unicode Version ");
strcat(_ucdVersion, UNICODE_VERSION);
return _ucdVersion;

View file

@ -1,841 +1 @@
/*
**********************************************************************
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File UCHAR.H
*
* Modification History:
*
* Date Name Description
* 04/02/97 aliu Creation.
* 03/29/99 helena Updated for C APIs.
* 4/15/99 Madhu Updated for C Implementation and Javadoc
* 5/20/99 Madhu Added the function u_getVersion()
* 8/19/1999 srl Upgraded scripts to Unicode 3.0
* 8/27/1999 schererm UCharDirection constants: U_...
* 11/11/1999 weiv added u_isalnum(), cleaned comments
********************************************************************************
*/
#ifndef UCHAR_H
#define UCHAR_H
#include "utypes.h"
/*===========================================================================*/
/* Unicode version number */
/*===========================================================================*/
#define UNICODE_VERSION "3.0.0"
/**
* The Unicode C API allows you to query the properties associated with individual
* Unicode character values.
* <p>
* The Unicode character information, provided implicitly by the
* Unicode character encoding standard, includes information about the script
* (for example, symbols or control characters) to which the character belongs,
* as well as semantic information such as whether a character is a digit or
* uppercase, lowercase, or uncased.
* <P>
*/
/**
* Constants.
*/
/**
* The minimum value a UChar can have. The lowest value a
* UChar can have is 0x0000.
*
* NOTE(review): this is declared static with no initializer in a header,
* so the value is not set by this declaration - confirm where (and for
* which including files) it is actually defined.
*/
static UChar UCHAR_MIN_VALUE;
/**
* The maximum value a UChar can have. The greatest value a
* UChar can have is 0xffff.
*
* NOTE(review): this is declared static with no initializer in a header,
* so every file that includes this header gets its own object and the
* value 0xffff is not set by this declaration. Unless an initialized
* definition is visible in the same file, readers should not assume it
* holds 0xffff - confirm against the implementation.
*/
static UChar UCHAR_MAX_VALUE;
/**
* Data for enumerated Unicode general category types.
* u_charType() documents its return value as one of these categories.
* The two-letter codes in the comments below are the general category
* abbreviations used by the Unicode Character Database.
* The numeric values are explicit and must not be changed.
*/
enum UCharCategory
{
/** Cn: unassigned */
U_UNASSIGNED = 0,
/** Lu */
U_UPPERCASE_LETTER = 1,
/** Ll */
U_LOWERCASE_LETTER = 2,
/** Lt */
U_TITLECASE_LETTER = 3,
/** Lm */
U_MODIFIER_LETTER = 4,
/** Lo */
U_OTHER_LETTER = 5,
/** Mn */
U_NON_SPACING_MARK = 6,
/** Me */
U_ENCLOSING_MARK = 7,
/** Mc */
U_COMBINING_SPACING_MARK = 8,
/** Nd */
U_DECIMAL_DIGIT_NUMBER = 9,
/** Nl */
U_LETTER_NUMBER = 10,
/** No */
U_OTHER_NUMBER = 11,
/** Zs */
U_SPACE_SEPARATOR = 12,
/** Zl */
U_LINE_SEPARATOR = 13,
/** Zp */
U_PARAGRAPH_SEPARATOR = 14,
/** Cc */
U_CONTROL_CHAR = 15,
/** Cf */
U_FORMAT_CHAR = 16,
/** Co */
U_PRIVATE_USE_CHAR = 17,
/** Cs */
U_SURROGATE = 18,
/** Pd */
U_DASH_PUNCTUATION = 19,
/** Ps */
U_START_PUNCTUATION = 20,
/** Pe */
U_END_PUNCTUATION = 21,
/** Pc */
U_CONNECTOR_PUNCTUATION = 22,
/** Po */
U_OTHER_PUNCTUATION = 23,
/** Sm */
U_MATH_SYMBOL = 24,
/** Sc */
U_CURRENCY_SYMBOL = 25,
/** Sk */
U_MODIFIER_SYMBOL = 26,
/** So */
U_OTHER_SYMBOL = 27,
/** Pi */
U_INITIAL_PUNCTUATION = 28,
/** Pf */
U_FINAL_PUNCTUATION = 29,
/** NOTE(review): not a Unicode general category name; presumably a catch-all value - confirm against the implementation. */
U_GENERAL_OTHER_TYPES = 30,
/** Number of values above (31); not itself a category. */
U_CHAR_CATEGORY_COUNT
};
typedef enum UCharCategory UCharCategory;
/**
* This specifies the language directional property of a character.
* u_charDirection() returns a value of this type.
* The abbreviations in the comments below are the bidirectional class
* names used by the Unicode Bidirectional Algorithm.
* The numeric values are explicit and must not be changed.
*/
enum UCharDirection {
/** L */
U_LEFT_TO_RIGHT = 0,
/** R */
U_RIGHT_TO_LEFT = 1,
/** EN */
U_EUROPEAN_NUMBER = 2,
/** ES */
U_EUROPEAN_NUMBER_SEPARATOR = 3,
/** ET */
U_EUROPEAN_NUMBER_TERMINATOR = 4,
/** AN */
U_ARABIC_NUMBER = 5,
/** CS */
U_COMMON_NUMBER_SEPARATOR = 6,
/** B */
U_BLOCK_SEPARATOR = 7,
/** S */
U_SEGMENT_SEPARATOR = 8,
/** WS */
U_WHITE_SPACE_NEUTRAL = 9,
/** ON */
U_OTHER_NEUTRAL = 10,
/** LRE */
U_LEFT_TO_RIGHT_EMBEDDING = 11,
/** LRO */
U_LEFT_TO_RIGHT_OVERRIDE = 12,
/** AL */
U_RIGHT_TO_LEFT_ARABIC = 13,
/** RLE */
U_RIGHT_TO_LEFT_EMBEDDING = 14,
/** RLO */
U_RIGHT_TO_LEFT_OVERRIDE = 15,
/** PDF */
U_POP_DIRECTIONAL_FORMAT = 16,
/** NSM */
U_DIR_NON_SPACING_MARK = 17,
/** BN */
U_BOUNDARY_NEUTRAL = 18,
/** Number of values above (19); not itself a direction. */
U_CHAR_DIRECTION_COUNT
};
typedef enum UCharDirection UCharDirection;
/**
* Script range as defined in the Unicode standard.
* u_charScript() returns a value of this type.
*
* The enumerators carry no explicit values, so they are numbered from 0
* in declaration order; inserting or reordering entries changes the
* numeric value of every entry that follows.
*/
/* Generated from Unicode Data files */
enum UCharScript {
/* Script names */
/** */
U_BASIC_LATIN,
/** */
U_LATIN_1_SUPPLEMENT,
/** */
U_LATIN_EXTENDED_A,
/** */
U_LATIN_EXTENDED_B,
/** */
U_IPA_EXTENSIONS,
/** */
U_SPACING_MODIFIER_LETTERS,
/** */
U_COMBINING_DIACRITICAL_MARKS,
/** */
U_GREEK,
/** */
U_CYRILLIC,
/** */
U_ARMENIAN,
/** */
U_HEBREW,
/** */
U_ARABIC,
/** */
U_SYRIAC,
/** */
U_THAANA,
/** */
U_DEVANAGARI,
/** */
U_BENGALI,
/** */
U_GURMUKHI,
/** */
U_GUJARATI,
/** */
U_ORIYA,
/** */
U_TAMIL,
/** */
U_TELUGU,
/** */
U_KANNADA,
/** */
U_MALAYALAM,
/** */
U_SINHALA,
/** */
U_THAI,
/** */
U_LAO,
/** */
U_TIBETAN,
/** */
U_MYANMAR,
/** */
U_GEORGIAN,
/** */
U_HANGUL_JAMO,
/** */
U_ETHIOPIC,
/** */
U_CHEROKEE,
/** */
U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
/** */
U_OGHAM,
/** */
U_RUNIC,
/** */
U_KHMER,
/** */
U_MONGOLIAN,
/** */
U_LATIN_EXTENDED_ADDITIONAL,
/** */
U_GREEK_EXTENDED,
/** */
U_GENERAL_PUNCTUATION,
/** */
U_SUPERSCRIPTS_AND_SUBSCRIPTS,
/** */
U_CURRENCY_SYMBOLS,
/** */
U_COMBINING_MARKS_FOR_SYMBOLS,
/** */
U_LETTERLIKE_SYMBOLS,
/** */
U_NUMBER_FORMS,
/** */
U_ARROWS,
/** */
U_MATHEMATICAL_OPERATORS,
/** */
U_MISCELLANEOUS_TECHNICAL,
/** */
U_CONTROL_PICTURES,
/** */
U_OPTICAL_CHARACTER_RECOGNITION,
/** */
U_ENCLOSED_ALPHANUMERICS,
/** */
U_BOX_DRAWING,
/** */
U_BLOCK_ELEMENTS,
/** */
U_GEOMETRIC_SHAPES,
/** */
U_MISCELLANEOUS_SYMBOLS,
/** */
U_DINGBATS,
/** */
U_BRAILLE_PATTERNS,
/** */
U_CJK_RADICALS_SUPPLEMENT,
/** */
U_KANGXI_RADICALS,
/** */
U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
/** */
U_CJK_SYMBOLS_AND_PUNCTUATION,
/** */
U_HIRAGANA,
/** */
U_KATAKANA,
/** */
U_BOPOMOFO,
/** */
U_HANGUL_COMPATIBILITY_JAMO,
/** */
U_KANBUN,
/** */
U_BOPOMOFO_EXTENDED,
/** */
U_ENCLOSED_CJK_LETTERS_AND_MONTHS,
/** */
U_CJK_COMPATIBILITY,
/** */
U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
/** */
U_CJK_UNIFIED_IDEOGRAPHS,
/** */
U_YI_SYLLABLES,
/** */
U_YI_RADICALS,
/** */
U_HANGUL_SYLLABLES,
/** */
U_HIGH_SURROGATES,
/** */
U_HIGH_PRIVATE_USE_SURROGATES,
/** */
U_LOW_SURROGATES,
/** */
U_PRIVATE_USE_AREA,
/** */
U_CJK_COMPATIBILITY_IDEOGRAPHS,
/** */
U_ALPHABETIC_PRESENTATION_FORMS,
/** */
U_ARABIC_PRESENTATION_FORMS_A,
/** */
U_COMBINING_HALF_MARKS,
/** */
U_CJK_COMPATIBILITY_FORMS,
/** */
U_SMALL_FORM_VARIANTS,
/** */
U_ARABIC_PRESENTATION_FORMS_B,
/** */
U_SPECIALS,
/** */
U_HALFWIDTH_AND_FULLWIDTH_FORMS,
/** Number of entries above; not itself a script value. */
U_CHAR_SCRIPT_COUNT,
/** Has the same numeric value as U_CHAR_SCRIPT_COUNT. */
U_NO_SCRIPT=U_CHAR_SCRIPT_COUNT
};
typedef enum UCharScript UCharScript;
/**
* Display-cell width classes described in the documentation of the
* u_charCellWidth() function (which itself is declared to return uint16_t).
*/
enum UCellWidth
{
/** Characters which are considered to take up no display-cell space. */
U_ZERO_WIDTH = 0,
/** Characters which take up half a cell in standard Asian text. */
U_HALF_WIDTH = 1,
/** Characters which take up a full cell in standard Asian text. */
U_FULL_WIDTH = 2,
/** Characters whose cell width is context-dependent. */
U_NEUTRAL_WIDTH = 3,
/** Number of values above (4); not itself a width. */
U_CELL_WIDTH_COUNT
};
typedef enum UCellWidth UCellWidth;
/**
* Selector constants for u_charName().
* <code>u_charName() returns either the "modern" name of a
* Unicode character or the name that was defined in
* Unicode version 1.0, before the Unicode standard merged
* with ISO-10646.
*
* @see u_charName()
*/
enum UCharNameChoice {
U_UNICODE_CHAR_NAME,
U_UNICODE_10_CHAR_NAME,
U_CHAR_NAME_CHOICE_COUNT
};
typedef enum UCharNameChoice UCharNameChoice;
/**
* Functions to classify characters.
*/
/**
* Determines whether the specified UChar is a lowercase character
* according to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the character is lowercase; false otherwise.
* @see UNICODE_VERSION
* @see u_isupper()
* @see u_istitle()
* @see u_tolower()
*/
U_CAPI bool_t U_EXPORT2
u_islower(UChar c);
/**
* Determines whether the specified character is an uppercase character
* according to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the character is uppercase; false otherwise.
* @see u_islower()
* @see u_istitle
* @see u_tolower()
*/
U_CAPI bool_t U_EXPORT2
u_isupper(UChar c);
/**
* Determines whether the specified character is a titlecase character
* according to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the character is titlecase; false otherwise.
* @see u_isupper()
* @see u_islower()
* @see u_totitle()
*/
U_CAPI bool_t U_EXPORT2
u_istitle(UChar c);
/**
* Determines whether the specified character is a digit according to Unicode
* 2.1.2.
*
* @param c the character to be tested
* @return true if the character is a digit; false otherwise.
*/
U_CAPI bool_t U_EXPORT2
u_isdigit(UChar c);
/**
* Determines whether the specified character is an alphanumeric character
* (letter or digit)according to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the character is a letter or a digit; false otherwise.
*/
U_CAPI bool_t U_EXPORT2
u_isalnum(UChar c);
/**
* Determines whether the specified numeric value is actually a defined character
* according to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the character has a defined Unicode meaning; false otherwise.
*
* @see u_isdigit()
* @see u_isalpha()
* @see u_isalnum()
* @see u_isupper()
* @see u_islower()
* @see u_istitle()
*/
U_CAPI bool_t U_EXPORT2
u_isdefined(UChar c);
/**
* Determines whether the specified character is a letter
* according to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the character is a letter; false otherwise.
*
* @see u_isdigit()
* @see u_isalnum()
*/
U_CAPI bool_t U_EXPORT2
u_isalpha(UChar c);
/**
* Determines if the specified character is a space character or not.
*
* @param c the character to be tested
* @return true if the character is a space character; false otherwise.
*/
U_CAPI bool_t U_EXPORT2
u_isspace(UChar c);
/**
* Determines whether the specified character is a control character or not.
*
* @param c the character to be tested
* @return true if the Unicode character is a control character; false otherwise.
*
* @see u_isprint()
*/
U_CAPI bool_t U_EXPORT2
u_iscntrl(UChar c);
/**
* Determines whether the specified character is a printable character according
* to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the Unicode character is a printable character; false otherwise.
*
* @see u_iscntrl()
*/
U_CAPI bool_t U_EXPORT2
u_isprint(UChar c);
/**
* Determines whether the specified character is of the base form according
* to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the Unicode character is of the base form; false otherwise.
*
* @see u_isalpha()
* @see u_isdigit()
*/
U_CAPI bool_t U_EXPORT2
u_isbase(UChar c);
/**
* Returns the linguistic direction property of a character.
* <P>
* Returns the linguistic direction property of a character.
* For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
* property.
* @see UCharDirection
*/
U_CAPI UCharDirection U_EXPORT2
u_charDirection(UChar c);
/**
* Returns a value indicating the display-cell width of the character
* when used in Asian text, according to the Unicode standard (see p. 6-130
* of The Unicode Standard, Version 2.0). The results for various characters
* are as follows:
* <P>
* ZERO_WIDTH: Characters which are considered to take up no display-cell space:
* control characters
* format characters
* line and paragraph separators
* non-spacing marks
* combining Hangul jungseong
* combining Hangul jongseong
* unassigned Unicode values
* <P>
* HALF_WIDTH: Characters which take up half a cell in standard Asian text:
* all characters in the General Scripts Area except combining Hangul choseong
* and the characters called out specifically above as ZERO_WIDTH
* alphabetic and Arabic presentation forms
* halfwidth CJK punctuation
* halfwidth Katakana
* halfwidth Hangul Jamo
* halfwidth forms, arrows, and shapes
* <P>
* FULL_WIDTH: Characters which take up a full cell in standard Asian text:
* combining Hangul choseong
* all characters in the CJK Phonetics and Symbols Area
* all characters in the CJK Ideographs Area
* all characters in the Hangul Syllables Area
* CJK compatibility ideographs
* CJK compatibility forms
* small form variants
* fullwidth ASCII
* fullwidth punctuation and currency signs
* <P>
* NEUTRAL: Characters whose cell width is context-dependent:
* all characters in the Symbols Area, except those specifically called out above
* all characters in the Surrogates Area
* all characters in the Private Use Area
* <P>
* For Korean text, this algorithm should work properly with properly normalized Korean
* text. Precomposed Hangul syllables and non-combining jamo are all considered full-
* width characters. For combining jamo, we treat choseong (initial consonants)
* as double-width characters and jungseong (vowels) and jongseong (final consonants)
* as non-spacing marks. This will work right in text that uses the precomposed
* choseong characters instead of two choseong characters in a row, and which uses the
* choseong filler character at the beginning of syllables that don't have an initial
* consonant. The results may be slightly off with Korean text following different
* conventions.
*/
U_CAPI uint16_t U_EXPORT2
u_charCellWidth(UChar c);
/**
* Returns a value indicating a character category according to Unicode
* 2.1.2.
* @param c the character to be tested
* @return a value of type int, the character category.
* @see UCharCategory
*/
U_CAPI int8_t U_EXPORT2
u_charType(UChar c);
/**
* Retrieves the decimal numeric value of a digit character.
*
* @param c the digit character for which to get the numeric value
* @return the numeric value of c in decimal radix. This method returns
* -1 if c is not a valid digit character.
*/
U_CAPI int32_t U_EXPORT2
u_charDigitValue(UChar c);
/**
* Returns the script associated with a character.
*
* @see #UCharScript
*/
U_CAPI UCharScript U_EXPORT2
u_charScript(UChar ch);
/**
* Retrieve the name of a Unicode character.
* Depending on <code>nameChoice</code>, the character name written
* into the buffer is the "modern" name or the name that was defined
* in Unicode version 1.0.
* The name contains only "invariant" characters
* like A-Z, 0-9, space, and '-'.
*
* @param code The character (code point) for which to get the name.
* It must be <code>0&lt;=code&lt;0x10ffff</code>.
* @param nameChoice Selector for which name to get.
* @param buffer Destination address for copying the name.
* @param bufferLength <code>==sizeof(buffer)</code>
* @param pErrorCode Pointer to a UErrorCode variable;
* check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
* returns.
*
* @see UCharNameChoice
*/
U_CAPI UTextOffset U_EXPORT2
u_charName(uint32_t code, UCharNameChoice nameChoice,
char *buffer, UTextOffset bufferLength,
UErrorCode *pErrorCode);
/**
* The following functions are java specific.
*/
/**
* A convenience method for determining if a Unicode character
* is allowed to start in a Unicode identifier.
* A character may start a Unicode identifier if and only if
* it is a letter.
*
* @param c the Unicode character.
* @return TRUE if the character may start a Unicode identifier;
* FALSE otherwise.
* @see u_isalpha
* @see u_isIDPart
*/
U_CAPI bool_t U_EXPORT2
u_isIDStart(UChar c);
/**
* A convenience method for determining if a Unicode character
* may be part of a Unicode identifier other than the starting
* character.
* <P>
* A character may be part of a Unicode identifier if and only if
* it is one of the following:
* <ul>
* <li> a letter
* <li> a connecting punctuation character (such as "_").
* <li> a digit
* <li> a numeric letter (such as a Roman numeral character)
* <li> a combining mark
* <li> a non-spacing mark
* <li> an ignorable control character
* </ul>
*
* @param c the Unicode character.
* @return TRUE if the character may be part of a Unicode identifier;
* FALSE otherwise.
* @see u_isIDIgnorable
* @see u_isIDStart
*/
U_CAPI bool_t U_EXPORT2
u_isIDPart(UChar c);
/**
* A convenience method for determining if a Unicode character
* should be regarded as an ignorable character
* in a Unicode identifier.
* <P>
* The following Unicode characters are ignorable in a
* Unicode identifier:
* <table>
* <tr><td>0x0000 through 0x0008,</td>
* <td>ISO control characters that</td></tr>
* <tr><td>0x000E through 0x001B,</td> <td>are not whitespace</td></tr>
* <tr><td>and 0x007F through 0x009F</td></tr>
* <tr><td>0x200C through 0x200F</td> <td>join controls</td></tr>
* <tr><td>0x202A through 0x202E</td> <td>bidirectional controls</td></tr>
* <tr><td>0x206A through 0x206F</td> <td>format controls</td></tr>
* <tr><td>0xFEFF</td> <td>zero-width no-break space</td></tr>
* </table>
*
* @param c the Unicode character.
* @return TRUE if the character is ignorable in a Unicode identifier;
* FALSE otherwise.
* @see u_isIDPart
*/
U_CAPI bool_t U_EXPORT2
u_isIDIgnorable(UChar c);
/**
* A convenience method for determining if a Unicode character
* is allowed as the first character in a Java identifier.
* <P>
* A character may start a Java identifier if and only if
* it is one of the following:
* <ul>
* <li> a letter
* <li> a currency symbol (such as "$")
* <li> a connecting punctuation symbol (such as "_").
* </ul>
*
* @param c the Unicode character.
* @return TRUE if the character may start a Java identifier;
* FALSE otherwise.
* @see u_isJavaIDPart
* @see u_isalpha
* @see u_isIDStart
*/
U_CAPI bool_t U_EXPORT2
u_isJavaIDStart(UChar c);
/**
* A convenience method for determining if a Unicode character
* may be part of a Java identifier other than the starting
* character.
* <P>
* A character may be part of a Java identifier if and only if
* it is one of the following:
* <ul>
* <li> a letter
* <li> a currency symbol (such as "$")
* <li> a connecting punctuation character (such as "_").
* <li> a digit
* <li> a numeric letter (such as a Roman numeral character)
* <li> a combining mark
* <li> a non-spacing mark
* <li> an ignorable control character
* </ul>
*
* @param c the Unicode character.
* @return TRUE if the character may be part of a Java identifier;
* FALSE otherwise.
* @see u_isIDIgnorable
* @see u_isJavaIDStart
* @see u_isalpha
* @see u_isdigit
* @see u_isIDPart
*/
U_CAPI bool_t U_EXPORT2
u_isJavaIDPart(UChar c);
/**
* Functions to change character case.
*/
/**
* The given character is mapped to its lowercase equivalent according to
* Unicode 2.1.2; if the character has no lowercase equivalent, the character
* itself is returned.
* <P>
* A character has a lowercase equivalent if and only if a lowercase mapping
* is specified for the character in the Unicode 2.1.2 attribute table.
* <P>
* u_tolower() only deals with the general letter case conversion.
* For language specific case conversion behavior, use ustrToUpper().
* For example, the case conversion for dot-less i and dotted I in Turkish,
* or for final sigma in Greek.
*
* @param c the character to be converted
* @return the lowercase equivalent of the character, if any;
* otherwise the character itself.
*/
U_CAPI UChar U_EXPORT2
u_tolower(UChar c);
/**
* The given character is mapped to its uppercase equivalent according to Unicode
* 2.1.2; if the character has no uppercase equivalent, the character itself is
* returned.
* <P>
* u_toupper() only deals with the general letter case conversion.
* For language specific case conversion behavior, use ustrToUpper().
* For example, the case conversion for dot-less i and dotted I in Turkish,
* or ess-zed (i.e., "sharp S") in German.
*
* @param c the character to be converted
* @return the uppercase equivalent of the character, if any;
* otherwise the character itself.
*/
U_CAPI UChar U_EXPORT2
u_toupper(UChar c);
/**
* The given character is mapped to its titlecase equivalent according to Unicode
* 2.1.2. There are only four Unicode characters that are truly titlecase forms
* that are distinct from uppercase forms. As a rule, if a character has no
* true titlecase equivalent, its uppercase equivalent is returned.
* <P>
* A character has a titlecase equivalent if and only if a titlecase mapping
* is specified for the character in the Unicode 2.1.2 data.
*
* @param c the character to be converted
* @return the titlecase equivalent of the character, if any;
* otherwise the character itself.
*/
U_CAPI UChar U_EXPORT2
u_totitle(UChar c);
/**
* The function is used to get the Unicode standard Version that is used.
*
* @return the Unicode standard Version number
*/
U_CAPI const char* U_EXPORT2
u_getVersion(void);
#endif /*_UCHAR*/
/*eof*/
#error Please include unicode/uchar.h instead

View file

@ -5,7 +5,7 @@
*******************************************************************************
*/
#include "uchriter.h"
#include "unicode/uchriter.h"
UCharCharacterIterator::UCharCharacterIterator(const UChar* text,
int32_t textLength)

View file

@ -1,142 +1 @@
/*
**********************************************************************
* Copyright (C) 1998-1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#ifndef UCHRITER_H
#define UCHRITER_H
#include "utypes.h"
#include "chariter.h"
/**
* A concrete subclass of CharacterIterator that iterates over the
* characters in a UnicodeString. It's possible not only to create an
* iterator that iterates over an entire UnicodeString, but also to
* create one that iterates over only a subrange of a UnicodeString
* (iterators over different subranges of the same UnicodeString don't
* compare equal).
*
* NOTE(review): the public constructors declared below take either a
* const UChar* plus a length or another UCharCharacterIterator; no
* UnicodeString or subrange constructor is declared. The wording above
* looks carried over from a sibling iterator class - confirm and reword. */
class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
public:
/**
* Create an iterator over the UChar text referred to by "text";
* "len" is the length of that text.
* The iteration range is the whole string, and the starting
* position is 0. */
UCharCharacterIterator(const UChar* text, int32_t len);
/**
* Copy constructor. The new iterator iterates over the same range
* of the same string as "that", and its initial position is the
* same as "that"'s current position. */
UCharCharacterIterator(const UCharCharacterIterator& that);
/**
* Destructor. */
~UCharCharacterIterator();
/**
* Assignment operator. *this is altered to iterate over the same
* range of the same string as "that", and refers to the same
* character within that string as "that" does. */
UCharCharacterIterator&
operator=(const UCharCharacterIterator& that);
/**
* Returns true if the iterators iterate over the same range of the
* same string and are pointing at the same character. */
virtual bool_t operator==(const CharacterIterator& that) const;
/**
* Generates a hash code for this iterator. */
virtual int32_t hashCode(void) const;
/**
* Returns a new iterator referring to the same
* character in the same range of the same string as this one. The
* caller must delete the new iterator. */
virtual CharacterIterator* clone(void) const;
/**
* Sets the iterator to refer to the first character in its
* iteration range, and returns that character. */
virtual UChar first(void);
/**
* Sets the iterator to refer to the last character in its iteration
* range, and returns that character. */
virtual UChar last(void);
/**
* Sets the iterator to refer to the "pos"-th character in the
* text the iterator refers to, and returns that character.
* If the index is outside the iterator's iteration range, the
* behavior of the iterator is undefined. */
virtual UChar setIndex(UTextOffset pos);
/**
* Returns the character the iterator currently refers to. */
virtual UChar current(void) const;
/**
* Advances to the next character in the iteration range (toward
* last()), and returns that character. If there are no more
* characters to return, returns DONE. */
virtual UChar next(void);
/**
* Advances to the previous character in the iteration range (toward
* first()), and returns that character. If there are no more
* characters to return, returns DONE. */
virtual UChar previous(void);
/**
* Returns the numeric index of the first character in this
* iterator's iteration range. */
virtual UTextOffset startIndex(void) const;
/**
* Returns the numeric index of the character immediately BEYOND the
* last character in this iterator's iteration range. */
virtual UTextOffset endIndex(void) const;
/**
* Returns the numeric index in the underlying text of the
* character the iterator currently refers to (i.e., the character
* returned by current()). */
virtual UTextOffset getIndex(void) const;
/**
* Copies the text under iteration into the UnicodeString
* referred to by "result". Even if this iterator iterates across
* only a part of this string, the whole string is copied.
* @param result Receives a copy of the text under iteration. */
virtual void getText(UnicodeString& result);
/**
* Return a class ID for this object (not really public).
* Forwards to getStaticClassID(). */
virtual UClassID getDynamicClassID(void) const
{ return getStaticClassID(); }
/**
* Return a class ID for this class (not really public).
* The ID is the address of the static member fgClassID. */
static UClassID getStaticClassID(void)
{ return (UClassID)(&fgClassID); }
private:
/** Default constructor; declared private, so clients cannot call it. */
UCharCharacterIterator();
const UChar* text;
UTextOffset pos;
UTextOffset begin;
UTextOffset end;
/** Its address is the value returned by getStaticClassID(). */
static char fgClassID;
};
#endif
#error Please include unicode/uchriter.h instead

View file

@ -33,7 +33,7 @@
#define arrayRegionMatches(source, sourceStart, target, targetStart, len) (icu_memcmp(&source[sourceStart], &target[targetStart], len * sizeof(int16_t)) != 0)
#define arrayRegionMatches(source, sourceStart, target, targetStart, len) (uprv_memcmp(&source[sourceStart], &target[targetStart], len * sizeof(int16_t)) != 0)
/* internal constants*/
#define UCMP16_kMaxUnicode_int 65535
@ -78,7 +78,7 @@ int32_t ucmp16_getkBlockCount()
CompactShortArray* ucmp16_open(int16_t defaultValue)
{
int32_t i;
CompactShortArray* this_obj = (CompactShortArray*) icu_malloc(sizeof(CompactShortArray));
CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
if (this_obj == NULL) return NULL;
this_obj->fStructSize = sizeof(CompactShortArray);
@ -91,17 +91,17 @@ CompactShortArray* ucmp16_open(int16_t defaultValue)
this_obj->fHashes = NULL;
this_obj->fDefaultValue = defaultValue;
this_obj->fArray = (int16_t*)icu_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
this_obj->fArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
if (this_obj->fArray == NULL)
{
this_obj->fBogus = TRUE;
return NULL;
}
this_obj->fIndex = (uint16_t*)icu_malloc(UCMP16_kIndexCount * sizeof(uint16_t));
this_obj->fIndex = (uint16_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(uint16_t));
if (this_obj->fIndex == NULL)
{
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = NULL;
this_obj->fBogus = TRUE;
@ -115,11 +115,11 @@ CompactShortArray* ucmp16_open(int16_t defaultValue)
this_obj->fArray[i] = defaultValue;
}
this_obj->fHashes =(int32_t*)icu_malloc(UCMP16_kIndexCount * sizeof(int32_t));
this_obj->fHashes =(int32_t*)uprv_malloc(UCMP16_kIndexCount * sizeof(int32_t));
if (this_obj->fHashes == NULL)
{
icu_free(this_obj->fArray);
icu_free(this_obj->fIndex);
uprv_free(this_obj->fArray);
uprv_free(this_obj->fIndex);
this_obj->fBogus = TRUE;
return NULL;
}
@ -138,7 +138,7 @@ CompactShortArray* ucmp16_openAdopt(uint16_t *indexArray,
int32_t count,
int16_t defaultValue)
{
CompactShortArray* this_obj = (CompactShortArray*) icu_malloc(sizeof(CompactShortArray));
CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
if (this_obj == NULL) return NULL;
this_obj->fHashes = NULL;
this_obj->fCount = count;
@ -179,7 +179,7 @@ CompactShortArray* ucmp16_openAlias(uint16_t *indexArray,
int32_t count,
int16_t defaultValue)
{
CompactShortArray* this_obj = (CompactShortArray*) icu_malloc(sizeof(CompactShortArray));
CompactShortArray* this_obj = (CompactShortArray*) uprv_malloc(sizeof(CompactShortArray));
if (this_obj == NULL) return NULL;
this_obj->fHashes = NULL;
this_obj->fCount = count;
@ -202,16 +202,16 @@ void ucmp16_close(CompactShortArray* this_obj)
if(this_obj != NULL) {
if(!this_obj->fAlias) {
if(this_obj->fArray != NULL) {
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
}
if(this_obj->fIndex != NULL) {
icu_free(this_obj->fIndex);
uprv_free(this_obj->fIndex);
}
}
if(this_obj->fHashes != NULL) {
icu_free(this_obj->fHashes);
uprv_free(this_obj->fHashes);
}
icu_free(this_obj);
uprv_free(this_obj);
}
}
@ -219,13 +219,13 @@ CompactShortArray* setToBogus(CompactShortArray* this_obj)
{
if(this_obj != NULL) {
if(!this_obj->fAlias) {
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = NULL;
icu_free(this_obj->fIndex);
uprv_free(this_obj->fIndex);
this_obj->fIndex = NULL;
}
icu_free(this_obj->fHashes);
uprv_free(this_obj->fHashes);
this_obj->fHashes = NULL;
this_obj->fCount = 0;
@ -242,7 +242,7 @@ void ucmp16_expand(CompactShortArray* this_obj)
if (this_obj->fCompact)
{
int32_t i;
int16_t *tempArray = (int16_t*)icu_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
int16_t *tempArray = (int16_t*)uprv_malloc(UCMP16_kUnicodeCount * sizeof(int16_t));
if (tempArray == NULL)
{
@ -260,7 +260,7 @@ void ucmp16_expand(CompactShortArray* this_obj)
this_obj->fIndex[i] = (uint16_t)(i<<this_obj->kBlockShift);
}
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = tempArray;
this_obj->fCompact = FALSE;
}
@ -357,7 +357,7 @@ void ucmp16_compact(CompactShortArray* this_obj)
if (this_obj->fIndex[i] == 0xFFFF)
{
/* we didn't match, so copy & update*/
icu_memcpy(&(this_obj->fArray[jBlockStart]),
uprv_memcpy(&(this_obj->fArray[jBlockStart]),
&(this_obj->fArray[iBlockStart]),
(1 << this_obj->kBlockShift)*sizeof(int16_t));
@ -378,14 +378,14 @@ void ucmp16_compact(CompactShortArray* this_obj)
/* we are done compacting, so now make the array shorter*/
{
int32_t newSize = limitCompacted * (1 << this_obj->kBlockShift);
int16_t *result = (int16_t*) icu_malloc(sizeof(int16_t) * newSize);
int16_t *result = (int16_t*) uprv_malloc(sizeof(int16_t) * newSize);
icu_memcpy(result, this_obj->fArray, newSize * sizeof(int16_t));
uprv_memcpy(result, this_obj->fArray, newSize * sizeof(int16_t));
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = result;
this_obj->fCount = newSize;
icu_free(this_obj->fHashes);
uprv_free(this_obj->fHashes);
this_obj->fHashes = NULL;
this_obj->fCompact = TRUE;
@ -453,7 +453,7 @@ U_CAPI CompactShortArray * U_EXPORT2 ucmp16_cloneFromData(const uint8_t **sourc
}
array = (CompactShortArray*)malloc(sizeof(*array));
icu_memcpy(array,*source, sizeof(*array));
uprv_memcpy(array,*source, sizeof(*array));
*source += array->fStructSize;

View file

@ -19,7 +19,7 @@
#define UCMP16_H
#include "utypes.h"
#include "unicode/utypes.h"
@ -66,7 +66,7 @@
* @see CompactIntArray
* @see CompactCharArray
* @see CompactStringArray
* @version $Revision: 1.5 $ 8/25/98
* @version $Revision: 1.6 $ 8/25/98
* @author Helena Shih
*/

View file

@ -48,9 +48,9 @@ char c;
if (this_obj->fCount != newCount)
{
this_obj->fCount = newCount;
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = 0;
this_obj->fArray = (int32_t*)icu_malloc(this_obj->fCount * sizeof(int32_t));
this_obj->fArray = (int32_t*)uprv_malloc(this_obj->fCount * sizeof(int32_t));
if (!this_obj->fArray) {
this_obj->fBogus = TRUE;
return;
@ -60,16 +60,16 @@ char c;
T_FileStream_read(is, &len, sizeof(len));
if (len == 0)
{
icu_free(this_obj->fIndex);
uprv_free(this_obj->fIndex);
this_obj->fIndex = 0;
}
else if (len == UCMP32_kIndexCount)
{
if (this_obj->fIndex == 0)
this_obj->fIndex =(uint16_t*)icu_malloc(UCMP32_kIndexCount * sizeof(uint16_t));
this_obj->fIndex =(uint16_t*)uprv_malloc(UCMP32_kIndexCount * sizeof(uint16_t));
if (!this_obj->fIndex) {
this_obj->fBogus = TRUE;
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = 0;
return;
}
@ -123,7 +123,7 @@ CompactIntArray* ucmp32_open(int32_t defaultValue)
uint16_t i;
int32_t *p, *p_end;
uint16_t *q, *q_end;
CompactIntArray* this_obj = (CompactIntArray*) icu_malloc(sizeof(CompactIntArray));
CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray));
if (this_obj == NULL) return NULL;
this_obj->fCount = UCMP32_kUnicodeCount;
@ -152,15 +152,15 @@ CompactIntArray* ucmp32_open(int32_t defaultValue)
* to data position number 8, which has elements "bced". In the compressed
* version, index# 2 points to data position 1, which also has "bced"
*/
this_obj->fArray = (int32_t*)icu_malloc(UCMP32_kUnicodeCount * sizeof(int32_t));
this_obj->fArray = (int32_t*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(int32_t));
if (this_obj->fArray == NULL) {
this_obj->fBogus = TRUE;
return NULL;
}
this_obj->fIndex = (uint16_t*)icu_malloc(UCMP32_kIndexCount * sizeof(uint16_t));
this_obj->fIndex = (uint16_t*)uprv_malloc(UCMP32_kIndexCount * sizeof(uint16_t));
if (!this_obj->fIndex) {
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = NULL;
this_obj->fBogus = TRUE;
return NULL;
@ -182,7 +182,7 @@ CompactIntArray* ucmp32_open(int32_t defaultValue)
CompactIntArray* ucmp32_openAdopt(uint16_t *indexArray, int32_t *newValues, int32_t count)
{
CompactIntArray* this_obj = (CompactIntArray*) icu_malloc(sizeof(CompactIntArray));
CompactIntArray* this_obj = (CompactIntArray*) uprv_malloc(sizeof(CompactIntArray));
if (this_obj == NULL) return NULL;
this_obj->fCount = count;
this_obj->fBogus = FALSE;
@ -198,12 +198,12 @@ void ucmp32_close( CompactIntArray* this_obj)
{
if(this_obj != NULL) {
if(this_obj->fArray != NULL) {
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
}
if(this_obj->fIndex != NULL) {
icu_free(this_obj->fIndex);
uprv_free(this_obj->fIndex);
}
icu_free(this_obj);
uprv_free(this_obj);
}
}
@ -229,7 +229,7 @@ void ucmp32_expand(CompactIntArray* this_obj) {
int32_t i;
int32_t* tempArray;
if (this_obj->fCompact) {
tempArray = (int32_t*)icu_malloc(UCMP32_kUnicodeCount * sizeof(int32_t));
tempArray = (int32_t*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(int32_t));
if (tempArray == NULL) {
this_obj->fBogus = TRUE;
return;
@ -240,7 +240,7 @@ void ucmp32_expand(CompactIntArray* this_obj) {
for (i = 0; i < UCMP32_kIndexCount; ++i) {
this_obj->fIndex[i] = (uint16_t)(i<<UCMP32_kBlockShift);
}
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = tempArray;
this_obj->fCompact = FALSE;
}
@ -345,7 +345,7 @@ void ucmp32_compact(CompactIntArray* this_obj, int32_t cycle) {
cycle = UCMP32_kBlockCount;
/* make temp storage, larger than we need*/
tempIndex =(UChar*)icu_malloc(UCMP32_kUnicodeCount * sizeof(uint32_t));
tempIndex =(UChar*)uprv_malloc(UCMP32_kUnicodeCount * sizeof(uint32_t));
if (tempIndex == NULL) {
this_obj->fBogus = TRUE;
return;
@ -381,23 +381,23 @@ void ucmp32_compact(CompactIntArray* this_obj, int32_t cycle) {
/* now allocate and copy the items into the array*/
tempArray = (int32_t*)icu_malloc(tempIndexCount * sizeof(uint32_t));
tempArray = (int32_t*)uprv_malloc(tempIndexCount * sizeof(uint32_t));
if (tempArray == NULL) {
this_obj->fBogus = TRUE;
icu_free(tempIndex);
uprv_free(tempIndex);
return;
}
for (iIndex = 0; iIndex < tempIndexCount; ++iIndex) {
tempArray[iIndex] = this_obj->fArray[tempIndex[iIndex]];
}
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = tempArray;
this_obj->fCount = tempIndexCount;
/* free up temp storage*/
icu_free(tempIndex);
uprv_free(tempIndex);
this_obj->fCompact = TRUE;
#ifdef _DEBUG

View file

@ -20,7 +20,7 @@
#define UCMP32_H
#include "utypes.h"
#include "unicode/utypes.h"
#include "filestrm.h"
@ -75,7 +75,7 @@
* @see CompactIntArray
* @see CompactCharArray
* @see CompactStringArray
* @version $Revision: 1.5 $ 8/25/98
* @version $Revision: 1.6 $ 8/25/98
* @author Helena Shih
*/
/*====================================*/

View file

@ -54,7 +54,7 @@ CompactByteArray* ucmp8_open(int8_t defaultValue)
* to data position number 8, which has elements "bced". In the compressed
* version, index# 2 points to data position 1, which also has "bced"
*/
CompactByteArray* this_obj = (CompactByteArray*) icu_malloc(sizeof(CompactByteArray));
CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
int32_t i;
if (this_obj == NULL) return NULL;
@ -68,16 +68,16 @@ CompactByteArray* ucmp8_open(int8_t defaultValue)
this_obj->fAlias = FALSE;
this_obj->fArray = (int8_t*) icu_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
this_obj->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
if (!this_obj->fArray)
{
this_obj->fBogus = TRUE;
return NULL;
}
this_obj->fIndex = (uint16_t*) icu_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
this_obj->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
if (!this_obj->fIndex)
{
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = NULL;
this_obj->fBogus = TRUE;
return NULL;
@ -98,7 +98,7 @@ CompactByteArray* ucmp8_openAdopt(uint16_t *indexArray,
int8_t *newValues,
int32_t count)
{
CompactByteArray* this_obj = (CompactByteArray*) icu_malloc(sizeof(CompactByteArray));
CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
if (!this_obj) return NULL;
this_obj->fArray = NULL;
@ -118,7 +118,7 @@ CompactByteArray* ucmp8_openAlias(uint16_t *indexArray,
int8_t *newValues,
int32_t count)
{
CompactByteArray* this_obj = (CompactByteArray*) icu_malloc(sizeof(CompactByteArray));
CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
if (!this_obj) return NULL;
this_obj->fArray = NULL;
@ -141,13 +141,13 @@ void ucmp8_close(CompactByteArray* this_obj)
if(this_obj != NULL) {
if(!this_obj->fAlias) {
if(this_obj->fArray != NULL) {
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
}
if(this_obj->fIndex != NULL) {
icu_free(this_obj->fIndex);
uprv_free(this_obj->fIndex);
}
}
icu_free(this_obj);
uprv_free(this_obj);
}
}
@ -173,7 +173,7 @@ void ucmp8_expand(CompactByteArray* this_obj)
if (this_obj->fCompact)
{
int8_t* tempArray;
tempArray = (int8_t*) icu_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
tempArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
if (!tempArray)
{
this_obj->fBogus = TRUE;
@ -187,7 +187,7 @@ void ucmp8_expand(CompactByteArray* this_obj)
{
this_obj->fIndex[i] = (uint16_t)(i<< UCMP8_kBlockShift);
}
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = tempArray;
this_obj->fCompact = FALSE;
this_obj->fAlias = FALSE;
@ -321,7 +321,7 @@ ucmp8_compact(CompactByteArray* this_obj,
else if (cycle > (uint32_t)UCMP8_kBlockCount) cycle = UCMP8_kBlockCount;
/* make temp storage, larger than we need*/
tempIndex = (UChar*) icu_malloc(sizeof(UChar)* UCMP8_kUnicodeCount);
tempIndex = (UChar*) uprv_malloc(sizeof(UChar)* UCMP8_kUnicodeCount);
if (!tempIndex)
{
this_obj->fBogus = TRUE;
@ -364,24 +364,24 @@ ucmp8_compact(CompactByteArray* this_obj,
} /* endfor (iBlock = 1.....)*/
/* now allocate and copy the items into the array*/
tempArray = (int8_t*) icu_malloc(tempIndexCount * sizeof(int8_t));
tempArray = (int8_t*) uprv_malloc(tempIndexCount * sizeof(int8_t));
if (!tempArray)
{
this_obj->fBogus = TRUE;
icu_free(tempIndex);
uprv_free(tempIndex);
return;
}
for (iIndex = 0; iIndex < tempIndexCount; ++iIndex)
{
tempArray[iIndex] = this_obj->fArray[tempIndex[iIndex]];
}
icu_free(this_obj->fArray);
uprv_free(this_obj->fArray);
this_obj->fArray = tempArray;
this_obj->fCount = tempIndexCount;
/* free up temp storage*/
icu_free(tempIndex);
uprv_free(tempIndex);
this_obj->fCompact = TRUE;
} /* endif (!this_obj->fCompact)*/
}
@ -407,7 +407,7 @@ U_CAPI CompactByteArray * U_EXPORT2 ucmp8_cloneFromData(const uint8_t **source,
}
array = (CompactByteArray*)malloc(sizeof(*array));
icu_memcpy(array,*source, sizeof(*array));
uprv_memcpy(array,*source, sizeof(*array));
array->fAlias = TRUE;

View file

@ -12,7 +12,7 @@
#define UCMP8_H
#include "utypes.h"
#include "unicode/utypes.h"
/*====================================*/
/* class CompactByteArray

View file

@ -21,20 +21,20 @@
* 04/04/99 helena Fixed internal header inclusion.
*/
#include "umutex.h"
#include "ures.h"
#include "unicode/ures.h"
#include "uhash.h"
#include "ucmp16.h"
#include "ucmp8.h"
#include "ucnv_bld.h"
#include "unicode/ucnv_bld.h"
#include "ucnv_io.h"
#include "ucnv_err.h"
#include "unicode/ucnv_err.h"
#include "ucnv_cnv.h"
#include "ucnv_imp.h"
#include "ucnv.h"
#include "unicode/ucnv.h"
#include "cmemory.h"
#include "cstring.h"
#include "ustring.h"
#include "uloc.h"
#include "unicode/ustring.h"
#include "unicode/uloc.h"
#define CHUNK_SIZE 5*1024
@ -207,8 +207,8 @@ UConverter* ucnv_openCCSID (int32_t codepage,
return NULL;
copyPlatformString (myName, platform);
icu_strcat (myName, "-");
T_CString_integerToString (myName + icu_strlen (myName), codepage, 10);
uprv_strcat (myName, "-");
T_CString_integerToString (myName + uprv_strlen (myName), codepage, 10);
return createConverter (myName, err);
@ -225,13 +225,13 @@ void ucnv_close (UConverter * converter)
(converter->mode == UCNV_SO))
{
ucnv_close (((UConverterDataISO2022 *) (converter->extraInfo))->currentConverter);
icu_free (converter->extraInfo);
uprv_free (converter->extraInfo);
}
umtx_lock (NULL);
converter->sharedData->referenceCounter--;
umtx_unlock (NULL);
icu_free (converter);
uprv_free (converter);
return;
}
@ -306,7 +306,7 @@ void ucnv_getSubstChars (const UConverter * converter,
return;
}
icu_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */
uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */
*len = converter->subCharLen; /*store # of bytes copied to buffer */
return;
@ -330,7 +330,7 @@ void ucnv_setSubstChars (UConverter * converter,
return;
}
icu_memcpy (converter->subChar, mySubChar, len); /*copies the subchars */
uprv_memcpy (converter->subChar, mySubChar, len); /*copies the subchars */
converter->subCharLen = len; /*sets the new len */
return;
@ -373,7 +373,7 @@ int32_t ucnv_getDisplayName (const UConverter * converter,
*sets stringToWriteLength (which accounts for a NULL terminator)
*and stringToWrite
*/
stringToWriteLength = icu_strlen (converter->sharedData->name) + 1;
stringToWriteLength = uprv_strlen (converter->sharedData->name) + 1;
stringToWrite = u_uastrcpy (stringToWriteBuffer, converter->sharedData->name);
/*Hides the fallback to the internal name from the user */
@ -931,7 +931,7 @@ UChar ucnv_getNextUChar (UConverter * converter,
*Note that in the call itself we decrement
*UCharErrorBufferLength
*/
icu_memmove (converter->UCharErrorBuffer,
uprv_memmove (converter->UCharErrorBuffer,
converter->UCharErrorBuffer + 1,
--(converter->UCharErrorBufferLength) * sizeof (UChar));
return myUChar;
@ -1151,6 +1151,6 @@ void ucnv_getStarters(const UConverter* converter,
}
/*fills in the starters boolean array*/
icu_memcpy(starters, converter->sharedData->table->mbcs.starters, 256*sizeof(bool_t));
uprv_memcpy(starters, converter->sharedData->table->mbcs.starters, 256*sizeof(bool_t));
return;
}

View file

@ -1,605 +1 @@
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* ucnv.h:
* External APIs for the ICU's codeset conversion library
* Bertrand A. Damiba
*
* Modification History:
*
* Date Name Description
* 04/04/99 helena Fixed internal header inclusion.
*/
/**
* @name Character Conversion C API
*
* Character Conversion C API documentation is still under construction.
* Please check for updates soon.
*/
#ifndef UCNV_H
#define UCNV_H
#include "utypes.h"
#include "ucnv_bld.h"
#include "ucnv_err.h"
U_CDECL_BEGIN
typedef void (*UConverterToUCallback) (UConverter *,
UChar **,
const UChar *,
const char **,
const char *,
int32_t* offsets,
bool_t,
UErrorCode *);
typedef void (*UConverterFromUCallback) (UConverter *,
char **,
const char *,
const UChar **,
const UChar *,
int32_t* offsets,
bool_t,
UErrorCode *);
U_CDECL_END
/**
* Creates a UConverter object with the names specified as a C string.
* The actual name will be resolved with the alias file.
* if <TT>NULL</TT> is passed for the converter name, it will create one with the
* getDefaultName return value.
* @param converterName : name of the uconv table
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
* @see ucnv_openU
* @see ucnv_openCCSID
* @see ucnv_close
*/
U_CAPI
UConverter* U_EXPORT2 ucnv_open (const char *converterName,
UErrorCode * err);
/**
* Creates a Unicode converter with the names specified as unicode string. The name should be limited to
* the ASCII-7 alphanumerics range. The actual name will be resolved with the alias file.
* if <TT>NULL</TT> is passed for the converter name, it will create one with the
* getDefaultName return value.
* @param name : name of the uconv table in a zero terminated Unicode string
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
* @see ucnv_open
* @see ucnv_openCCSID
* @see ucnv_close
*/
U_CAPI UConverter* U_EXPORT2 ucnv_openU (const UChar * name,
UErrorCode * err);
/**
* Creates a UConverter object using a CCSID number.
*
* @param codepage : codepage # of the uconv table
* @param platform : codepage's platform (now only <TT>IBM</TT> supported)
* @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
* @see ucnv_open
* @see ucnv_openU
* @see ucnv_close
*/
U_CAPI UConverter* U_EXPORT2 ucnv_openCCSID (int32_t codepage,
UConverterPlatform platform,
UErrorCode * err);
/**
* Deletes the unicode converter.
*
* @param converter the converter object to be deleted
* @see ucnv_open
* @see ucnv_openU
* @see ucnv_openCCSID
*/
U_CAPI void U_EXPORT2 ucnv_close (UConverter * converter);
/**
* Fills in the output parameter, subChars, with the substitution characters
* as multiple bytes.
*
* @param converter: the Unicode converter
* @param subChars: the substitution characters
* @param len: on input the capacity of subChars, on output the number of bytes copied to it
* @param err: the outgoing error status code.
* If the substitution character array is too small, an
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
* @see ucnv_setSubstChars
*/
U_CAPI void U_EXPORT2
ucnv_getSubstChars (const UConverter * converter,
char *subChars,
int8_t * len,
UErrorCode * err);
/**
* Sets the substitution chars when converting from unicode to a codepage. The
* substitution is specified as a string of 1-4 bytes, and may contain <TT>NULL</TT> byte.
* The fill-in parameter err will get the error status on return.
* @param converter the Unicode converter
* @param subChars the substitution character byte sequence we want set
* @param len the number of bytes in subChars
* @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
* len is bigger than the maximum number of bytes allowed in subchars
* @see ucnv_getSubstChars
*/
U_CAPI void U_EXPORT2
ucnv_setSubstChars (UConverter * converter,
const char *subChars,
int8_t len,
UErrorCode * err);
/**
* Fills in the output parameter, errBytes, with the error characters from the
* last failing conversion.
*
* @param converter: the Unicode converter
* @param errBytes: the bytes in error
* @param len: on input the capacity of errBytes, on output the number of bytes copied to it
* @param err: the outgoing error status code.
* If the errBytes array is too small, an
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
*/
U_CAPI void U_EXPORT2
ucnv_getInvalidChars (const UConverter * converter,
char *errBytes,
int8_t * len,
UErrorCode * err);
/**
* Fills in the output parameter, errUChars, with the error characters from the
* last failing conversion.
*
* @param converter: the Unicode converter
* @param errUChars: the UChars in error
* @param len: on input the capacity of errUChars, on output the number of UChars copied to it
* @param err: the outgoing error status code.
* If the errUChars array is too small, an
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
*/
U_CAPI void U_EXPORT2
ucnv_getInvalidUChars (const UConverter * converter,
char *errUChars,
int8_t * len,
UErrorCode * err);
/**
* Resets the state of stateful conversion to the default state. This is used
* in the case of error to restart a conversion from a known default state.
* it will also empty the internal output buffers.
* @param converter the Unicode converter
*/
U_CAPI void U_EXPORT2
ucnv_reset (UConverter * converter);
/**
* Returns the maximum length of bytes used by a character. This varies between 1 and 4
* @param converter the Unicode converter
* @return the maximum number of bytes allowed by this particular converter
* @see ucnv_getMinCharSize
*/
U_CAPI int8_t U_EXPORT2
ucnv_getMaxCharSize (const UConverter * converter);
/**
* Returns the minimum byte length for characters in this codepage. This is either
* 1 or 2 for all supported codepages.
* @param converter the Unicode converter
* @return the minimum number of bytes allowed by this particular converter
* @see ucnv_getMaxCharSize
*/
U_CAPI int8_t U_EXPORT2
ucnv_getMinCharSize (const UConverter * converter);
/**
* Returns the display name of the converter passed in based on the Locale passed in,
* in the case the locale contains no display name, the internal ASCII name will be
* filled in.
*
* @param converter the Unicode converter.
* @param displayLocale is the specific Locale we want to localised for
* @param displayName user provided buffer to be filled in
* @param displayNameCapacity size of displayName Buffer
* @param err: outgoing error code.
* @return displayNameLength number of UChar needed in displayName
* @see ucnv_getName
*/
U_CAPI
int32_t U_EXPORT2 ucnv_getDisplayName (const UConverter * converter,
const char *displayLocale,
UChar * displayName,
int32_t displayNameCapacity,
UErrorCode * err);
/**
* Gets the name of the converter (zero-terminated).
* the name will be the internal name of the converter, the lifetime of the returned
* string will be that of the converter passed to this function.
* @param converter the Unicode converter
* @param err UErrorCode status
* @return the internal name of the converter
* @see ucnv_getDisplayName
*/
U_CAPI
const char * U_EXPORT2 ucnv_getName (const UConverter * converter, UErrorCode * err);
/**
* Gets a codepage number associated with the converter. This is not guaranteed
* to be the one used to create the converter. Some converters do not represent
* IBM registered codepages and return zero for the codepage number.
* The error code fill-in parameter indicates if the codepage number is available.
* @param converter the Unicode converter
* @param err the error status code; an error is reported if
* the converter is <TT>NULL</TT> or if converter's data table is <TT>NULL</TT>.
* @return If any error occurs, -1 will be returned; otherwise, the codepage number
* will be returned
*/
U_CAPI int32_t U_EXPORT2
ucnv_getCCSID (const UConverter * converter,
UErrorCode * err);
/**
* Gets a codepage platform associated with the converter. Currently, only <TT>IBM</TT> is supported
* The error code fill-in parameter indicates if the codepage number is available.
* @param converter the Unicode converter
* @param err the error status code; an error is reported if
* the converter is <TT>NULL</TT> or if converter's data table is <TT>NULL</TT>.
* @return The codepage platform
*/
U_CAPI UConverterPlatform U_EXPORT2
ucnv_getPlatform (const UConverter * converter,
UErrorCode * err);
/**
*Gets the type of conversion associated with the converter
* e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, EBCDIC_STATEFUL, LATIN_1
* @param converter: a valid, opened converter
* @return the type of the converter
*/
U_CAPI UConverterType U_EXPORT2
ucnv_getType (const UConverter * converter);
/**
*Gets the "starter" bytes for the converters of type MBCS
*will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
*is not MBCS.
*fills in an array of boolean, with the value of the byte as offset to the array.
*At return, if TRUE is found at offset 0x20, it means that the byte 0x20 is a starter byte
*in this converter.
* @param converter: a valid, opened converter of type MBCS
* @param starters: an array of size 256 to be filled in
* @param err: the outgoing error status code
* @see ucnv_getType
*/
U_CAPI void U_EXPORT2 ucnv_getStarters(const UConverter* converter,
bool_t starters[256],
UErrorCode* err);
/**
* Gets the current callback function used by the converter when an illegal or invalid sequence is found.
*
* @param converter the unicode converter
* @return a pointer to the callback function
* @see ucnv_setToUCallBack
*/
U_CAPI UConverterToUCallback U_EXPORT2
ucnv_getToUCallBack (const UConverter * converter);
/**
* Gets the current callback function used by the converter when illegal or invalid sequence found.
*
* @param converter the unicode converter
* @return a pointer to the callback function
* @see ucnv_setFromUCallBack
*/
U_CAPI UConverterFromUCallback U_EXPORT2
ucnv_getFromUCallBack (const UConverter * converter);
/**
* Sets the callback function used by the converter when an illegal or invalid sequence is found.
*
* @param converter the unicode converter
* @param action the callback function we want to set.
* @param err The error code status
* @return the previously assigned callback function pointer
* @see ucnv_getToUCallBack
*/
U_CAPI UConverterToUCallback U_EXPORT2
ucnv_setToUCallBack (UConverter * converter,
UConverterToUCallback action,
UErrorCode * err);
/**
* Sets the callback function used by the converter when an illegal or invalid sequence is found.
*
* @param converter the unicode converter
* @param action the callback function we want to set.
* @param err The error code status
* @return the previously assigned callback function pointer
* @see ucnv_getFromUCallBack
*/
U_CAPI UConverterFromUCallback U_EXPORT2
ucnv_setFromUCallBack (UConverter * converter,
UConverterFromUCallback action,
UErrorCode * err);
/**
* Transcodes an array of unicode characters to an array of codepage characters.
* The source pointer is an I/O parameter, it starts out pointing where the function is
* to begin transcoding, and ends up pointing after the first sequence of the bytes
* that it encounters that are semantically invalid.
* if ucnv_setToUCallBack is called with an action other than <TT>STOP</TT>
* before a call is made to this API, <TT>consumed</TT> and <TT>source</TT> should point to the same place
* (unless <TT>target</TT> ends with an incomplete sequence of bytes and <TT>flush</TT> is <TT>FALSE</TT>).
* the <TT>target</TT> buffer needs to be at least the size of the maximum # of bytes per character
* allowed by the target codepage.
* @param converter the Unicode converter
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
* codepage characters to. Output : points to after the last codepage character copied
* to <TT>target</TT>.
* @param targetLimit the pointer to the end of the <TT>target</TT> array
* @param source the source Unicode character array
* @param sourceLimit the pointer to the end of the source array
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
* of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
* e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
* For output data carried across calls -1 will be placed for offsets.
* @param flush <TT>TRUE</TT> if the buffer is the last buffer of the conversion iteration
* and the conversion will finish with this call, FALSE otherwise.
* @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be returned if the
* converter is <TT>NULL</TT>.
* @see ucnv_fromUChars
* @see ucnv_convert
* @see ucnv_getMinCharSize
* @see ucnv_setToUCallBack
*/
U_CAPI
void U_EXPORT2 ucnv_fromUnicode (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/**
* Converts an array of codepage characters into an array of unicode characters.
* The source pointer is an I/O parameter, it starts out pointing at the place
* to begin translating, and ends up pointing after the first sequence of the bytes
* that it encounters that are semantically invalid.
* if ucnv_setFromUCallBack is called with an action other than STOP
* before a call is made to this API, consumed and source should point to the same place
* (unless target ends with an incomplete sequence of bytes and flush is FALSE).
* @param converter the Unicode converter
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
* Unicode characters to. Output : points to after the last UChar copied to target.
* @param targetLimit the pointer to the end of the target array
* @param source the source codepage character array
* @param sourceLimit the pointer to the end of the source array
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
* of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
* e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
* For output data carried across calls -1 will be placed for offsets.
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
* in this call, FALSE otherwise.
* @param err the error code status <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be returned if the
* converter is <TT>NULL</TT>, or if <TT>targetLimit</TT> and <TT>sourceLimit</TT> are misaligned.
* @see ucnv_toUChars
* @see ucnv_getNextUChar
* @see ucnv_convert
* @see ucnv_setFromUCallBack
*/
U_CAPI
void U_EXPORT2 ucnv_toUnicode (UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/**
* Transcodes the source Unicode string to the target string in a codepage encoding
* with the specified Unicode converter. For example, if a Unicode to/from JIS
* converter is specified, the source string in Unicode will be transcoded to JIS
* encoding. The result will be stored in JIS encoding.
* if any problems during conversion are encountered it will SUBSTITUTE with the default (initial)
* substitute characters.
* This function is a more convenient but less efficient version of \Ref{ucnv_fromUnicode}.
* @param converter the Unicode converter
* @param source the <TT>source</TT> Unicode string (zero Terminated)
* @param target the <TT>target</TT> string in codepage encoding (<STRONG>not zero-terminated</STRONG> because some
* codepages do not use '\0' as a string terminator)
* @param targetCapacity Input the number of bytes available in the <TT>target</TT> buffer
* @param err the error status code.
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned if
* the # of bytes provided are not enough for transcoding.
* <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> is returned if the converter is <TT>NULL</TT> or the source or target string is empty.
* <TT>U_BUFFER_OVERFLOW_ERROR</TT> when <TT>targetSize</TT> turns out to be bigger than <TT>targetCapacity</TT>
* @return number of bytes needed in target, regardless of <TT>targetCapacity</TT>
* @see ucnv_fromUnicode
* @see ucnv_convert
*/
U_CAPI
int32_t U_EXPORT2 ucnv_fromUChars (const UConverter * converter,
char *target,
int32_t targetCapacity,
const UChar * source,
UErrorCode * err);
/**
* Transcode the source string in codepage encoding to the target string in
* Unicode encoding. For example, if a Unicode to/from JIS
* converter is specified, the source string in JIS encoding will be transcoded
* to Unicode and placed into a provided target buffer.
* if any problems during conversion are encountered it will SUBSTITUTE with the Unicode REPLACEMENT char
* We recommend that the size of the target buffer be at least as long as the maximum # of bytes per char
* in this character set.
* A zero-terminator will be placed at the end of the target buffer
* This function is a more convenient but less efficient version of \Ref{ucnv_toUnicode}.
* @param converter the Unicode converter
* @param source the source string in codepage encoding
* @param target the target string in Unicode encoding
* @param targetCapacity capacity of the target buffer
* @param sourceSize : Number of bytes in <TT>source</TT> to be transcoded
* @param err the error status code
* <TT>U_MEMORY_ALLOCATION_ERROR</TT> will be returned if the
* the internal process buffer cannot be allocated for transcoding.
* <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> is returned if the converter is <TT>NULL</TT> or
* if the source or target string is empty.
* <TT>U_BUFFER_OVERFLOW_ERROR</TT> when the input buffer is prematurely exhausted and targetSize non-<TT>NULL</TT>.
* @return the number of UChar needed in target (including the zero terminator)
* @see ucnv_getNextUChar
* @see ucnv_toUnicode
* @see ucnv_convert
*/
U_CAPI
int32_t U_EXPORT2 ucnv_toUChars (const UConverter * converter,
UChar * target,
int32_t targetCapacity,
const char *source,
int32_t sourceSize,
UErrorCode * err);
/********************************
* Will convert a codepage buffer one character at a time.
* This function was written to be efficient when transcoding small amounts of data at a time.
* In that case it will be more efficient than \Ref{ucnv_toUnicode}.
* When converting large buffers use \Ref{ucnv_toUnicode}.
*@param converter an open UConverter
*@param source the address of a pointer to the codepage buffer, will be updated to point after
*the bytes consumed in the conversion call.
*@param sourceLimit points to the end of the input buffer
*@param err fills in error status (see ucnv_toUnicode)
*@return a UChar resulting from the partial conversion of source
*@see ucnv_toUnicode
*@see ucnv_toUChars
*@see ucnv_convert
*/
U_CAPI
UChar U_EXPORT2 ucnv_getNextUChar (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err);
/**************************
* Will convert a sequence of bytes from one codepage to another.
* This is <STRONG>NOT AN EFFICIENT</STRONG> way to transcode.
* use \Ref{ucnv_toUnicode} and \Ref{ucnv_fromUnicode} for efficiency
* @param toConverterName: The name of the converter that will be used to encode the output buffer
* @param fromConverterName: The name of the converter that will be used to decode the input buffer
* @param target: Pointer to the output buffer to write to
* @param targetCapacity: on input contains the capacity of target
* @param source: Pointer to the input buffer
* @param sourceLength: on input contains the capacity of source
* @param err: fills in an error status
* @return will be filled in with the number of bytes needed in target
* @see ucnv_fromUnicode
* @see ucnv_toUnicode
* @see ucnv_fromUChars
* @see ucnv_toUChars
* @see ucnv_getNextUChar
*/
U_CAPI
int32_t U_EXPORT2 ucnv_convert (const char *toConverterName,
const char *fromConverterName,
char *target,
int32_t targetCapacity,
const char *source,
int32_t sourceLength,
UErrorCode * err);
/**
* SYSTEM API
* Iterates through every cached converter and frees all the unused ones.
*
* @return the number of cached converters successfully deleted
*/
U_CAPI int32_t U_EXPORT2 ucnv_flushCache (void);
/**
* provides a string containing the internal name (based on the alias file) of the converter,
* given an index.
* @param index the number of converters available on the system (<TT>[0..ucnv_countAvailable()]</TT>)
* @return a pointer to a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
* @see ucnv_countAvailable
*/
U_CAPI
const char * U_EXPORT2 ucnv_getAvailableName (int32_t index);
/**
* returns the number of available converters.
*
* @return the number of available converters
* @see ucnv_getAvailableName
*/
U_CAPI int32_t U_EXPORT2 ucnv_countAvailable (void);
/**
* returns the current default converter name.
*
* @return returns the current default converter name;
* if a default converter name cannot be determined,
* then <code>NULL</code> is returned
* @see ucnv_setDefaultName
*/
U_CAPI const char * U_EXPORT2 ucnv_getDefaultName (void);
/**
* Sets the current default converter name.
* The lifetime of the return ptr is that of the library
* NOTE(review): this function is declared void, so the sentence above presumably
* describes the pointer returned by ucnv_getDefaultName - confirm and move it there.
* @param name: the converter name you want as default (has to appear in alias file)
* @see ucnv_getDefaultName
*
*/
U_CAPI void U_EXPORT2 ucnv_setDefaultName (const char *name);
#endif
/*_UCNV*/
#error Please include unicode/ucnv.h instead

View file

@ -18,11 +18,11 @@
#include "uhash.h"
#include "ucmp16.h"
#include "ucmp8.h"
#include "ucnv_bld.h"
#include "ucnv_err.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv_err.h"
#include "ucnv_imp.h"
#include "udata.h"
#include "ucnv.h"
#include "unicode/udata.h"
#include "unicode/ucnv.h"
#include "umutex.h"
#include "cstring.h"
#include "cmemory.h"
@ -100,7 +100,7 @@ char *
{
int32_t i = 0;
while (name[i] = icu_toupper (name[i]))
while (name[i] = uprv_toupper (name[i]))
i++;
return name;
@ -153,7 +153,7 @@ char *
int32_t uhash_hashIString(const void* name)
{
char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
icu_strcpy(myName, (char*)name);
uprv_strcpy(myName, (char*)name);
strtoupper(myName);
return uhash_hashString(myName);
@ -181,7 +181,7 @@ CompactShortArray* createCompactShortArrayFromFile (FileStream * infile, UError
*err = U_INVALID_TABLE_FILE;
return NULL;
}
myShortArray = (int16_t *) icu_malloc (myValuesCount * sizeof (int16_t));
myShortArray = (int16_t *) uprv_malloc (myValuesCount * sizeof (int16_t));
if (myShortArray == NULL)
{
*err = U_MEMORY_ALLOCATION_ERROR;
@ -192,15 +192,15 @@ CompactShortArray* createCompactShortArrayFromFile (FileStream * infile, UError
if (myIndexCount < 0)
{
icu_free (myShortArray);
uprv_free (myShortArray);
*err = U_INVALID_TABLE_FILE;
return NULL;
}
myIndexArray = (uint16_t *) icu_malloc (myIndexCount * sizeof (uint16_t));
myIndexArray = (uint16_t *) uprv_malloc (myIndexCount * sizeof (uint16_t));
if (myIndexArray == NULL)
{
icu_free (myShortArray);
uprv_free (myShortArray);
*err = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
@ -235,7 +235,7 @@ CompactByteArray* createCompactByteArrayFromFile (FileStream * infile,
*err = U_INVALID_TABLE_FILE;
return NULL;
}
myByteArray = (int8_t *) icu_malloc (myValuesCount * sizeof (int8_t));
myByteArray = (int8_t *) uprv_malloc (myValuesCount * sizeof (int8_t));
if (myByteArray == NULL)
{
*err = U_MEMORY_ALLOCATION_ERROR;
@ -246,14 +246,14 @@ CompactByteArray* createCompactByteArrayFromFile (FileStream * infile,
if (myIndexCount < 0)
{
icu_free (myByteArray);
uprv_free (myByteArray);
*err = U_INVALID_TABLE_FILE;
return NULL;
}
myIndexArray = (uint16_t *) icu_malloc (myIndexCount * sizeof (uint16_t));
myIndexArray = (uint16_t *) uprv_malloc (myIndexCount * sizeof (uint16_t));
if (myIndexArray == NULL)
{
icu_free (myByteArray);
uprv_free (myByteArray);
*err = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
@ -308,7 +308,7 @@ UConverter* createConverterFromFile (const char *fileName, UErrorCode * err)
return NULL;
}
myConverter = (UConverter *) icu_malloc (sizeof (UConverter));
myConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
if (myConverter == NULL)
{
udata_close(data);
@ -322,7 +322,7 @@ UConverter* createConverterFromFile (const char *fileName, UErrorCode * err)
if (myConverter->sharedData == NULL)
{
udata_close(data);
icu_free (myConverter);
uprv_free (myConverter);
*err = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
@ -336,7 +336,7 @@ UConverter* createConverterFromFile (const char *fileName, UErrorCode * err)
if(U_FAILURE(*err))
{
udata_close(data);
icu_free (myConverter);
uprv_free (myConverter);
*err = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
@ -357,12 +357,12 @@ void
{
case UCNV_IBM:
{
icu_strcpy (platformString, "ibm");
uprv_strcpy (platformString, "ibm");
break;
}
default:
{
icu_strcpy (platformString, "");
uprv_strcpy (platformString, "");
break;
}
};
@ -375,29 +375,29 @@ void
UConverterType
getAlgorithmicTypeFromName (const char *realName)
{
if (icu_strcmp (realName, "UTF8") == 0)
if (uprv_strcmp (realName, "UTF8") == 0)
return UCNV_UTF8;
else if (icu_strcmp (realName, "UTF16_BigEndian") == 0)
else if (uprv_strcmp (realName, "UTF16_BigEndian") == 0)
return UCNV_UTF16_BigEndian;
else if (icu_strcmp (realName, "UTF16_LittleEndian") == 0)
else if (uprv_strcmp (realName, "UTF16_LittleEndian") == 0)
return UCNV_UTF16_LittleEndian;
else if (icu_strcmp (realName, "LATIN_1") == 0)
else if (uprv_strcmp (realName, "LATIN_1") == 0)
return UCNV_LATIN_1;
else if (icu_strcmp (realName, "JIS") == 0)
else if (uprv_strcmp (realName, "JIS") == 0)
return UCNV_JIS;
else if (icu_strcmp (realName, "EUC") == 0)
else if (uprv_strcmp (realName, "EUC") == 0)
return UCNV_EUC;
else if (icu_strcmp (realName, "GB") == 0)
else if (uprv_strcmp (realName, "GB") == 0)
return UCNV_GB;
else if (icu_strcmp (realName, "ISO_2022") == 0)
else if (uprv_strcmp (realName, "ISO_2022") == 0)
return UCNV_ISO_2022;
else if (icu_strcmp (realName, "UTF16_PlatformEndian") == 0)
else if (uprv_strcmp (realName, "UTF16_PlatformEndian") == 0)
# if U_IS_BIG_ENDIAN
return UCNV_UTF16_BigEndian;
# else
return UCNV_UTF16_LittleEndian;
# endif
else if (icu_strcmp (realName, "UTF16_OppositeEndian") == 0)
else if (uprv_strcmp (realName, "UTF16_OppositeEndian") == 0)
# if U_IS_BIG_ENDIAN
return UCNV_UTF16_LittleEndian;
# else
@ -417,7 +417,7 @@ UConverterPlatform
getToken (myPlatform, name, mySeparators);
strtoupper (myPlatform);
if (icu_strcmp (myPlatform, "IBM") == 0)
if (uprv_strcmp (myPlatform, "IBM") == 0)
return UCNV_IBM;
else
return UCNV_UNKNOWN;
@ -502,7 +502,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
case UCNV_SBCS:
{
ucmp8_close (deadSharedData->table->sbcs.fromUnicode);
icu_free (deadSharedData->table);
uprv_free (deadSharedData->table);
};
break;
@ -510,7 +510,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
{
ucmp16_close (deadSharedData->table->mbcs.fromUnicode);
ucmp16_close (deadSharedData->table->mbcs.toUnicode);
icu_free (deadSharedData->table);
uprv_free (deadSharedData->table);
};
break;
@ -519,7 +519,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
{
ucmp16_close (deadSharedData->table->dbcs.fromUnicode);
ucmp16_close (deadSharedData->table->dbcs.toUnicode);
icu_free (deadSharedData->table);
uprv_free (deadSharedData->table);
};
break;
@ -532,7 +532,7 @@ bool_t deleteSharedConverterData (UConverterSharedData * deadSharedData)
udata_close(data);
}
icu_free (deadSharedData);
uprv_free (deadSharedData);
return TRUE;
}
@ -661,7 +661,7 @@ UConverter *
else
{
/*Is already cached, point to an existing one */
myUConverter = (UConverter *) icu_malloc (sizeof (UConverter));
myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
if (myUConverter == NULL)
{
*err = U_MEMORY_ALLOCATION_ERROR;
@ -690,7 +690,7 @@ UConverter *
myUConverter = createConverterFromAlgorithmicType (realName, err);
if (U_FAILURE (*err) || (myUConverter == NULL))
{
icu_free (myUConverter);
uprv_free (myUConverter);
return NULL;
}
else
@ -702,7 +702,7 @@ UConverter *
}
else
{
myUConverter = (UConverter *) icu_malloc (sizeof (UConverter));
myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
if (myUConverter == NULL)
{
*err = U_MEMORY_ALLOCATION_ERROR;
@ -736,7 +736,7 @@ void initializeDataConverter (UConverter * myUConverter)
myUConverter->UCharErrorBufferLength = 0;
myUConverter->charErrorBufferLength = 0;
myUConverter->subCharLen = myUConverter->sharedData->defaultConverterValues.subCharLen;
icu_memcpy (myUConverter->subChar,
uprv_memcpy (myUConverter->subChar,
myUConverter->sharedData->defaultConverterValues.subChar,
myUConverter->subCharLen);
myUConverter->toUnicodeStatus = 0x00;
@ -788,9 +788,9 @@ void
myConverter->fromUnicodeStatus = 0; /* srl */
myConverter->sharedData->platform = UCNV_IBM;
myConverter->sharedData->codepage = 1208;
icu_strcpy(myConverter->sharedData->name, "UTF8");
icu_memcpy (myConverter->subChar, UTF8_subChar, 3);
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF8_subChar, 3);
uprv_strcpy(myConverter->sharedData->name, "UTF8");
uprv_memcpy (myConverter->subChar, UTF8_subChar, 3);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF8_subChar, 3);
break;
}
@ -804,7 +804,7 @@ void
myConverter->toUnicodeStatus = 0;
myConverter->sharedData->platform = UCNV_IBM;
myConverter->sharedData->codepage = 819;
icu_strcpy(myConverter->sharedData->name, "LATIN_1");
uprv_strcpy(myConverter->sharedData->name, "LATIN_1");
*(myConverter->subChar) = LATIN1_subChar;
*(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar;
break;
@ -819,11 +819,11 @@ void
myConverter->subCharLen = 2;
myConverter->toUnicodeStatus = 0;
myConverter->fromUnicodeStatus = 0;
icu_strcpy(myConverter->sharedData->name, "UTF_16BE");
uprv_strcpy(myConverter->sharedData->name, "UTF_16BE");
myConverter->sharedData->platform = UCNV_IBM;
myConverter->sharedData->codepage = 1200;
icu_memcpy (myConverter->subChar, UTF16BE_subChar, 2);
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16BE_subChar, 2);
uprv_memcpy (myConverter->subChar, UTF16BE_subChar, 2);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16BE_subChar, 2);
break;
}
@ -839,9 +839,9 @@ void
myConverter->fromUnicodeStatus = 0;
myConverter->sharedData->platform = UCNV_IBM;
myConverter->sharedData->codepage = 1200;
icu_strcpy(myConverter->sharedData->name, "UTF_16LE");
icu_memcpy (myConverter->subChar, UTF16LE_subChar, 2);
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16LE_subChar, 2);
uprv_strcpy(myConverter->sharedData->name, "UTF_16LE");
uprv_memcpy (myConverter->subChar, UTF16LE_subChar, 2);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, UTF16LE_subChar, 2);
break;
}
case UCNV_EUC:
@ -852,8 +852,8 @@ void
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
myConverter->subCharLen = 2;
myConverter->toUnicodeStatus = 0;
icu_memcpy (myConverter->subChar, EUC_subChar, 2);
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, EUC_subChar, 2);
uprv_memcpy (myConverter->subChar, EUC_subChar, 2);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, EUC_subChar, 2);
break;
}
case UCNV_ISO_2022:
@ -870,10 +870,10 @@ void
myConverter->toUnicodeStatus = 0;
myConverter->fromUnicodeStatus = 0; /* srl */
myConverter->sharedData->codepage = 2022;
icu_strcpy(myConverter->sharedData->name, "ISO_2022");
uprv_strcpy(myConverter->sharedData->name, "ISO_2022");
*(myConverter->subChar) = LATIN1_subChar;
*(myConverter->sharedData->defaultConverterValues.subChar) = LATIN1_subChar;
myConverter->extraInfo = icu_malloc (sizeof (UConverterDataISO2022));
myConverter->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));
((UConverterDataISO2022 *) myConverter->extraInfo)->currentConverter = NULL;
((UConverterDataISO2022 *) myConverter->extraInfo)->escSeq2022Length = 0;
break;
@ -886,8 +886,8 @@ void
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
myConverter->subCharLen = 2;
myConverter->toUnicodeStatus = 0;
icu_memcpy (myConverter->subChar, GB_subChar, 2);
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, GB_subChar, 2);
uprv_memcpy (myConverter->subChar, GB_subChar, 2);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, GB_subChar, 2);
break;
}
case UCNV_JIS:
@ -898,8 +898,8 @@ void
myConverter->sharedData->defaultConverterValues.subCharLen = 2;
myConverter->subCharLen = 2;
myConverter->toUnicodeStatus = 0;
icu_memcpy (myConverter->subChar, JIS_subChar, 2);
icu_memcpy (myConverter->sharedData->defaultConverterValues.subChar, JIS_subChar, 2);
uprv_memcpy (myConverter->subChar, JIS_subChar, 2);
uprv_memcpy (myConverter->sharedData->defaultConverterValues.subChar, JIS_subChar, 2);
break;
}
default:
@ -927,7 +927,7 @@ UConverter *
if (U_FAILURE (*err))
return NULL;
myConverter = (UConverter *) icu_malloc (sizeof (UConverter));
myConverter = (UConverter *) uprv_malloc (sizeof (UConverter));
if (myConverter == NULL)
{
*err = U_MEMORY_ALLOCATION_ERROR;
@ -935,17 +935,17 @@ UConverter *
}
myConverter->sharedData = NULL;
mySharedData = (UConverterSharedData *) icu_malloc (sizeof (UConverterSharedData));
mySharedData = (UConverterSharedData *) uprv_malloc (sizeof (UConverterSharedData));
if (mySharedData == NULL)
{
*err = U_MEMORY_ALLOCATION_ERROR;
icu_free (myConverter);
uprv_free (myConverter);
return NULL;
}
mySharedData->structSize = sizeof(UConverterSharedData);
mySharedData->table = NULL;
mySharedData->dataMemory = NULL;
icu_strcpy (mySharedData->name, actualName);
uprv_strcpy (mySharedData->name, actualName);
/*Initializes the referenceCounter to 1 */
mySharedData->referenceCounter = 1;
mySharedData->platform = UCNV_UNKNOWN;
@ -974,7 +974,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc
data = (UConverterSharedData*) malloc(sizeof(UConverterSharedData));
raw = (uint8_t*)source;
icu_memcpy(data,source,sizeof(UConverterSharedData));
uprv_memcpy(data,source,sizeof(UConverterSharedData));
raw += data->structSize;
@ -993,7 +993,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc
case UCNV_EBCDIC_STATEFUL:
case UCNV_DBCS:
data->table = icu_malloc(sizeof(UConverterDBCSTable));
data->table = uprv_malloc(sizeof(UConverterDBCSTable));
oldraw = raw;
@ -1007,7 +1007,7 @@ UConverterSharedData* ucnv_data_unFlattenClone(const UConverterSharedData *sourc
break;
case UCNV_MBCS:
data->table = icu_malloc(sizeof(UConverterMBCSTable));
data->table = uprv_malloc(sizeof(UConverterMBCSTable));
data->table->mbcs.starters = (bool_t*)raw;
raw += sizeof(bool_t)*256;

View file

@ -1,215 +1 @@
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
*
* ucnv_bld.h:
* Contains all internal and external data structure definitions
* Created & Maintained by Bertrand A. Damiba
*
*
*
* ATTENTION:
* ---------
* Although the data structures in this file are open and stack allocatable
* we reserve the right to hide them in further releases.
*/
#ifndef UCNV_BLD_H
#define UCNV_BLD_H
#include "utypes.h"
#define UCNV_MAX_SUBCHAR_LEN 4
#define UCNV_ERROR_BUFFER_LENGTH 20
#ifndef UCMP16_H
typedef struct _CompactShortArray CompactShortArray;
#endif
#ifndef UCMP8_H
typedef struct _CompactByteArray CompactByteArray;
#endif
#define UCNV_IMPLEMENTED_CONVERSION_TYPES 9
/*Sentinel Value used to check the integrity of the binary data files */
#define UCNV_FILE_CHECK_MARKER 0xBEDA
/*maximum length of the converter names */
#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
/*Pointer to the aforementioned file */
#define UCNV_MAX_LINE_TEXT (UCNV_MAX_CONVERTER_NAME_LENGTH*400)
#define UCNV_SI 0x0F /*Shift in for EBCDIC_STATEFUL and iso2022 states */
#define UCNV_SO 0x0E /*Shift out for EBCDIC_STATEFUL and iso2022 states */
/* Identifies the conversion type of a converter (stored in
 * UConverterSharedData.conversionType).
 */
typedef enum {
UCNV_UNSUPPORTED_CONVERTER = -1,
UCNV_SBCS = 0,
UCNV_DBCS = 1,
UCNV_MBCS = 2,
UCNV_LATIN_1 = 3,
UCNV_UTF8 = 4,
UCNV_UTF16_BigEndian = 5,
UCNV_UTF16_LittleEndian = 6,
UCNV_EBCDIC_STATEFUL = 7,
UCNV_ISO_2022 = 8,
/* Number of converter types for which we have conversion routines. */
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9,
/* NOTE(review): UCNV_JIS shares the value 9 with the count above, so JIS, EUC
 * and GB presumably have no conversion routines yet - confirm.
 */
UCNV_JIS = 9,
UCNV_EUC = 10,
UCNV_GB = 11
} UConverterType;
typedef enum {
UCNV_UNKNOWN = -1,
UCNV_IBM = 0
} UConverterPlatform;
/*Table Node Definitions */
typedef struct
{
UChar *toUnicode; /* [256]; */
CompactByteArray *fromUnicode;
}
UConverterSBCSTable;
typedef struct
{
CompactShortArray *toUnicode;
CompactShortArray *fromUnicode;
}
UConverterDBCSTable;
typedef struct
{
bool_t *starters; /* [256]; */
CompactShortArray *toUnicode;
CompactShortArray *fromUnicode;
}
UConverterMBCSTable;
typedef union
{
UConverterSBCSTable sbcs;
UConverterDBCSTable dbcs;
UConverterMBCSTable mbcs;
}
UConverterTable;
/*Defines the struct of a UConverterSharedData the immutable, shared part of
*UConverter
*/
typedef struct
{
uint32_t structSize; /* Size of this structure */
void *dataMemory;
uint32_t referenceCounter; /*used to count number of clients */
char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */
UConverterPlatform platform; /*platform of the converter (only IBM now) */
int32_t codepage; /*codepage # (now IBM-$codepage) */
UConverterType conversionType; /*conversion type */
int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */
int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */
struct
{ /*initial values of some members of the mutable part of object */
uint32_t toUnicodeStatus;
int8_t subCharLen;
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN];
}
defaultConverterValues;
UConverterTable *table; /*Pointer to conversion data */
}
UConverterSharedData;
/*Defines a UConverter, the lightweight mutable part the user sees */
U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
itself is compiled under C++, the linkage of the funcptrs will
work.
*/
/* The lightweight, mutable part of a converter that the user sees.
 * The immutable, shared part is reached through sharedData.
 */
struct UConverter
{
int32_t toUnicodeStatus; /*Used to internalize stream status information */
int32_t fromUnicodeStatus;
int8_t invalidCharLength; /*number of bytes of invalidCharBuffer read by the error callbacks */
int8_t invalidUCharLength; /*number of UChars of invalidUCharBuffer read by the error callbacks */
int8_t pad;
int32_t mode;
int8_t subCharLen; /*length of the codepage specific character sequence */
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /*codepage specific character sequence */
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store unicode data meant for
*output stream by the Error function pointers
*/
unsigned char charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store codepage data meant for
* output stream by the Error function pointers
*/
int8_t UCharErrorBufferLength; /*used to indicate the number of valid UChars
*in UCharErrorBuffer
*/
int8_t charErrorBufferLength; /*used to indicate the number of valid bytes
*in charErrorBuffer
*/
UChar invalidUCharBuffer[3]; /*UChars that could not be converted; see invalidUCharLength */
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN]; /*bytes that could not be converted; see invalidCharLength */
/*Error function pointer called when conversion issues
*occur during a T_UConverter_fromUnicode call
*/
void (*fromUCharErrorBehaviour) (struct UConverter *,
char **,
const char *,
const UChar **,
const UChar *,
int32_t* offsets,
bool_t,
UErrorCode *);
/*Error function pointer called when conversion issues
*occur during a T_UConverter_toUnicode call
*/
void (*fromCharErrorBehaviour) (struct UConverter *,
UChar **,
const UChar *,
const char **,
const char *,
int32_t* offsets,
bool_t,
UErrorCode *);
UConverterSharedData *sharedData; /*Pointer to the shared immutable part of the
*converter object
*/
void *extraInfo; /*currently only used to point to a struct containing UConverter used by iso 2022
Could be used by clients writing their own call back function to
pass context to them
*/
};
U_CDECL_END /* end of UConverter */
typedef struct UConverter UConverter;
typedef struct
{
UConverter *currentConverter;
unsigned char escSeq2022[10];
int8_t escSeq2022Length;
}
UConverterDataISO2022;
#define CONVERTER_FILE_EXTENSION ".cnv"
/*case insensitive hash key*/
U_CAPI int32_t U_EXPORT2 uhash_hashIString(const void* name);
#endif /* _UCNV_BLD */
#error Please include unicode/ucnv_bld.h instead

View file

@ -16,14 +16,14 @@
*
*/
#include "utypes.h"
#include "unicode/utypes.h"
#include "uhash.h"
#include "ucmp16.h"
#include "ucmp8.h"
#include "ucnv_bld.h"
#include "ucnv_err.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv_err.h"
#include "ucnv_cnv.h"
#include "ucnv.h"
#include "unicode/ucnv.h"
#include "cmemory.h"
#ifdef Debug
@ -1581,7 +1581,7 @@ void T_UConverter_fromUnicode_ISO_2022_OFFSETS_LOGIC(UConverter* _this,
{
int32_t len = *target - targetStart;
int32_t i;
/* icu_memmove(offsets+3, offsets, len); MEMMOVE SEEMS BROKEN --srl */
/* uprv_memmove(offsets+3, offsets, len); MEMMOVE SEEMS BROKEN --srl */
for(i=len-1;i>=0;i--) offsets[i] = offsets[i];
@ -1797,7 +1797,7 @@ void changeState_2022(UConverter* _this,
/*Customize the converter with the attributes set on the 2022 converter*/
myUConverter->fromUCharErrorBehaviour = _this->fromUCharErrorBehaviour;
myUConverter->fromCharErrorBehaviour = _this->fromCharErrorBehaviour;
icu_memcpy(myUConverter->subChar,
uprv_memcpy(myUConverter->subChar,
_this->subChar,
myUConverter->subCharLen = _this->subCharLen);
@ -2855,7 +2855,7 @@ void flushInternalUnicodeBuffer (UConverter * _this,
{
/*we have enough space
*So we just copy the whole Error Buffer in to the output stream*/
icu_memcpy (myTarget,
uprv_memcpy (myTarget,
_this->UCharErrorBuffer,
sizeof (UChar) * myUCharErrorBufferLength);
if (offsets)
@ -2872,14 +2872,14 @@ void flushInternalUnicodeBuffer (UConverter * _this,
/* We don't have enough space so we copy as much as we can
* on the output stream and update the object
* by updating the internal buffer*/
icu_memcpy (myTarget, _this->UCharErrorBuffer, sizeof (UChar) * targetLength);
uprv_memcpy (myTarget, _this->UCharErrorBuffer, sizeof (UChar) * targetLength);
if (offsets)
{
int32_t i=0;
for (i=0; i< targetLength;i++) (*offsets)[i] = -1;
*offsets += targetLength;
}
icu_memmove (_this->UCharErrorBuffer,
uprv_memmove (_this->UCharErrorBuffer,
_this->UCharErrorBuffer + targetLength,
sizeof (UChar) * (myUCharErrorBufferLength - targetLength));
_this->UCharErrorBufferLength -= (int8_t) targetLength;
@ -2903,7 +2903,7 @@ void flushInternalCharBuffer (UConverter * _this,
/*we have enough space */
if (myCharErrorBufferLength <= targetLength)
{
icu_memcpy (myTarget, _this->charErrorBuffer, myCharErrorBufferLength);
uprv_memcpy (myTarget, _this->charErrorBuffer, myCharErrorBufferLength);
if (offsets)
{
int32_t i=0;
@ -2918,14 +2918,14 @@ void flushInternalCharBuffer (UConverter * _this,
/* We don't have enough space so we copy as much as we can
* on the output stream and update the object*/
{
icu_memcpy (myTarget, _this->charErrorBuffer, targetLength);
uprv_memcpy (myTarget, _this->charErrorBuffer, targetLength);
if (offsets)
{
int32_t i=0;
for (i=0; i< targetLength;i++) (*offsets)[i] = -1;
*offsets += targetLength;
}
icu_memmove (_this->charErrorBuffer,
uprv_memmove (_this->charErrorBuffer,
_this->charErrorBuffer + targetLength,
(myCharErrorBufferLength - targetLength));
_this->charErrorBufferLength -= (int8_t) targetLength;

View file

@ -12,8 +12,8 @@
#ifndef UCNV_CNV_H
#define UCNV_CNV_H
#include "utypes.h"
#include "ucnv_bld.h"
#include "unicode/utypes.h"
#include "unicode/ucnv_bld.h"
bool_t CONVERSION_U_SUCCESS (UErrorCode err);

View file

@ -19,11 +19,11 @@
#include "uhash.h"
#include "ucmp8.h"
#include "ucmp16.h"
#include "ucnv_bld.h"
#include "ucnv_err.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv_err.h"
#include "ucnv_cnv.h"
#include "cmemory.h"
#include "ucnv.h"
#include "unicode/ucnv.h"
#define VALUE_STRING_LENGTH 32
/*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */
@ -135,7 +135,7 @@ void UCNV_FROM_U_CALLBACK_SUBSTITUTE (UConverter * _this,
/*In case we're dealing with a modal converter a la UCNV_EBCDIC_STATEFUL,
we need to make sure that the emitting of the substitution charater in the right mode*/
icu_memcpy(togo, _this->subChar, togoLen = _this->subCharLen);
uprv_memcpy(togo, _this->subChar, togoLen = _this->subCharLen);
if (ucnv_getType(_this) == UCNV_EBCDIC_STATEFUL)
{
if ((_this->fromUnicodeStatus)&&(togoLen != 2))
@ -159,7 +159,7 @@ void UCNV_FROM_U_CALLBACK_SUBSTITUTE (UConverter * _this,
the subchar there and update the pointer */
if ((targetLimit - *target) >= togoLen)
{
icu_memcpy (*target, togo, togoLen);
uprv_memcpy (*target, togo, togoLen);
*target += togoLen;
*err = U_ZERO_ERROR;
if (offsets)
@ -176,14 +176,14 @@ void UCNV_FROM_U_CALLBACK_SUBSTITUTE (UConverter * _this,
*copy the rest in the internal buffer, and increase the
*length marker
*/
icu_memcpy (*target, togo, (targetLimit - *target));
uprv_memcpy (*target, togo, (targetLimit - *target));
if (offsets)
{
int i=0;
for (i=0;i<(targetLimit - *target);i++) offsets[i]=0;
offsets += (targetLimit - *target);
}
icu_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
uprv_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
togo + (targetLimit - *target),
togoLen - (targetLimit - *target));
_this->charErrorBufferLength += togoLen - (targetLimit - *target);
@ -244,7 +244,7 @@ void UCNV_FROM_U_CALLBACK_ESCAPE (UConverter * _this,
while (i < _this->invalidUCharLength)
{
itou (codepoint + 2, _this->invalidUCharBuffer[i++], 16, 4);
icu_memcpy (valueString + valueStringLength, codepoint, sizeof (UChar) * 6);
uprv_memcpy (valueString + valueStringLength, codepoint, sizeof (UChar) * 6);
valueStringLength += CODEPOINT_STRING_LENGTH - 1;
}
@ -282,7 +282,7 @@ void UCNV_FROM_U_CALLBACK_ESCAPE (UConverter * _this,
*/
if ((targetLimit - *target) >= valueStringLength)
{
icu_memcpy (*target, myTarget, valueStringLength);
uprv_memcpy (*target, myTarget, valueStringLength);
*target += valueStringLength;
*err = U_ZERO_ERROR;
@ -307,8 +307,8 @@ void UCNV_FROM_U_CALLBACK_ESCAPE (UConverter * _this,
for (i=0;i<(targetLimit - *target);i++) offsets[i]=0;
offsets += (targetLimit - *target);
}
icu_memcpy (*target, myTarget, (targetLimit - *target));
icu_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
uprv_memcpy (*target, myTarget, (targetLimit - *target));
uprv_memcpy (_this->charErrorBuffer + _this->charErrorBufferLength,
myTarget + (targetLimit - *target),
valueStringLength - (targetLimit - *target));
_this->charErrorBufferLength += valueStringLength - (targetLimit - *target);
@ -391,7 +391,7 @@ void UCNV_TO_U_CALLBACK_ESCAPE (UConverter * _this,
while (i < _this->invalidCharLength)
{
itou (codepoint + 2, _this->invalidCharBuffer[i++], 16, 2);
icu_memcpy (uniValueString + valueStringLength, codepoint, sizeof (UChar) * 4);
uprv_memcpy (uniValueString + valueStringLength, codepoint, sizeof (UChar) * 4);
valueStringLength += 4;
}
@ -400,7 +400,7 @@ void UCNV_TO_U_CALLBACK_ESCAPE (UConverter * _this,
/*if we have enough space on the output buffer we just copy
* the subchar there and update the pointer
*/
icu_memcpy (*target, uniValueString, (sizeof (UChar)) * (valueStringLength));
uprv_memcpy (*target, uniValueString, (sizeof (UChar)) * (valueStringLength));
if (offsets)
{
for (i = 0; i < valueStringLength; i++) offsets[i] = 0;
@ -416,14 +416,14 @@ void UCNV_TO_U_CALLBACK_ESCAPE (UConverter * _this,
*copy the rest in the internal buffer, and increase the
*length marker
*/
icu_memcpy (*target, uniValueString, (sizeof (UChar)) * (targetLimit - *target));
uprv_memcpy (*target, uniValueString, (sizeof (UChar)) * (targetLimit - *target));
if (offsets)
{
for (i = 0; i < (targetLimit - *target); i++) offsets[i] = 0;
}
icu_memcpy (_this->UCharErrorBuffer,
uprv_memcpy (_this->UCharErrorBuffer,
uniValueString + (targetLimit - *target),
(sizeof (UChar)) * (valueStringLength - (targetLimit - *target)));
_this->UCharErrorBufferLength += valueStringLength - (targetLimit - *target);

View file

@ -1,151 +1 @@
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
*
* ucnv_err.h:
* defines error behaviour functions called by T_UConverter_{from,to}Unicode
*
* These Functions, although public, should NEVER be called directly, they should be used as parameters to
* the T_UConverter_setMissing{Char,Unicode}Action API, to set the behaviour of a converter
* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
*
* usage example:
*
* ...
* UErrorCode err = U_ZERO_ERROR;
* UConverter* myConverter = T_UConverter_create("ibm-949", &err);
*
* if (U_SUCCESS(err))
* {
* T_UConverter_setMissingUnicodeAction(myConverter, (MissingUnicodeAction)UCNV_FROM_U_CALLBACK_STOP, &err);
* T_UConverter_setMissingCharAction(myConverter, (MissingCharAction)UCNV_TO_U_CALLBACK_SUBSTITUTE, &err);
* }
* ...
*
* The code above tells "myConverter" to stop when it encounters ILLEGAL/TRUNCATED/INVALID sequences while it is used to
* convert from Unicode -> Codepage,
* and to substitute with a codepage specific substitution sequence when converting from Codepage -> Unicode
*/
#ifndef UCNV_ERR_H
#define UCNV_ERR_H
#include "ucnv.h"
#include "utypes.h"
/*Functor STOPS at the ILLEGAL_SEQUENCE */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/*Functor STOPS at the ILLEGAL_SEQUENCE */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/*Functor SKIPs the ILLEGAL_SEQUENCE */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Functor: substitutes the ILLEGAL SEQUENCE with the current substitution string associated with _this.
* In the event the target buffer is too small, it will store the extra info in the UConverter, and err
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
* store the left over data in target, before transcoding the "source Stream"
*/
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Functor: substitutes the ILLEGAL SEQUENCE with a sequence of escaped codepoints corresponding to the ILLEGAL
* SEQUENCE (format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). In the event the converter doesn't support the
* characters {u,%}[A-F][0-9], it will substitute the illegal sequence with the substitution characters
* (it will behave like the above functor).
* In the event the target buffer is too small, it will store the extra info in the UConverter, and err
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
* store the left over data in target, before transcoding the "source Stream"
*/
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/*Functor SKIPs the ILLEGAL_SEQUENCE */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Functor: substitutes the ILLEGAL SEQUENCE with the current substitution string associated with _this.
* In the event the target buffer is too small, it will store the extra info in the UConverter, and err
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
* store the left over data in target, before transcoding the "source Stream"
* NOTE(review): this is the to-Unicode callback, so the call named above is presumably
* T_UConverter_toUnicode - confirm.
*/
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Functor Substitute the ILLEGAL SEQUENCE with a sequence of escaped codepoints corresponding to the
* ILLEGAL SEQUENCE (format %XNN, e.g. "%XFF%X0A%XC8%X03").
* in the event target buffer is too small, it will store the extra info in the UConverter, and err
* will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
* store the left over data in target, before transcoding the "source Stream"
*/
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
#endif/*UCNV_ERR_H*/
#error Please include unicode/ucnv_err.h instead

View file

@ -20,7 +20,7 @@
#ifndef UCNV_IMP_H
#define UCNV_IMP_H
#include "utypes.h"
#include "unicode/utypes.h"
#ifndef UHASH_H
typedef struct _UHashtable UHashtable;

View file

@ -24,12 +24,12 @@
********************************************************************************
*/
#include "utypes.h"
#include "unicode/utypes.h"
#include "umutex.h"
#include "cstring.h"
#include "cmemory.h"
#include "ucnv_io.h"
#include "udata.h"
#include "unicode/udata.h"
/* Format of cnvalias.dat ------------------------------------------------------
*
@ -159,7 +159,7 @@ strHalfCaseCmp(const char *str1, const char *str2) {
return 1;
} else {
/* compare non-zero characters with lowercase */
rc=(int)c1-(int)(unsigned char)icu_tolower(c2);
rc=(int)c1-(int)(unsigned char)uprv_tolower(c2);
if(rc!=0) {
return rc;
}
@ -187,7 +187,7 @@ findAlias(const char *alias) {
/* convert the alias name to lowercase to do case-insensitive comparisons */
for(i=0; i<sizeof(name)-1 && *alias!=0; ++i) {
name[i]=icu_tolower(*alias++);
name[i]=uprv_tolower(*alias++);
}
name[i]=0;
@ -249,7 +249,7 @@ ucnv_io_getAlias(const char *alias, uint16_t index, UErrorCode *pErrorCode) {
const char *aliases=(const char *)aliasTable+*p;
while(index>0) {
/* skip a name, first the canonical converter name */
aliases+=icu_strlen(aliases)+1;
aliases+=uprv_strlen(aliases)+1;
--index;
}
return aliases;
@ -338,7 +338,7 @@ ucnv_io_getDefaultConverterName() {
/* local variable to be thread-safe */
const char *name=defaultConverterName;
if(name==NULL) {
const char *codepage=icu_getDefaultCodepage();
const char *codepage=uprv_getDefaultCodepage();
if(codepage!=NULL) {
UErrorCode errorCode=U_ZERO_ERROR;
name=ucnv_io_getConverterName(codepage, &errorCode);
@ -363,7 +363,7 @@ ucnv_io_setDefaultConverterName(const char *converterName) {
defaultConverterName=name;
} else {
/* do not set the name if the alias lookup failed and it is too long */
int32_t length=icu_strlen(converterName);
int32_t length=uprv_strlen(converterName);
if(length<sizeof(defaultConverterNameBuffer)) {
/* it was not found as an alias, so copy it - accept an empty name */
bool_t didLock;
@ -373,7 +373,7 @@ ucnv_io_setDefaultConverterName(const char *converterName) {
} else {
didLock=FALSE;
}
icu_memcpy(defaultConverterNameBuffer, converterName, length);
uprv_memcpy(defaultConverterNameBuffer, converterName, length);
defaultConverterNameBuffer[length]=0;
defaultConverterName=defaultConverterNameBuffer;
if(didLock) {

View file

@ -13,7 +13,7 @@
#ifndef UCNV_IO_H
#define UCNV_IO_H
#include "utypes.h"
#include "unicode/utypes.h"
/**
* Map a converter alias name to a canonical converter name.

View file

@ -17,13 +17,13 @@
* created by: Markus W. Scherer
*/
#include "utypes.h"
#include "putil.h"
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "umutex.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "udata.h"
#include "unicode/udata.h"
#if !defined(HAVE_DLOPEN)
# define HAVE_DLOPEN 0
@ -151,8 +151,8 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
UErrorCode errorCode=U_ZERO_ERROR;
/* set up the mapping name and the filename */
icu_strcpy(buffer, "icu ");
icu_strcat(buffer, basename);
uprv_strcpy(buffer, "icu ");
uprv_strcat(buffer, basename);
/* open the mapping */
map=OpenFileMapping(FILE_MAP_READ, FALSE, buffer);
@ -184,7 +184,7 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
}
/* allocate the data structure */
pData=(UDataMemory *)icu_malloc(sizeof(UDataMemory));
pData=(UDataMemory *)uprv_malloc(sizeof(UDataMemory));
if(pData==NULL) {
UnmapViewOfFile(pData->p);
CloseHandle(map);
@ -212,7 +212,7 @@ udata_close(UDataMemory *pData) {
UnmapViewOfFile(pData->p);
CloseHandle(pData->map);
}
icu_free(pData);
uprv_free(pData);
}
}
@ -350,7 +350,7 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
#endif
/* allocate the data structure */
pData=(UDataMemory *)icu_malloc(sizeof(UDataMemory));
pData=(UDataMemory *)uprv_malloc(sizeof(UDataMemory));
if(pData==NULL) {
munmap(data, length);
return NULL;
@ -376,7 +376,7 @@ udata_close(UDataMemory *pData) {
if(pData->length!=0 && munmap(pData->p, pData->length)==-1) {
perror("munmap");
}
icu_free(pData);
uprv_free(pData);
}
}
# endif
@ -409,7 +409,7 @@ getChoice(Library lib, const char *entry,
#define NO_LIBRARY NULL
#define IS_LIBRARY(lib) ((lib)!=NULL)
#define UNLOAD_LIBRARY(lib) icu_free(lib)
#define UNLOAD_LIBRARY(lib) uprv_free(lib)
static Library
LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
@ -431,7 +431,7 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
}
/* allocate the data structure */
pData=(UDataMemory *)icu_malloc(fileLength);
pData=(UDataMemory *)uprv_malloc(fileLength);
if(pData==NULL) {
T_FileStream_close(file);
return NULL;
@ -439,7 +439,7 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
/* read the file */
if(fileLength!=T_FileStream_read(file, pData, fileLength)) {
icu_free(pData);
uprv_free(pData);
T_FileStream_close(file);
return NULL;
}
@ -452,7 +452,7 @@ LOAD_LIBRARY(const char *path, const char *basename, bool_t isCommon) {
U_CAPI void U_EXPORT2
udata_close(UDataMemory *pData) {
if(pData!=NULL) {
icu_free(pData);
uprv_free(pData);
}
}
@ -474,7 +474,7 @@ udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
if(size>info->size) {
pInfo->size=info->size;
}
icu_memcpy((uint16_t *)pInfo+1, (uint16_t *)info+1, size-2);
uprv_memcpy((uint16_t *)pInfo+1, (uint16_t *)info+1, size-2);
} else {
pInfo->size=0;
}
@ -496,7 +496,7 @@ udata_close(UDataMemory *pData) {
if(IS_LIBRARY(pData->lib)) {
UNLOAD_LIBRARY(pData->lib);
}
icu_free(pData);
uprv_free(pData);
}
}
@ -518,14 +518,14 @@ getCommonMapData(const UDataMemory *data, const char *dataName) {
limit=*toc++; /* number of names in this table of contents */
while(start<limit-1) {
number=(start+limit)/2;
if(icu_strcmp(dataName, (const char *)(base+toc[2*number]))<0) {
if(uprv_strcmp(dataName, (const char *)(base+toc[2*number]))<0) {
limit=number;
} else {
start=number;
}
}
if(icu_strcmp(dataName, (const char *)(base+toc[2*start]))==0) {
if(uprv_strcmp(dataName, (const char *)(base+toc[2*start]))==0) {
/* found it */
return (MappedData *)(base+toc[2*start+1]);
} else {
@ -574,7 +574,7 @@ udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) {
if(size>info->size) {
pInfo->size=info->size;
}
icu_memcpy((uint16_t *)pInfo+1, (uint16_t *)info+1, size-2);
uprv_memcpy((uint16_t *)pInfo+1, (uint16_t *)info+1, size-2);
} else {
pInfo->size=0;
}
@ -591,8 +591,8 @@ static const char *strcpy_dllentry(char *target, const char *src)
{
int i, length;
icu_strcpy(target,src);
length = icu_strlen(target);
uprv_strcpy(target,src);
length = uprv_strlen(target);
for(i=0;i<length;i++)
{
if(target[i] == '-')
@ -607,11 +607,11 @@ static const char *strcat_dllentry(char *target, const char *src)
{
int i, length;
i = icu_strlen(target); /* original size */
i = uprv_strlen(target); /* original size */
icu_strcat(target,src);
uprv_strcat(target,src);
length = i + icu_strlen(src);
length = i + uprv_strlen(src);
for(;i<length;i++)
{
@ -645,8 +645,8 @@ doOpenChoice(const char *path, const char *type, const char *name,
/* copy the path to the path buffer */
path=u_getDataDirectory();
if(path!=NULL && *path!=0) {
int length=icu_strlen(path);
icu_memcpy(pathBuffer, path, length);
int length=uprv_strlen(path);
uprv_memcpy(pathBuffer, path, length);
basename+=length;
hasPath=TRUE;
} else {
@ -655,11 +655,11 @@ doOpenChoice(const char *path, const char *type, const char *name,
/* add (prefix and) basename */
# ifndef LIB_PREFIX
icu_strcpy(basename, COMMON_DATA_NAME);
uprv_strcpy(basename, COMMON_DATA_NAME);
suffix=basename+COMMON_DATA_NAME_LENGTH;
# else
icu_memcpy(basename, LIB_PREFIX, LIB_PREFIX_LENGTH);
icu_strcpy(basename+LIB_PREFIX_LENGTH, COMMON_DATA_NAME);
uprv_memcpy(basename, LIB_PREFIX, LIB_PREFIX_LENGTH);
uprv_strcpy(basename+LIB_PREFIX_LENGTH, COMMON_DATA_NAME);
suffix=basename+LIB_PREFIX_LENGTH+COMMON_DATA_NAME_LENGTH;
# endif
hasBasename=TRUE;
@ -669,14 +669,14 @@ doOpenChoice(const char *path, const char *type, const char *name,
isICUData=FALSE;
/* find the last file separator */
basename=icu_strrchr(path, '/');
basename=uprv_strrchr(path, '/');
if(basename==NULL) {
basename=(char *)path;
} else {
++basename;
}
basename2=icu_strrchr(basename, '\\');
basename2=uprv_strrchr(basename, '\\');
if(basename2!=NULL) {
basename=basename2+1;
}
@ -684,16 +684,16 @@ doOpenChoice(const char *path, const char *type, const char *name,
if(path!=basename) {
# ifndef LIB_PREFIX
/* copy the path/basename to the path buffer */
icu_strcpy(pathBuffer, path);
uprv_strcpy(pathBuffer, path);
basename=pathBuffer+(basename-path);
# else
/* copy the path to the path buffer */
icu_memcpy(pathBuffer, path, basename-path);
uprv_memcpy(pathBuffer, path, basename-path);
/* add prefix and basename */
suffix=pathBuffer+(basename-path);
icu_memcpy(suffix, LIB_PREFIX, LIB_PREFIX_LENGTH);
icu_strcpy(suffix+LIB_PREFIX_LENGTH, basename);
uprv_memcpy(suffix, LIB_PREFIX, LIB_PREFIX_LENGTH);
uprv_strcpy(suffix+LIB_PREFIX_LENGTH, basename);
basename=suffix;
# endif
hasPath=TRUE;
@ -701,8 +701,8 @@ doOpenChoice(const char *path, const char *type, const char *name,
/* copy the path to the path buffer */
path=u_getDataDirectory();
if(path!=NULL && *path!=0) {
int length=icu_strlen(path);
icu_memcpy(pathBuffer, path, length);
int length=uprv_strlen(path);
uprv_memcpy(pathBuffer, path, length);
suffix=pathBuffer+length;
hasPath=TRUE;
} else {
@ -712,16 +712,16 @@ doOpenChoice(const char *path, const char *type, const char *name,
/* add (prefix and) basename */
# ifndef LIB_PREFIX
icu_strcpy(suffix, basename);
uprv_strcpy(suffix, basename);
# else
icu_memcpy(suffix, LIB_PREFIX, LIB_PREFIX_LENGTH);
icu_strcpy(suffix+LIB_PREFIX_LENGTH, basename);
uprv_memcpy(suffix, LIB_PREFIX, LIB_PREFIX_LENGTH);
uprv_strcpy(suffix+LIB_PREFIX_LENGTH, basename);
# endif
basename=suffix;
}
hasBasename= *basename!=0;
if(hasBasename) {
suffix=basename+icu_strlen(basename);
suffix=basename+uprv_strlen(basename);
}
}
path=pathBuffer;
@ -731,19 +731,19 @@ doOpenChoice(const char *path, const char *type, const char *name,
#ifdef UDATA_DLL
strcpy_dllentry(entryNameBuffer, name);
#else
icu_strcpy(entryNameBuffer, name);
uprv_strcpy(entryNameBuffer, name);
#endif
# ifdef UDATA_DLL
icu_strcat(entryNameBuffer, "_");
uprv_strcat(entryNameBuffer, "_");
# else
icu_strcat(entryNameBuffer, ".");
uprv_strcat(entryNameBuffer, ".");
# endif
#ifdef UDATA_DLL
strcat_dllentry(entryNameBuffer, type);
#else
icu_strcat(entryNameBuffer, type);
uprv_strcat(entryNameBuffer, type);
#endif
entryName=entryNameBuffer;
@ -772,7 +772,7 @@ doOpenChoice(const char *path, const char *type, const char *name,
/* load the common data if necessary */
if(!IS_LIBRARY(lib)) {
/* try path/basename first */
icu_strcpy(suffix, LIB_SUFFIX);
uprv_strcpy(suffix, LIB_SUFFIX);
lib=LOAD_LIBRARY(path, basename, TRUE);
if(!IS_LIBRARY(lib)) {
/* try basename only next */
@ -820,9 +820,9 @@ doOpenChoice(const char *path, const char *type, const char *name,
/* try basename+"_"+entryName[+LIB_SUFFIX] first */
if(p==NULL && hasBasename) {
*suffix='_';
icu_strcpy(suffix+1, entryName);
uprv_strcpy(suffix+1, entryName);
# ifdef UDATA_DLL
icu_strcat(suffix+1, LIB_SUFFIX);
uprv_strcat(suffix+1, LIB_SUFFIX);
# endif
/* try path/basename first */
@ -849,12 +849,12 @@ doOpenChoice(const char *path, const char *type, const char *name,
/* try entryName[+LIB_SUFFIX] next */
if(p==NULL) {
# ifndef LIB_PREFIX
icu_strcpy(basename, entryName);
uprv_strcpy(basename, entryName);
# else
icu_strcpy(basename+LIB_PREFIX_LENGTH, entryName);
uprv_strcpy(basename+LIB_PREFIX_LENGTH, entryName);
# endif
# ifdef UDATA_DLL
icu_strcat(basename, LIB_SUFFIX);
uprv_strcat(basename, LIB_SUFFIX);
# endif
/* try path/basename first */
@ -896,7 +896,7 @@ doOpenChoice(const char *path, const char *type, const char *name,
# endif
/* allocate the data structure */
pData=(UDataMemory *)icu_malloc(sizeof(UDataMemory));
pData=(UDataMemory *)uprv_malloc(sizeof(UDataMemory));
if(pData==NULL) {
if(IS_LIBRARY(lib)) {
UNLOAD_LIBRARY(lib);
@ -909,7 +909,7 @@ doOpenChoice(const char *path, const char *type, const char *name,
pData->lib=lib;
# else
/* defined(UDATA_MAP) && !IS_LIBRARY(lib) */
icu_memset(pData, 0, sizeof(pData));
uprv_memset(pData, 0, sizeof(pData));
# endif
pData->p=p;

View file

@ -1,215 +1 @@
/*
*******************************************************************************
* *
* COPYRIGHT: *
* (C) Copyright International Business Machines Corporation, 1999 *
* Licensed Material - Program-Property of IBM - All Rights Reserved. *
* US Government Users Restricted Rights - Use, duplication, or disclosure *
* restricted by GSA ADP Schedule Contract with IBM Corp. *
* *
*******************************************************************************
* file name: udata.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999oct25
* created by: Markus W. Scherer
*/
#ifndef __UDATA_H__
#define __UDATA_H__
#include "utypes.h"
/**
* Information about data memory.
* This structure may grow in the future, indicated by the
* <code>size</code> field.
*
* <p>The platform data property fields help determine if a data
* file can be efficiently used on a given machine.
* The particular fields are of importance only if the data
* is affected by the properties - if there is integer data
* with word sizes > 1 byte, char* text, or UChar* text.</p>
*
* <p>The implementation for the <code>udata_open[Choice]()</code>
* functions may reject data based on the value in <code>isBigEndian</code>.
* No other field is used by the <code>udata</code> API implementation.</p>
*
* <p>The <code>dataFormat</code> may be used to identify
* the kind of data, e.g. a converter table.</p>
*
* <p>The <code>formatVersion</code> field should be used to
* make sure that the format can be interpreted.
* It may be a good idea to check only for the one or two highest
* of the version elements to allow the data memory to
* get more or somewhat rearranged contents, for as long
* as the using code can still interpret the older contents.</p>
*
* <p>The <code>dataVersion</code> field is intended to be a
* common place to store the source version of the data;
* for data from the Unicode character database, this could
* reflect the Unicode version.</p>
*/
typedef struct {
/** @memo sizeof(UDataInfo) */
uint16_t size;
/** @memo unused, set to 0 */
uint16_t reservedWord;
/* platform data properties */
/** @memo 0 for little-endian machine, 1 for big-endian */
uint8_t isBigEndian;
/** @memo see U_CHARSET_FAMILY values in utypes.h */
uint8_t charsetFamily;
/** @memo sizeof(UChar), one of { 1, 2, 4 } */
uint8_t sizeofUChar;
/** @memo unused, set to 0 */
uint8_t reservedByte;
/** @memo data format identifier */
uint8_t dataFormat[4];
/** @memo versions: [0] major [1] minor [2] milli [3] micro */
uint8_t formatVersion[4];
/**
 * @memo source version of the data (e.g. the Unicode version for data
 * derived from the Unicode character database).
 * NOTE(review): presumably uses the same [0] major .. [3] micro element
 * layout as formatVersion, since the memo above says "versions" - confirm.
 */
uint8_t dataVersion[4];
} UDataInfo;
/* API for reading data -----------------------------------------------------*/
/**
* Forward declaration of the data memory type.
*/
typedef struct UDataMemory UDataMemory;
/**
* Callback function for udata_openChoice().
* @param context parameter passed into <code>udata_openChoice()</code>.
* @param type The type of the data as passed into <code>udata_openChoice()</code>.
* It may be <code>NULL</code>.
* @param name The name of the data as passed into <code>udata_openChoice()</code>.
* @param pInfo A pointer to the <code>UDataInfo</code> structure
* of data that has been loaded and will be returned
* by <code>udata_openChoice()</code> if this function
* returns <code>TRUE</code>.
* @return TRUE if the current data memory is acceptable
*/
typedef bool_t
UDataMemoryIsAcceptable(void *context,
const char *type, const char *name,
UDataInfo *pInfo);
/**
* Convenience function.
* This function works the same as <code>udata_openChoice</code>
* except that any data that matches the type and name
* is assumed to be acceptable.
*/
U_CAPI UDataMemory * U_EXPORT2
udata_open(const char *path, const char *type, const char *name,
UErrorCode *pErrorCode);
/**
* Data loading function.
* This function is used to efficiently find and load data for
* ICU and applications using ICU.
* It provides an abstract interface that allows the caller to specify
* a data type and name to find and load the data.
*
* <p>The implementation depends on platform properties and user preferences
* and may involve loading shared libraries (DLLs), mapping
* files into memory, or fopen()/fread() files.
* It may also involve using static memory or database queries etc.
* Several or all data items may be combined into one entity
* (DLL, memory-mappable file).</p>
*
* <p>The data is always preceded by a header that includes
* a <code>UDataInfo</code> structure.
* The caller's <code>isAcceptable()</code> function is called to make
* sure that the data is useful. It may be called several times if it
* rejects the data and there is more than one location with data
* matching the type and name.</p>
*
* <p>If <code>path==NULL</code>, then ICU data is loaded.
* Otherwise, it is separated into a basename and a basename-less path string.
* If the path string is empty, then <code>u_getDataDirectory()</code>
* is set in its place.
* When data is loaded from files or DLLs (shared libraries) and
* may be stored in common files, then the data finding is roughly as follows:
* <ul>
* <li>common file at path/basename has entry name_type?</li>
* <li>common file at basename has entry name_type?</li>
* <li>separate file at path/basename_name_type?</li>
* <li>separate file at basename_name_type?</li>
* <li>separate file at path/name_type?</li>
* <li>separate file at name_type?</li>
* </ul>
* If the basename is empty, then only the last two options are attempted.
* Otherwise, it serves as a name for a common data file or as a basename
* (collection name) prefix for individual files.</p>
*
* @param path Specifies an absolute path and/or a basename for the
* finding of the data in the file system.
* <code>NULL</code> for ICU data.
* @param type A string that specifies the type of data to be loaded.
* For example, resource bundles are loaded with type "res",
* conversion tables with type "cnv".
* This may be <code>NULL</code> or empty.
* @param name A string that specifies the name of the data.
* @param isAcceptable This function is called to verify that loaded data
* is useful for the client code. If it returns FALSE
* for all data items, then <code>udata_openChoice()</code>
* will return with an error.
* @param context Arbitrary parameter to be passed into isAcceptable.
* @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
* @return A pointer (handle) to a data memory object, or <code>NULL</code>
* if an error occurs. Call <code>udata_getMemory()</code>
* to get a pointer to the actual data.
*/
U_CAPI UDataMemory * U_EXPORT2
udata_openChoice(const char *path, const char *type, const char *name,
UDataMemoryIsAcceptable *isAcceptable, void *context,
UErrorCode *pErrorCode);
/**
* Close the data memory.
* This function must be called to allow the system to
* release resources associated with this data memory.
*/
U_CAPI void U_EXPORT2
udata_close(UDataMemory *pData);
/**
* Get the pointer to the actual data inside the data memory.
* The data is read-only.
*/
U_CAPI const void * U_EXPORT2
udata_getMemory(UDataMemory *pData);
/**
* Get the information from the data memory header.
* This allows access to the header containing
* platform data properties etc. which is not part of
* the data itself and can therefore not be accessed
* via the pointer that <code>udata_getMemory()</code> returns.
*
* @param pData pointer to the data memory object
* @param pInfo pointer to a UDataInfo object;
* its <code>size</code> field must be set correctly,
* typically to <code>sizeof(UDataInfo)</code>.
*
* <code>*pInfo</code> will be filled with the UDataInfo structure
* in the data memory object. If this structure is smaller than
* <code>pInfo->size</code>, then the <code>size</code> will be
* adjusted and only part of the structure will be filled.
*/
U_CAPI void U_EXPORT2
udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
#endif
#error Please include unicode/udata.h instead

View file

@ -14,7 +14,7 @@
*/
#include "uhash.h"
#include "ustring.h"
#include "unicode/ustring.h"
#include "cstring.h"
#include "cmemory.h"
@ -90,7 +90,7 @@ uhash_openSize(UHashFunction func,
if(U_FAILURE(*status)) return NULL;
result = (UHashtable*) icu_malloc(sizeof(UHashtable));
result = (UHashtable*) uprv_malloc(sizeof(UHashtable));
if(result == 0) {
*status = U_MEMORY_ALLOCATION_ERROR;
return 0;
@ -107,7 +107,7 @@ uhash_openSize(UHashFunction func,
uhash_initialize(result, uhash_leastGreaterPrimeIndex(size), status);
if(U_FAILURE(*status)) {
icu_free(result);
uprv_free(result);
return 0;
}
@ -135,9 +135,9 @@ uhash_close(UHashtable *hash)
while (toBeDeletedCount--) my_free(toBeDeleted[toBeDeletedCount]);
}
icu_free(hash->values);
icu_free(hash->hashes);
icu_free(hash->toBeDeleted);
uprv_free(hash->values);
uprv_free(hash->hashes);
uprv_free(hash->toBeDeleted);
}
U_CAPI int32_t
@ -182,7 +182,7 @@ uhash_putKey(UHashtable *hash,
void * result = hash->values[index];
if (result != value) /*Make sure the same object isn't scheduled for a double deletion*/
{
hash->toBeDeleted = (void**) icu_realloc(hash->toBeDeleted, sizeof(void*)*(++(hash->toBeDeletedCount)));
hash->toBeDeleted = (void**) uprv_realloc(hash->toBeDeleted, sizeof(void*)*(++(hash->toBeDeletedCount)));
hash->toBeDeleted[(hash->toBeDeletedCount)-1] = result;
}
hash->values[index] = 0;
@ -232,7 +232,7 @@ uhash_put(UHashtable *hash,
void* result = hash->values[index];
if (result != value) /*Make sure the same object isn't scheduled for a double deletion*/
{
hash->toBeDeleted = (void**) icu_realloc(hash->toBeDeleted,
hash->toBeDeleted = (void**) uprv_realloc(hash->toBeDeleted,
sizeof(void*)*(++(hash->toBeDeletedCount)));
hash->toBeDeleted[(hash->toBeDeletedCount)-1] = result;
}
@ -344,16 +344,16 @@ uhash_initialize(UHashtable *hash,
hash->primeIndex = primeIndex;
hash->length = UHASH_PRIMES[primeIndex];
hash->values = (void**) icu_malloc(sizeof(void*) * hash->length);
hash->values = (void**) uprv_malloc(sizeof(void*) * hash->length);
if(hash->values == 0) {
*status = U_MEMORY_ALLOCATION_ERROR;
return;
}
hash->hashes = (int32_t*) icu_malloc(sizeof(int32_t) * hash->length);
hash->hashes = (int32_t*) uprv_malloc(sizeof(int32_t) * hash->length);
if(hash->values == 0) {
*status = U_MEMORY_ALLOCATION_ERROR;
icu_free(hash->values);
uprv_free(hash->values);
return;
}
@ -413,8 +413,8 @@ uhash_rehash(UHashtable *hash,
}
}
icu_free(oldValues);
icu_free(oldHashList);
uprv_free(oldValues);
uprv_free(oldHashList);
}
void
@ -536,7 +536,7 @@ uhash_hashString(const void *parm)
{
if(parm != NULL) {
const char *key = (const char*) parm;
int32_t len = icu_strlen(key);
int32_t len = uprv_strlen(key);
int32_t hash = UHASH_INVALID;
const char *limit = key + len;
int32_t inc = (len >= 128 ? len/64 : 1);

View file

@ -22,7 +22,7 @@
#ifndef UHASH_H
#define UHASH_H
#include "utypes.h"
#include "unicode/utypes.h"
/*
* Hashtable stores key-value pairs and does efficient lookup based on keys.

View file

@ -19,14 +19,14 @@
******************************************************************************/
#include "uloc.h"
#include "unicode/uloc.h"
#include "utypes.h"
#include "ures.h"
#include "uchar.h"
#include "unicode/utypes.h"
#include "unicode/ures.h"
#include "unicode/uchar.h"
#include "umutex.h"
#include "cstring.h"
#include "ustring.h"
#include "unicode/ustring.h"
#include "cmemory.h"
/****************************************************************************
@ -188,11 +188,11 @@ int16_t _findIndex(const char* list, int32_t listLength, const char* key)
const char* listEnd = anchor + listLength;
bool_t found = FALSE;
int index = 0;
int tokenSize = icu_strlen(list)+1; /*gets the size of the tokens*/
int tokenSize = uprv_strlen(list)+1; /*gets the size of the tokens*/
while (!found && list<listEnd)
{
if (icu_strcmp(key, list) == 0)
if (uprv_strcmp(key, list) == 0)
{
found = TRUE;
break;
@ -227,16 +227,16 @@ void uloc_setDefault(const char* newDefaultLocale,
if (newDefaultLocale == NULL)
{
newDefaultLocale = icu_getDefaultLocaleID();
newDefaultLocale = uprv_getDefaultLocaleID();
}
umtx_lock(NULL);
if(_defaultLocale == NULL)
_defaultLocale = (char*)icu_malloc(sizeof(char) * (icu_strlen(newDefaultLocale) + 1));
_defaultLocale = (char*)uprv_malloc(sizeof(char) * (uprv_strlen(newDefaultLocale) + 1));
else
_defaultLocale = (char*)icu_realloc(_defaultLocale,
sizeof(char) * (icu_strlen(newDefaultLocale) + 1));
icu_strcpy(_defaultLocale, newDefaultLocale);
_defaultLocale = (char*)uprv_realloc(_defaultLocale,
sizeof(char) * (uprv_strlen(newDefaultLocale) + 1));
uprv_strcpy(_defaultLocale, newDefaultLocale);
umtx_unlock(NULL);
/* propagate change to C++ */
@ -281,7 +281,7 @@ int32_t uloc_getParent(const char* localeID,
*err = U_BUFFER_OVERFLOW_ERROR;
}
if (parentCapacity>0) parent[icu_min(i,parentCapacity-1)] = '\0';
if (parentCapacity>0) parent[uprv_min(i,parentCapacity-1)] = '\0';
return i+1;
@ -316,7 +316,8 @@ uloc_getLanguage(const char* localeID,
if (languageCapacity > 0)
{
language[icu_min(i,languageCapacity-1)] = '\0';
language[uprv_min(i,languageCapacity-1)] = '\0';
}
}
return i+1;
@ -353,7 +354,7 @@ int32_t uloc_getCountry(const char* localeID,
*err = U_BUFFER_OVERFLOW_ERROR;
}
if (countryCapacity > 0) {country[icu_min(i,countryCapacity-1)] = '\0';}
if (countryCapacity > 0) {country[uprv_min(i,countryCapacity-1)] = '\0';}
return i+1;
}
@ -390,7 +391,7 @@ int32_t uloc_getVariant(const char* localeID,
}
if (variantCapacity>0) {variant[icu_min(i,variantCapacity-1)] = '\0';}
if (variantCapacity>0) {variant[uprv_min(i,variantCapacity-1)] = '\0';}
return i+1;
}
@ -441,20 +442,20 @@ int32_t uloc_getName(const char* localeID,
/*We fill in the users buffer*/
if ((nameCapacity>0) && cntSze)
{
if (U_SUCCESS(int_err)) icu_strcat(name, "_");
if (U_SUCCESS(int_err)) uprv_strcat(name, "_");
uloc_getCountry(localeID,
name + icu_strlen(name),
nameCapacity - icu_strlen(name),
name + uprv_strlen(name),
nameCapacity - uprv_strlen(name),
&int_err);
if (varSze)
{
if (U_SUCCESS(int_err)) icu_strcat(name, "_");
if (U_SUCCESS(int_err)) uprv_strcat(name, "_");
uloc_getVariant(localeID,
name + icu_strlen(name),
nameCapacity - icu_strlen(name),
name + uprv_strlen(name),
nameCapacity - uprv_strlen(name),
&int_err);
}
@ -542,7 +543,7 @@ int32_t uloc_getDisplayLanguage(const char* locale,
inLocale = uloc_getDefault();
isDefaultLocale = TRUE;
}
else if (icu_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
/*truncates the fallback mechanism if we start out with a defaultLocale*/
if (locale == NULL) locale = uloc_getDefault();
@ -669,7 +670,7 @@ int32_t uloc_getDisplayCountry(const char* locale,
inLocale = uloc_getDefault();
isDefaultLocale = TRUE;
}
else if (icu_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
/*truncates the fallback mechanism if we start out with a defaultLocale*/
if (locale == NULL) locale = uloc_getDefault();
@ -793,7 +794,7 @@ int32_t uloc_getDisplayVariant(const char* locale,
inLocale = uloc_getDefault();
isDefaultLocale = TRUE;
}
else if (icu_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
else if (uprv_strcmp(inLocale, uloc_getDefault()) == 0) isDefaultLocale = TRUE;
/*truncates the fallback mechanism if we start out with a defaultLocale*/
if (locale == NULL) locale = uloc_getDefault();
@ -806,20 +807,20 @@ int32_t uloc_getDisplayVariant(const char* locale,
/*In case the variant is longer than our stack buffers*/
if (err == U_BUFFER_OVERFLOW_ERROR)
{
inVariant = (char*)icu_malloc(varBufSize*sizeof(char)+1);
inVariant = (char*)uprv_malloc(varBufSize*sizeof(char)+1);
if (inVariant == NULL) goto NO_MEMORY;
inVariantTag = (char*)icu_malloc(varBufSize*sizeof(char)+icu_strlen("%%")+1);
inVariantTag = (char*)uprv_malloc(varBufSize*sizeof(char)+uprv_strlen("%%")+1);
if (inVariantTag == NULL)
{
icu_free(inVariant);
uprv_free(inVariant);
goto NO_MEMORY;
}
err = U_ZERO_ERROR;
uloc_getVariant(locale, inVariant, varBufSize, &err);
}
icu_strcpy(inVariantTag,"%%");
icu_strcat(inVariantTag, inVariant);
uprv_strcpy(inVariantTag,"%%");
uprv_strcat(inVariantTag, inVariant);
/*We need to implement a fallback mechanism here because we are getting keys out of a
tagged array, there is no capability of doing this with fallback through the resource
@ -902,8 +903,8 @@ int32_t uloc_getDisplayVariant(const char* locale,
/*Clean up memory*/
if (inVariant != inVariantBuffer)
{
icu_free(inVariant);
icu_free(inVariantTag);
uprv_free(inVariant);
uprv_free(inVariantTag);
}
return i;
@ -1047,13 +1048,13 @@ void _lazyEvaluate_installedLocales()
{
temp = T_ResourceBundle_listInstalledLocales(u_getDataDirectory(),
&_installedLocalesCount);
temp2 = (char **) icu_malloc(sizeof(char*) * (_installedLocalesCount+1));
temp2 = (char **) uprv_malloc(sizeof(char*) * (_installedLocalesCount+1));
for (i = 0; i < _installedLocalesCount; i++)
{
strSize = u_strlen(T_UnicodeString_getUChars(temp[i]));
temp2[i] = (char*) icu_malloc(sizeof(char) *
temp2[i] = (char*) uprv_malloc(sizeof(char) *
(strSize + 1));
T_UnicodeString_extract(temp[i], temp2[i]);
@ -1067,8 +1068,8 @@ void _lazyEvaluate_installedLocales()
temp2 = NULL;
}
else {
for (i = 0; i < _installedLocalesCount; i++) icu_free(temp2[i]);
icu_free(temp2);
for (i = 0; i < _installedLocalesCount; i++) uprv_free(temp2[i]);
uprv_free(temp2);
}
umtx_unlock(NULL);
@ -1095,7 +1096,7 @@ const char* const* uloc_getISOLanguages()
if (_isoLanguages == NULL)
{
_isoLanguages = (char**) icu_malloc(sizeof(char*)*(1+(sizeof(_languages) / 3)));
_isoLanguages = (char**) uprv_malloc(sizeof(char*)*(1+(sizeof(_languages) / 3)));
end = _languages + (sizeof(_languages));
from = _languages;
@ -1132,7 +1133,7 @@ const char* const* uloc_getISOCountries()
if (_isoCountries == NULL)
{
_isoCountries = (char**) icu_malloc(sizeof(char*)*(1+(sizeof(_countries) / 3)));
_isoCountries = (char**) uprv_malloc(sizeof(char*)*(1+(sizeof(_countries) / 3)));
end = _countries + (sizeof(_countries));
from = _countries;

View file

@ -1,469 +1 @@
/*
**********************************************************************
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File ULOC.H
*
* Modification History:
*
* Date Name Description
* 04/01/97 aliu Creation.
* 08/22/98 stephen JDK 1.2 sync.
* 12/08/98 rtg New C API for Locale
* 03/30/99 damiba overhaul
* 03/31/99 helena Javadoc for uloc functions.
* 04/15/99 Madhu Updated Javadoc
********************************************************************************
*/
#ifndef ULOC_H
#define ULOC_H
#include "utypes.h"
/**
*
* A <code>Locale</code> represents a specific geographical, political,
* or cultural region. An operation that requires a <code>Locale</code> to perform
* its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
* to tailor information for the user. For example, displaying a number
* is a locale-sensitive operation--the number should be formatted
* according to the customs/conventions of the user's native country,
* region, or culture. In the C APIs, a locale is simply a const char string.
*
* <P>
* You create a <code>Locale</code> with one of the three options listed below.
* Each of the component is separated by '_' in the locale string.
* <blockquote>
* <pre>
* . newLanguage
* .
* . newLanguage + newCountry
* .
* . newLanguage + newCountry + newVariant
* </pre>
* </blockquote>
* The first option is a valid <STRONG>ISO
* Language Code.</STRONG> These codes are the lower-case two-letter
* codes as defined by ISO-639.
* You can find a full list of these codes at a number of sites, such as:
* <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
* <code>http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</code></a>
*
* <P>
* The second option includes an additional <STRONG>ISO Country
* Code.</STRONG> These codes are the upper-case two-letter codes
* as defined by ISO-3166.
* You can find a full list of these codes at a number of sites, such as:
* <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
* <code>http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</code></a>
*
* <P>
* The third option requires one more piece of information--the
* <STRONG>Variant.</STRONG>
* The Variant codes are vendor and browser-specific.
* For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
* Where there are two variants, separate them with an underscore, and
* put the most important one first. For
* example, a Traditional Spanish collation might be referenced, with
* "ES", "ES", "Traditional_WIN".
*
* <P>
* Because a <code>Locale</code> is just an identifier for a region,
* no validity check is performed when you specify a <code>Locale</code>.
* If you want to see whether particular resources are available for the
* <code>Locale</code> you asked for, you must query those resources. For
* example, ask the <code>UNumberFormat</code> for the locales it supports
* using its <code>getAvailable</code> method.
* <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
* locale, you get back the best available match, not necessarily
* precisely what you asked for. For more information, look at
* <a href="ures.html"><code>UResourceBundle</code></a>.
*
* <P>
* The <code>Locale</code> provides a number of convenient constants
* that you can use to specify the commonly used
* locales. For example, the following refers to a locale
* for the United States:
* <blockquote>
* <pre>
* . ULOC_US
* </pre>
* </blockquote>
*
* <P>
* Once you've specified a locale you can query it for information about
* itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and
* <code>uloc_getLanguage</code> to get the ISO Language Code. You can
* use <code>uloc_getDisplayCountry</code> to get the
* name of the country suitable for displaying to the user. Similarly,
* you can use <code>uloc_getDisplayLanguage</code> to get the name of
* the language suitable for displaying to the user. Interestingly,
* the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive
* and have two versions: one that uses the default locale and one
* that takes a locale as an argument and displays the name or country in
* a language appropriate to that locale.
*
* <P>
* The ICU provides a number of services that perform locale-sensitive
* operations. For example, the <code>unum_xxx</code> functions format
* numbers, currency, or percentages in a locale-sensitive manner.
* </P>
* <blockquote>
* <pre>
* . UErrorCode success = U_ZERO_ERROR;
* . UNumberFormat *nf;
* . const char* myLocale = "fr_FR";
* .
* . nf = unum_open( UNUM_DEFAULT, NULL, success );
* . unum_close(nf);
* . nf = unum_open( UNUM_CURRENCY, NULL, success );
* . unum_close(nf);
* . nf = unum_open( UNUM_PERCENT, NULL, success );
* . unum_close(nf);
* </pre>
* </blockquote>
* Each of these methods has two variants; one with an explicit locale
* and one without; the latter using the default locale.
* <blockquote>
* <pre>
* .
* . nf = unum_open( UNUM_DEFAULT, myLocale, success );
* . unum_close(nf);
* . nf = unum_open( UNUM_CURRENCY, myLocale, success );
* . unum_close(nf);
* . nf = unum_open( UNUM_PERCENT, myLocale, success );
* . unum_close(nf);
* </pre>
* </blockquote>
* A <code>Locale</code> is the mechanism for identifying the kind of services
* (<code>UNumberFormat</code>) that you would like to get. The locale is
* <STRONG>just</STRONG> a mechanism for identifying these services.
*
* <P>
* Each international service that performs locale-sensitive operations
* allows you
* to get all the available objects of that type. You can sift
* through these objects by language, country, or variant,
* and use the display names to present a menu to the user.
* For example, you can create a menu of all the collation objects
* suitable for a given language. Such classes implement these
* three class methods:
* <blockquote>
* <pre>
* . const char* uloc_getAvailable(int32_t index);
* . int32_t uloc_countAvailable();
* . int32_t
* . uloc_getDisplayName(const char* localeID,
* . const char* inLocaleID,
* . UChar* result,
* . int32_t maxResultSize,
* . UErrorCode* err);
* .
* </pre>
* </blockquote>
*/
/*
*
* Useful constants for language.
*/
#define ULOC_ENGLISH "en"
#define ULOC_FRENCH "fr"
#define ULOC_GERMAN "de"
#define ULOC_ITALIAN "it"
#define ULOC_JAPANESE "ja"
#define ULOC_KOREAN "ko"
#define ULOC_CHINESE "zh"
#define ULOC_SIMPLIFIED_CHINESE "zh_CN"
#define ULOC_TRADITIONAL_CHINESE "zh_TW"
/*
*
* Useful constants for country.
*/
#define ULOC_FRANCE "fr_FR"
#define ULOC_GERMANY "de_DE"
#define ULOC_ITALY "it_IT"
#define ULOC_JAPAN "ja_JP"
#define ULOC_KOREA "ko_KR"
#define ULOC_CHINA "zh_CN"
#define ULOC_PRC "zh_CN"
#define ULOC_TAIWAN "zh_TW"
#define ULOC_UK "en_GB"
#define ULOC_US "en_US"
#define ULOC_CANADA "en_CA"
#define ULOC_CANADA_FRENCH "fr_CA"
/**
* Gets the system's default locale.
*
* @return the system default locale
*/
U_CAPI const char* U_EXPORT2
uloc_getDefault(void);
/**
* Sets the system's default locale.
*
* @param localeID the new system default locale
* @param status the error information if the setting of default locale fails
*/
U_CAPI void U_EXPORT2
uloc_setDefault(const char* localeID,
UErrorCode* status);
/**
* Gets the language code for the specified locale.
*
* @param localeID the locale to get the ISO language code with
* @param language the language code for localeID
* @param languageCapacity the size of the language buffer to store the
* language code with
* @param err error information if retrieving the language code failed
* @return the actual buffer size needed for the language code. If it's greater
* than languageCapacity, the returned language code will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char* localeID,
char* language,
int32_t languageCapacity,
UErrorCode* err);
/**
* Gets the country code for the specified locale.
*
* @param localeID the locale to get the country code with
* @param country the country code for localeID
* @param countryCapacity the size of the country buffer to store the
* country code with
* @param err error information if retrieving the country code failed
* @return the actual buffer size needed for the country code. If it's greater
* than countryCapacity, the returned country code will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getCountry(const char* localeID,
char* country,
int32_t countryCapacity,
UErrorCode* err);
/**
* Gets the variant code for the specified locale.
*
* @param localeID the locale to get the variant code with
* @param variant the variant code for localeID
* @param variantCapacity the size of the variant buffer to store the
* variant code with
* @param err error information if retrieving the variant code failed
* @return the actual buffer size needed for the variant code. If it's greater
* than variantCapacity, the returned variant code will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getVariant(const char* localeID,
char* variant,
int32_t variantCapacity,
UErrorCode* err);
/**
* Gets the full name for the specified locale.
*
* @param localeID the locale to get the full name with
* @param name the full name for localeID
* @param nameCapacity the size of the name buffer to store the
* full name with
* @param err error information if retrieving the full name failed
* @return the actual buffer size needed for the full name. If it's greater
* than nameCapacity, the returned full name will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getName(const char* localeID,
char* name,
int32_t nameCapacity,
UErrorCode* err);
/**
* Gets the ISO language code for the specified locale.
*
* @param localeID the locale to get the ISO language code with
* @return language the ISO language code for localeID
*/
U_CAPI const char* U_EXPORT2
uloc_getISO3Language(const char* localeID);
/**
* Gets the ISO country code for the specified locale.
*
* @param localeID the locale to get the ISO country code with
* @return country the ISO country code for localeID
*/
U_CAPI const char* U_EXPORT2
uloc_getISO3Country(const char* localeID);
/**
* Gets the Win32 LCID value for the specified locale.
*
* @param localeID the locale to get the Win32 LCID value with
* @return the Win32 LCID for localeID
*/
U_CAPI uint32_t U_EXPORT2
uloc_getLCID(const char* localeID);
/**
* Gets the language name suitable for display for the specified locale.
*
* @param locale the locale to get the displayable language code with
* @param language the displayable language code for locale
* @param languageCapacity the size of the language buffer to store the
* displayable language code with
* @param status error information if retrieving the displayable language code failed
* @return the actual buffer size needed for the displayable language code. If it's greater
* than languageCapacity, the returned language code will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayLanguage(const char* locale,
const char* inLocale,
UChar* language,
int32_t languageCapacity,
UErrorCode* status);
/**
* Gets the country name suitable for display for the specified locale.
*
* @param locale the locale to get the displayable country code with
* @param country the displayable country code for locale
* @param countryCapacity the size of the country buffer to store the
* displayable country code with
* @param status error information if retrieving the displayable country code failed
* @return the actual buffer size needed for the displayable country code. If it's greater
* than countryCapacity, the returned displayable country code will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayCountry(const char* locale,
const char* inLocale,
UChar* country,
int32_t countryCapacity,
UErrorCode* status); /* NULL may be used to specify the default */
/**
* Gets the variant code suitable for display for the specified locale.
*
* @param locale the locale to get the displayable variant code with
* @param variant the displayable variant code for locale
* @param variantCapacity the size of the variant buffer to store the
* displayable variant code with
* @param status error information if retrieving the displayable variant code failed
* @return the actual buffer size needed for the displayable variant code. If it's greater
* than variantCapacity, the returned displayable variant code will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayVariant(const char* locale,
const char* inLocale,
UChar* variant,
int32_t variantCapacity,
UErrorCode* status); /* NULL may be used to specify the default */
/**
* Gets the full name suitable for display for the specified locale.
*
* @param localeID the locale to get the displayable name with
* @param result the displayable name for localeID
* @param maxResultSize the size of the name buffer to store the
* displayable full name with
* @param err error information if retrieving the displayable name failed
* @return the actual buffer size needed for the displayable name. If it's greater
* than maxResultSize, the returned displayable name will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char* localeID,
const char* inLocaleID, /* NULL may be used to specify the default */
UChar* result,
int32_t maxResultSize,
UErrorCode* err);
/**
*
* Gets the specified locale from a list of all available locales.
* The return value is a pointer to an item of
* a locale name array. Both this array and the pointers
* it contains are owned by ICU and should not be deleted or written through
* by the caller. The locale name is terminated by a null pointer.
* @param index the specific locale name index of the available locale list
* @return a specified locale name of all available locales
*/
U_CAPI const char* U_EXPORT2
uloc_getAvailable(int32_t index);
/**
* Gets the size of the all available locale list.
*
* @return the size of the locale list
*/
U_CAPI int32_t U_EXPORT2 uloc_countAvailable(void);
/**
*
* Gets a list of all available language codes defined in ISO 639. This is a pointer
* to an array of pointers to arrays of char. All of these pointers are owned
* by ICU-- do not delete them, and do not write through them. The array is
* terminated with a null pointer.
* @return a list of all available language codes
*/
U_CAPI const char* const* U_EXPORT2
uloc_getISOLanguages(void);
/**
*
* Gets a list of all available 2-letter country codes defined in ISO 3166. This is a
* pointer to an array of pointers to arrays of char. All of these pointers are
* owned by ICU-- do not delete them, and do not write through them. The array is
* terminated with a null pointer.
* @return a list of all available country codes
*/
U_CAPI const char* const* U_EXPORT2
uloc_getISOCountries(void);
/**
* Deprecated 1999dec14 - Gets the directory containing the locale data files.
*
* @return the locale data file directory
*/
#define uloc_getDataDirectory u_getDataDirectory
/**
* Deprecated 1999dec14 - Sets the directory containing the locale data files.
*
* @return the new directory to fetch locale data from
*/
#define uloc_setDataDirectory u_setDataDirectory
/*Internal function */
int32_t U_EXPORT2
uloc_getParent(const char* localeID,
char* parent,
int32_t parentCapacity,
UErrorCode* err);
/*eof*/
#endif /*_ULOC*/
#error Please include unicode/uloc.h instead

View file

@ -1,33 +1 @@
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: umisc.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999oct15
* created by: Markus W. Scherer
*/
#ifndef UMISC_H
#define UMISC_H
#include "utypes.h"
/* This file contains miscellaneous definitions for the C APIs. */
/**
* Describes where a specific field sits inside a piece of text:
* the field identifier together with the text range that contains it.
*/
typedef struct UFieldPosition {
/** Identifier of the field */
int32_t field;
/** Index at which the text range containing the field begins */
int32_t beginIndex;
/** Limit of the text range containing the field */
int32_t endIndex;
} UFieldPosition;
#endif
#error Please include unicode/umisc.h instead

View file

@ -37,7 +37,7 @@
/* Check our settings... */
#include "utypes.h"
#include "unicode/utypes.h"
/* APP_NO_THREADS is an old symbol. We'll honour it if present. */
#ifdef APP_NO_THREADS
@ -154,12 +154,12 @@ if( mutex == NULL ) /* initialize the global mutex */
return;
#if defined( _WIN32 )
*mutex = icu_malloc(sizeof(CRITICAL_SECTION));
*mutex = uprv_malloc(sizeof(CRITICAL_SECTION));
InitializeCriticalSection((CRITICAL_SECTION*)*mutex);
#elif defined( POSIX )
*mutex = icu_malloc(sizeof(pthread_mutex_t));
*mutex = uprv_malloc(sizeof(pthread_mutex_t));
#if defined(HPUX)
pthread_mutex_init((pthread_mutex_t*)*mutex, pthread_mutexattr_default);

View file

@ -18,7 +18,7 @@
#ifndef UMUTEX_H
#define UMUTEX_H
#include "utypes.h"
#include "unicode/utypes.h"
#ifndef XP_CPLUSPLUS
typedef void * Mutex;

View file

@ -22,11 +22,11 @@
# define U_COMMON_IMPLEMENTATION
#endif
#include "utypes.h"
#include "unicode/utypes.h"
#include "umutex.h"
#include "cmemory.h"
#include "uchar.h"
#include "udata.h"
#include "unicode/uchar.h"
#include "unicode/udata.h"
/* prototypes --------------------------------------------------------------- */

View file

@ -33,9 +33,9 @@
// 11/22/99 aliu Added MIN_RADIX, MAX_RADIX, digit, forDigit
//********************************************************************************************
#include "unicode.h"
#include "unicode/unicode.h"
#include "uchar.h"
#include "unicode/uchar.h"
const UChar Unicode::MIN_VALUE = 0x0000;

View file

@ -1,779 +1 @@
/*
*****************************************************************************************
* Copyright (C) 1996-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*****************************************************************************************
*/
// FILE NAME : unicode.h
//
// CREATED
// Wednesday, December 11, 1996
//
// CREATED BY
// Helena Shih
//
// CHANGES
// Thursday, April 15, 1999
// Modified the definitions of all the functions
// C++ Wrappers for Unicode
// CHANGES BY
// Madhu Katragadda
// 5/20/99 Madhu Added the function getVersion()
// 11/22/99 aliu Added MIN_RADIX, MAX_RADIX, digit, forDigit
//********************************************************************************************
#ifndef UNICODE_H
#define UNICODE_H
#include "utypes.h"
#include "uchar.h"
/**
* The Unicode class allows you to query the properties associated with individual
* Unicode character values.
* <p>
* The Unicode character information, provided implicitly by the
* Unicode character encoding standard, includes information about the script
* (for example, symbols or control characters) to which the character belongs,
* as well as semantic information such as whether a character is a digit or
* uppercase, lowercase, or uncased.
* <P>
* @subclassing Do not subclass.
*/
class U_COMMON_API Unicode
{
public:
/**
* The minimum value a UChar can have. The lowest value a
* UChar can have is 0x0000.
*/
static const UChar MIN_VALUE;
/**
* The maximum value a UChar can have. The greatest value a
* UChar can have is 0xffff.
*/
static const UChar MAX_VALUE;
/**
* Public data for enumerated Unicode general category types
*/
enum EUnicodeGeneralTypes
{
UNASSIGNED = 0,
UPPERCASE_LETTER = 1,
LOWERCASE_LETTER = 2,
TITLECASE_LETTER = 3,
MODIFIER_LETTER = 4,
OTHER_LETTER = 5,
NON_SPACING_MARK = 6,
ENCLOSING_MARK = 7,
COMBINING_SPACING_MARK = 8,
DECIMAL_DIGIT_NUMBER = 9,
LETTER_NUMBER = 10,
OTHER_NUMBER = 11,
SPACE_SEPARATOR = 12,
LINE_SEPARATOR = 13,
PARAGRAPH_SEPARATOR = 14,
CONTROL = 15,
FORMAT = 16,
PRIVATE_USE = 17,
SURROGATE = 18,
DASH_PUNCTUATION = 19,
START_PUNCTUATION = 20,
END_PUNCTUATION = 21,
CONNECTOR_PUNCTUATION = 22,
OTHER_PUNCTUATION = 23,
MATH_SYMBOL = 24,
CURRENCY_SYMBOL = 25,
MODIFIER_SYMBOL = 26,
OTHER_SYMBOL = 27,
INITIAL_PUNCTUATION = 28,
FINAL_PUNCTUATION = 29,
GENERAL_TYPES_COUNT = 30
};
/**
* Public data for enumerated script identifiers; this is the type
* returned by getScript(). The enumerators carry no explicit values,
* so they are numbered consecutively from 0 in declaration order and
* kScriptCount, declared last, equals the number of enumerators before it.
* @see Unicode#getScript
*/
enum EUnicodeScript
{
kBasicLatin,
kLatin1Supplement,
kLatinExtendedA,
kLatinExtendedB,
kIPAExtension,
kSpacingModifier,
kCombiningDiacritical,
kGreek,
kCyrillic,
kArmenian,
kHebrew,
kArabic,
kDevanagari,
kBengali,
kGurmukhi,
kGujarati,
kOriya,
kTamil,
kTelugu,
kKannada,
kMalayalam,
kThai,
kLao,
kTibetan,
kGeorgian,
kHangulJamo,
kLatinExtendedAdditional,
kGreekExtended,
kGeneralPunctuation,
kSuperSubScript,
kCurrencySymbolScript,
kSymbolCombiningMark,
kLetterlikeSymbol,
kNumberForm,
kArrow,
kMathOperator,
kMiscTechnical,
kControlPicture,
kOpticalCharacter,
kEnclosedAlphanumeric,
kBoxDrawing,
kBlockElement,
kGeometricShape,
kMiscSymbol,
kDingbat,
kCJKSymbolPunctuation,
kHiragana,
kKatakana,
kBopomofo,
kHangulCompatibilityJamo,
kKanbun,
kEnclosedCJKLetterMonth,
kCJKCompatibility,
kCJKUnifiedIdeograph,
kHangulSyllable,
kHighSurrogate,
kHighPrivateUseSurrogate,
kLowSurrogate,
kPrivateUse,
kCJKCompatibilityIdeograph,
kAlphabeticPresentation,
kArabicPresentationA,
kCombiningHalfMark,
kCJKCompatibilityForm,
kSmallFormVariant,
kArabicPresentationB,
kNoScript,
kHalfwidthFullwidthForm,
kScriptCount
};
/**
* This specifies the language directional property of a character set.
*/
enum EDirectionProperty {
LEFT_TO_RIGHT = 0,
RIGHT_TO_LEFT = 1,
EUROPEAN_NUMBER = 2,
EUROPEAN_NUMBER_SEPARATOR = 3,
EUROPEAN_NUMBER_TERMINATOR = 4,
ARABIC_NUMBER = 5,
COMMON_NUMBER_SEPARATOR = 6,
BLOCK_SEPARATOR = 7,
SEGMENT_SEPARATOR = 8,
WHITE_SPACE_NEUTRAL = 9,
OTHER_NEUTRAL = 10,
LEFT_TO_RIGHT_EMBEDDING = 11,
LEFT_TO_RIGHT_OVERRIDE = 12,
RIGHT_TO_LEFT_ARABIC = 13,
RIGHT_TO_LEFT_EMBEDDING = 14,
RIGHT_TO_LEFT_OVERRIDE = 15,
POP_DIRECTIONAL_FORMAT = 16,
DIR_NON_SPACING_MARK = 17,
BOUNDARY_NEUTRAL = 18
};
/**
* Values returned by the getCellWidth() function.
* @see Unicode#getCellWidth
*/
enum ECellWidths
{
ZERO_WIDTH = 0,
HALF_WIDTH = 1,
FULL_WIDTH = 2,
NEUTRAL = 3
};
/**
* The minimum radix available for conversion to and from Strings.
* The constant value of this field is the smallest value permitted
* for the radix argument in radix-conversion methods such as the
* <code>digit</code> method and the <code>forDigit</code>
* method.
*
* @see Unicode#digit
* @see Unicode#forDigit
*/
static const int8_t MIN_RADIX;
/**
* The maximum radix available for conversion to and from Strings.
* The constant value of this field is the largest value permitted
* for the radix argument in radix-conversion methods such as the
* <code>digit</code> method and the <code>forDigit</code>
* method.
*
* @see Unicode#digit
* @see Unicode#forDigit
*/
static const int8_t MAX_RADIX;
/**
* Determines whether the specified UChar is a lowercase character
* according to Unicode 2.1.2.
*
* @param ch the character to be tested
* @return true if the character is lowercase; false otherwise.
*
* @see Unicode#isUpperCase
* @see Unicode#isTitleCase
* @see Unicode#toLowerCase
*/
static bool_t isLowerCase(UChar ch);
/**
* Determines whether the specified character is an uppercase character
* according to Unicode 2.1.2.
*
* @param ch the character to be tested
* @return true if the character is uppercase; false otherwise.
* @see Unicode#isLowerCase
* @see Unicode#isTitleCase
* @see Unicode#toUpperCase
*/
static bool_t isUpperCase(UChar ch);
/**
* Determines whether the specified character is a titlecase character
* according to Unicode 2.1.2.
*
* @param ch the character to be tested
* @return true if the character is titlecase; false otherwise.
* @see Unicode#isUpperCase
* @see Unicode#isLowerCase
* @see Unicode#toTitleCase
*/
static bool_t isTitleCase(UChar ch);
/**
* Determines whether the specified character is a digit according to Unicode
* 2.1.2.
*
* @param ch the character to be tested
* @return true if the character is a digit; false otherwise.
* @see Unicode#digit
* @see Unicode#forDigit
* @see Unicode#digitValue
*/
static bool_t isDigit(UChar ch);
/**
* Determines whether the specified numeric value is actually a defined character
* according to Unicode 2.1.2.
*
* @param ch the character to be tested
* @return true if the character has a defined Unicode meaning; false otherwise.
*
* @see Unicode#isDigit
* @see Unicode#isLetter
* @see Unicode#isLetterOrDigit
* @see Unicode#isUpperCase
* @see Unicode#isLowerCase
* @see Unicode#isTitleCase
*/
static bool_t isDefined(UChar ch);
/**
* Determines whether the specified character is a control character according
* to Unicode 2.1.2.
*
* @param ch the character to be tested
* @return true if the Unicode character is a control character; false otherwise.
*
* @see Unicode#isPrintable
*/
static bool_t isControl(UChar ch);
/**
* Determines whether the specified character is a printable character according
* to Unicode 2.1.2.
*
* @param ch the character to be tested
* @return true if the Unicode character is a printable character; false otherwise.
*
* @see Unicode#isControl
*/
static bool_t isPrintable(UChar ch);
/**
* Determines whether the specified character is of the base form according
* to Unicode 2.1.2.
*
* @param ch the character to be tested
* @return true if the Unicode character is of the base form; false otherwise.
*
* @see Unicode#isLetter
* @see Unicode#isDigit
*/
static bool_t isBaseForm(UChar ch);
/**
* Determines whether the specified character is a letter
* according to Unicode 2.1.2.
*
* @param ch the character to be tested
* @return true if the character is a letter; false otherwise.
*
*
* @see Unicode#isDigit
* @see Unicode#isLetterOrDigit
* @see Unicode#isUpperCase
* @see Unicode#isLowerCase
* @see Unicode#isTitleCase
*/
static bool_t isLetter(UChar ch);
/**
* A convenience method for determining if a Unicode character
* is allowed as the first character in a Java identifier.
* <P>
* A character may start a Java identifier if and only if
* it is one of the following:
* <ul>
* <li> a letter
* <li> a currency symbol (such as "$")
* <li> a connecting punctuation symbol (such as "_").
* </ul>
*
* @param ch the Unicode character.
* @return TRUE if the character may start a Java identifier;
* FALSE otherwise.
* @see isJavaIdentifierPart
* @see isLetter
* @see isUnicodeIdentifierStart
*/
static bool_t isJavaIdentifierStart(UChar ch);
/**
* A convenience method for determining if a Unicode character
* may be part of a Java identifier other than the starting
* character.
* <P>
* A character may be part of a Java identifier if and only if
* it is one of the following:
* <ul>
* <li> a letter
* <li> a currency symbol (such as "$")
* <li> a connecting punctuation character (such as "_").
* <li> a digit
* <li> a numeric letter (such as a Roman numeral character)
* <li> a combining mark
* <li> a non-spacing mark
* <li> an ignorable control character
* </ul>
*
* @param ch the Unicode character.
* @return TRUE if the character may be part of a Java identifier;
* FALSE otherwise.
* @see isIdentifierIgnorable
* @see isJavaIdentifierStart
* @see isLetter
* @see isDigit
* @see isUnicodeIdentifierPart
*/
static bool_t isJavaIdentifierPart(UChar ch);
/**
* A convenience method for determining if a Unicode character
* is allowed to start in a Unicode identifier.
* A character may start a Unicode identifier if and only if
* it is a letter.
*
* @param ch the Unicode character.
* @return TRUE if the character may start a Unicode identifier;
* FALSE otherwise.
* @see isJavaIdentifierStart
* @see isLetter
* @see isUnicodeIdentifierPart
*/
static bool_t isUnicodeIdentifierStart(UChar ch);
/**
* A convenience method for determining if a Unicode character
* may be part of a Unicode identifier other than the starting
* character.
* <P>
* A character may be part of a Unicode identifier if and only if
* it is one of the following:
* <ul>
* <li> a letter
* <li> a connecting punctuation character (such as "_").
* <li> a digit
* <li> a numeric letter (such as a Roman numeral character)
* <li> a combining mark
* <li> a non-spacing mark
* <li> an ignorable control character
* </ul>
*
* @param ch the Unicode character.
* @return TRUE if the character may be part of a Unicode identifier;
* FALSE otherwise.
* @see isIdentifierIgnorable
* @see isJavaIdentifierPart
* @see isLetterOrDigit
* @see isUnicodeIdentifierStart
*/
static bool_t isUnicodeIdentifierPart(UChar ch);
/**
* A convenience method for determining if a Unicode character
* should be regarded as an ignorable character in a Java
* identifier or a Unicode identifier.
* <P>
* The following Unicode characters are ignorable in a Java identifier
* or a Unicode identifier:
* <table>
* <tr><td>0x0000 through 0x0008,</td>
* <td>ISO control characters that</td></tr>
* <tr><td>0x000E through 0x001B,</td> <td>are not whitespace</td></tr>
* <tr><td>and 0x007F through 0x009F</td></tr>
* <tr><td>0x200C through 0x200F</td> <td>join controls</td></tr>
* <tr><td>0x202A through 0x202E</td> <td>bidirectional controls</td></tr>
* <tr><td>0x206A through 0x206F</td> <td>format controls</td></tr>
* <tr><td>0xFEFF</td> <td>zero-width no-break space</td></tr>
* </table>
*
* @param ch the Unicode character.
* @return TRUE if the character is ignorable in a Java or Unicode identifier;
* FALSE otherwise.
* @see isJavaIdentifierPart
* @see isUnicodeIdentifierPart
*/
static bool_t isIdentifierIgnorable(UChar ch);
/**
* The given character is mapped to its lowercase equivalent according to
* Unicode 2.1.2; if the character has no lowercase equivalent, the character
* itself is returned.
* <P>
* A character has a lowercase equivalent if and only if a lowercase mapping
* is specified for the character in the Unicode 2.0 attribute table.
* <P>
* Unicode::toLowerCase() only deals with the general letter case conversion.
* For language specific case conversion behavior, use UnicodeString::toLower().
* For example, the case conversion for dot-less i and dotted I in Turkish,
* or for final sigma in Greek.
*
* @param ch the character to be converted
* @return the lowercase equivalent of the character, if any;
* otherwise the character itself.
*
* @see UnicodeString#toLower
* @see Unicode#isLowerCase
* @see Unicode#isUpperCase
* @see Unicode#toUpperCase
* @see Unicode#toTitleCase
*/
static UChar toLowerCase(UChar ch);
/**
* The given character is mapped to its uppercase equivalent according to Unicode
* 2.1.2; if the character has no uppercase equivalent, the character itself is
* returned.
* <P>
* Unicode::toUpperCase() only deals with the general letter case conversion.
* For language specific case conversion behavior, use UnicodeString::toUpper().
* For example, the case conversion for dot-less i and dotted I in Turkish,
* or ess-zed (i.e., "sharp S") in German.
*
* @param ch the character to be converted
* @return the uppercase equivalent of the character, if any;
* otherwise the character itself.
*
* @see UnicodeString#toUpper
* @see Unicode#isUpperCase
* @see Unicode#isLowerCase
* @see Unicode#toLowerCase
* @see Unicode#toTitleCase
*/
static UChar toUpperCase(UChar ch);
/**
* The given character is mapped to its titlecase equivalent according to Unicode
* 2.1.2. There are only four Unicode characters that are truly titlecase forms
* that are distinct from uppercase forms. As a rule, if a character has no
* true titlecase equivalent, its uppercase equivalent is returned.
* <P>
* A character has a titlecase equivalent if and only if a titlecase mapping
* is specified for the character in the Unicode 2.1.2 data.
*
* @param ch the character to be converted
* @return the titlecase equivalent of the character, if any;
* otherwise the character itself.
* @see Unicode#isTitleCase
* @see Unicode#toUpperCase
* @see Unicode#toLowerCase
*/
static UChar toTitleCase(UChar ch);
/**
* Determines if the specified character is a Unicode space character
* according to Unicode 2.1.2.
*
* @param ch the character to be tested
* @return true if the character is a space character; false otherwise.
*/
static bool_t isSpaceChar(UChar ch);
/**
* Returns a value indicating a character category according to Unicode
* 2.1.2.
* @param ch the character to be tested
* @return a value of type int, the character category.
* @see Unicode#UNASSIGNED
* @see Unicode#UPPERCASE_LETTER
* @see Unicode#LOWERCASE_LETTER
* @see Unicode#TITLECASE_LETTER
* @see Unicode#MODIFIER_LETTER
* @see Unicode#OTHER_LETTER
* @see Unicode#NON_SPACING_MARK
* @see Unicode#ENCLOSING_MARK
* @see Unicode#COMBINING_SPACING_MARK
* @see Unicode#DECIMAL_DIGIT_NUMBER
* @see Unicode#OTHER_NUMBER
* @see Unicode#SPACE_SEPARATOR
* @see Unicode#LINE_SEPARATOR
* @see Unicode#PARAGRAPH_SEPARATOR
* @see Unicode#CONTROL
* @see Unicode#PRIVATE_USE
* @see Unicode#SURROGATE
* @see Unicode#DASH_PUNCTUATION
* @see Unicode#START_PUNCTUATION
* @see Unicode#END_PUNCTUATION
* @see Unicode#CONNECTOR_PUNCTUATION
* @see Unicode#OTHER_PUNCTUATION
* @see Unicode#LETTER_NUMBER
* @see Unicode#MATH_SYMBOL
* @see Unicode#CURRENCY_SYMBOL
* @see Unicode#MODIFIER_SYMBOL
* @see Unicode#OTHER_SYMBOL
*/
static int8_t getType(UChar ch);
/**
* Returns the linguistic direction property of a character.
* <P>
* Returns the linguistic direction property of a character.
* For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
* property.
* @see #EDirectionProperty
*/
static EDirectionProperty characterDirection(UChar ch);
/**
* Returns the script associated with a character.
* @see #EUnicodeScript
*/
static EUnicodeScript getScript(UChar ch);
/**
* Returns a value indicating the display-cell width of the character
* when used in Asian text, according to the Unicode standard (see p. 6-130
* of The Unicode Standard, Version 2.0). The results for various characters
* are as follows:
* <P>
* ZERO_WIDTH: Characters which are considered to take up no display-cell space:
* control characters
* format characters
* line and paragraph separators
* non-spacing marks
* combining Hangul jungseong
* combining Hangul jongseong
* unassigned Unicode values
* <P>
* HALF_WIDTH: Characters which take up half a cell in standard Asian text:
* all characters in the General Scripts Area except combining Hangul choseong
* and the characters called out specifically above as ZERO_WIDTH
* alphabetic and Arabic presentation forms
* halfwidth CJK punctuation
* halfwidth Katakana
* halfwidth Hangul Jamo
* halfwidth forms, arrows, and shapes
* <P>
* FULL_WIDTH: Characters which take up a full cell in standard Asian text:
* combining Hangul choseong
* all characters in the CJK Phonetics and Symbols Area
* all characters in the CJK Ideographs Area
* all characters in the Hangul Syllables Area
* CJK compatibility ideographs
* CJK compatibility forms
* small form variants
* fullwidth ASCII
* fullwidth punctuation and currency signs
* <P>
* NEUTRAL: Characters whose cell width is context-dependent:
* all characters in the Symbols Area, except those specifically called out above
* all characters in the Surrogates Area
* all characters in the Private Use Area
* <P>
* For Korean text, this algorithm should work properly with properly normalized Korean
* text. Precomposed Hangul syllables and non-combining jamo are all considered full-
* width characters. For combining jamo, we treat choseong (initial consonants)
* as double-width characters and jungseong (vowels) and jongseong (final consonants)
* as non-spacing marks. This will work right in text that uses the precomposed
* choseong characters instead of two choseong characters in a row, and which uses the
* choseong filler character at the beginning of syllables that don't have an initial
* consonant. The results may be slightly off with Korean text following different
* conventions.
*/
static uint16_t getCellWidth(UChar ch);
/**
* Retrieve the name of a Unicode character.
* Depending on <code>nameChoice</code>, the character name written
* into the buffer is the "modern" name or the name that was defined
* in Unicode version 1.0.
* The name contains only "invariant" characters
* like A-Z, 0-9, space, and '-'.
*
* @param code The character (code point) for which to get the name.
* It must be <code>0&lt;=code&lt;0x10ffff</code>.
* @param buffer Destination address for copying the name.
* @param bufferLength <code>==sizeof(buffer)</code>
* @param nameChoice Selector for which name to get.
*
* @see UCharNameChoice
*
* Example:
* <pre>
* &#32; char buffer[100];
* &#32; UTextOffset length=Unicode::getCharName(
* &#32; 0x284, buffer, sizeof(buffer));
* &#32;
* &#32; // use invariant-character conversion to Unicode
* &#32; UnicodeString name(buffer, length, "");
* </pre>
*/
static inline UTextOffset
getCharName(uint32_t code,
char *buffer, UTextOffset bufferLength,
UCharNameChoice nameChoice=U_UNICODE_CHAR_NAME);
/**
* Retrieves the decimal numeric value of a digit character.
* @param ch the digit character for which to get the numeric value
* @return the numeric value of ch in decimal radix. This method returns
* -1 if ch is not a valid digit character.
* @see Unicode#digit
* @see Unicode#forDigit
* @see Unicode#isDigit
*/
static int32_t digitValue(UChar ch);
/**
* Returns the numeric value of the character <code>ch</code> in the
* specified radix.
* <p>
* If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
* <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
* value of <code>ch</code> is not a valid digit in the specified
* radix, <code>-1</code> is returned. A character is a valid digit
* if at least one of the following is true:
* <ul>
* <li>The method <code>isDigit</code> is true of the character
* and the Unicode decimal digit value of the character (or its
* single-character decomposition) is less than the specified radix.
* In this case the decimal digit value is returned.
* <li>The character is one of the uppercase Latin letters
* <code>'A'</code> through <code>'Z'</code> and its code is less than
* <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
* In this case, <code>ch&nbsp;- 'A'&nbsp;+&nbsp;10</code>
* is returned.
* <li>The character is one of the lowercase Latin letters
* <code>'a'</code> through <code>'z'</code> and its code is less than
* <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
* In this case, <code>ch&nbsp;- 'a'&nbsp;+&nbsp;10</code>
* is returned.
* </ul>
*
* @param ch the character to be converted.
* @param radix the radix.
* @return the numeric value represented by the character in the
* specified radix.
* @see Unicode#MIN_RADIX
* @see Unicode#MAX_RADIX
* @see Unicode#forDigit
* @see Unicode#digitValue
* @see Unicode#isDigit
*/
static int8_t digit(UChar ch, int8_t radix);
/**
* Determines the character representation for a specific digit in
* the specified radix. If the value of <code>radix</code> is not a
* valid radix, or the value of <code>digit</code> is not a valid
* digit in the specified radix, the null character
* (<code>U+0000</code>) is returned.
* <p>
* The <code>radix</code> argument is valid if it is greater than or
* equal to <code>MIN_RADIX</code> and less than or equal to
* <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if
* <code>0&nbsp;&lt;= digit&nbsp;&lt;&nbsp;radix</code>.
* <p>
* If the digit is less than 10, then
* <code>'0'&nbsp;+ digit</code> is returned. Otherwise, the value
* <code>'a'&nbsp;+ digit&nbsp;-&nbsp;10</code> is returned.
*
* @param digit the number to convert to a character.
* @param radix the radix.
* @return the <code>char</code> representation of the specified digit
* in the specified radix.
* @see Unicode#MIN_RADIX
* @see Unicode#MAX_RADIX
* @see Unicode#digit
* @see Unicode#digitValue
* @see Unicode#isDigit
*/
static UChar forDigit(int32_t digit, int8_t radix);
/**
* Retrieves the Unicode Standard Version number that is used
* @return the Unicode Standard Version Number.
*/
static const char* getVersion(void);
protected:
// These constructors, destructor, and assignment operator must
// be protected (not private, as they semantically are) to make
// various UNIX compilers happy. [LIU]
Unicode();
Unicode( const Unicode& other);
~Unicode();
const Unicode& operator=( const Unicode& other);
};
/*
 * Inline implementation of Unicode::getCharName().
 * Forwards to u_charName() with a local error code and returns the
 * length that u_charName() reports; if the error code indicates a
 * failure, 0 is returned instead.
 */
inline UTextOffset
Unicode::getCharName(uint32_t code,
                     char *buffer, UTextOffset bufferLength,
                     UCharNameChoice nameChoice) {
    UErrorCode status=U_ZERO_ERROR;
    const UTextOffset nameLength=u_charName(code, nameChoice, buffer, bufferLength, &status);
    if(U_SUCCESS(status)) {
        return nameLength;
    }
    /* the caller has no error-code parameter, so a failure collapses to length 0 */
    return 0;
}
#endif
#error Please include unicode/unicode.h instead

View file

@ -0,0 +1,255 @@
/*
*******************************************************************************
*
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: bidi.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep15
* created by: Markus W. Scherer
*/
#ifndef BIDI_H
#define BIDI_H
#include "unicode/utypes.h"
#include "unicode/ubidi.h"
#ifndef XP_CPLUSPLUS
# error This is a C++ header file.
#endif
/**
* BiDi is a C++ wrapper class for UBiDi.
* You need one BiDi object in place of one UBiDi object.
* For details on the API and implementation of the
* Unicode BiDi algorithm, see ubidi.h.
*
* @see UBiDi
*/
class U_COMMON_API BiDi {
public:
/** @memo Default constructor, calls ubidi_open(). */
BiDi();
/** @memo Constructor, calls ubidi_open(). */
BiDi(UErrorCode &rErrorCode);
/** @memo Preallocating constructor, calls ubidi_openSized(). */
BiDi(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode &rErrorCode);
/** @memo Destructor, calls ubidi_close(). */
~BiDi();
/** @memo Set this object for one paragraph's text. */
BiDi &
setPara(const UChar *text, UTextOffset length,
UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
UErrorCode &rErrorCode);
/** @memo Set this object for one line of the paragraph object's text. */
BiDi &
setLine(const BiDi &rParaBiDi,
UTextOffset start, UTextOffset limit,
UErrorCode &rErrorCode);
/** @memo Get the directionality of the text. */
UBiDiDirection
getDirection() const;
/** @memo Get the length of the text. */
UTextOffset
getLength() const;
/** @memo Get the paragraph level of the text. */
UBiDiLevel
getParaLevel() const;
/** @memo Get the level for one character. */
UBiDiLevel
getLevelAt(UTextOffset charIndex) const;
/** @memo Get an array of levels for each character. */
const UBiDiLevel *
getLevels(UErrorCode &rErrorCode);
/** @memo Get a logical run. */
void
getLogicalRun(UTextOffset logicalStart,
UTextOffset &rLogicalLimit, UBiDiLevel &rLevel) const;
/** @memo Get the number of runs. */
UTextOffset
countRuns(UErrorCode &rErrorCode);
/**
* @memo Get one run's logical start, length, and directionality,
* which can be 0 for LTR or 1 for RTL.
*/
UBiDiDirection
getVisualRun(UTextOffset runIndex, UTextOffset &rLogicalStart, UTextOffset &rLength);
/** @memo Get the visual position from a logical text position. */
UTextOffset
getVisualIndex(UTextOffset logicalIndex, UErrorCode &rErrorCode);
/** @memo Get the logical text position from a visual position. */
UTextOffset
getLogicalIndex(UTextOffset visualIndex, UErrorCode &rErrorCode);
/**
* @memo Get a logical-to-visual index map (array) for the characters in the UBiDi
* (paragraph or line) object.
*/
void
getLogicalMap(UTextOffset *indexMap, UErrorCode &rErrorCode);
/**
* @memo Get a visual-to-logical index map (array) for the characters in the UBiDi
* (paragraph or line) object.
*/
void
getVisualMap(UTextOffset *indexMap, UErrorCode &rErrorCode);
/** @memo Same as ubidi_reorderLogical(). */
static void
reorderLogical(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
/** @memo Same as ubidi_reorderVisual(). */
static void
reorderVisual(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
/** @memo Same as ubidi_invertMap(). */
static void
invertMap(const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length);
protected:
UBiDi *pBiDi;
};
/* Inline implementations. -------------------------------------------------- */
/*
 * Default constructor: stores whatever ubidi_open() returns.
 * The result is not checked here, and this constructor has no error code
 * through which a failed open could be reported to the caller.
 */
inline BiDi::BiDi() {
pBiDi=ubidi_open();
}
/*
 * Constructor with error reporting: opens a UBiDi object only when
 * rErrorCode does not already indicate a failure. If ubidi_open()
 * returns 0, rErrorCode is set to U_MEMORY_ALLOCATION_ERROR.
 * pBiDi is 0 whenever no object was opened.
 */
inline BiDi::BiDi(UErrorCode &rErrorCode) {
    if(!U_SUCCESS(rErrorCode)) {
        /* a failure is already pending: do not open anything */
        pBiDi=0;
        return;
    }
    pBiDi=ubidi_open();
    if(pBiDi==0) {
        rErrorCode=U_MEMORY_ALLOCATION_ERROR;
    }
}
/*
 * Preallocating constructor: stores the result of ubidi_openSized().
 * maxLength, maxRunCount and the address of rErrorCode are passed through
 * unchanged; no additional checking is done here.
 */
inline BiDi::BiDi(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode &rErrorCode) {
pBiDi=ubidi_openSized(maxLength, maxRunCount, &rErrorCode);
}
/*
 * Destructor: closes the wrapped UBiDi object and clears the pointer.
 * NOTE(review): pBiDi can be 0 here (BiDi(UErrorCode &) sets it to 0 when
 * it does not open an object); presumably ubidi_close() accepts a null
 * pointer - confirm against ubidi.h.
 */
inline BiDi::~BiDi() {
ubidi_close(pBiDi);
pBiDi=0;
}
/*
 * Forwards all arguments to ubidi_setPara() for the wrapped UBiDi object;
 * the address of rErrorCode is handed to the C function.
 * Returns *this so that calls can be chained.
 */
inline BiDi &
BiDi::setPara(const UChar *text, UTextOffset length,
UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
UErrorCode &rErrorCode) {
ubidi_setPara(pBiDi, text, length, paraLevel, embeddingLevels, &rErrorCode);
return *this;
}
/*
 * Forwards to ubidi_setLine(): the UBiDi wrapped by rParaBiDi is passed
 * first (the paragraph object), followed by start and limit, and the UBiDi
 * wrapped by this object is passed as the object to be set for the line.
 * Returns *this so that calls can be chained.
 */
inline BiDi &
BiDi::setLine(const BiDi &rParaBiDi,
UTextOffset start, UTextOffset limit,
UErrorCode &rErrorCode) {
ubidi_setLine(rParaBiDi.pBiDi, start, limit, pBiDi, &rErrorCode);
return *this;
}
/* Returns the result of ubidi_getDirection() for the wrapped UBiDi object. */
inline UBiDiDirection
BiDi::getDirection() const {
return ubidi_getDirection(pBiDi);
}
/* Returns the result of ubidi_getLength() for the wrapped UBiDi object. */
inline UTextOffset
BiDi::getLength() const {
return ubidi_getLength(pBiDi);
}
/* Returns the result of ubidi_getParaLevel() for the wrapped UBiDi object. */
inline UBiDiLevel
BiDi::getParaLevel() const {
return ubidi_getParaLevel(pBiDi);
}
/* Returns the result of ubidi_getLevelAt() for charIndex in the wrapped UBiDi object. */
inline UBiDiLevel
BiDi::getLevelAt(UTextOffset charIndex) const {
return ubidi_getLevelAt(pBiDi, charIndex);
}
/*
 * Returns the pointer delivered by ubidi_getLevels() for the wrapped UBiDi
 * object; the address of rErrorCode is handed to the C function.
 */
inline const UBiDiLevel *
BiDi::getLevels(UErrorCode &rErrorCode) {
return ubidi_getLevels(pBiDi, &rErrorCode);
}
/*
 * Forwards to ubidi_getLogicalRun(); the reference parameters
 * rLogicalLimit and rLevel are passed by address as its output arguments.
 */
inline void
BiDi::getLogicalRun(UTextOffset logicalStart,
UTextOffset &rLogicalLimit, UBiDiLevel &rLevel) const {
ubidi_getLogicalRun(pBiDi, logicalStart, &rLogicalLimit, &rLevel);
}
/*
 * Returns the result of ubidi_countRuns() for the wrapped UBiDi object;
 * the address of rErrorCode is handed to the C function.
 */
inline UTextOffset
BiDi::countRuns(UErrorCode &rErrorCode) {
return ubidi_countRuns(pBiDi, &rErrorCode);
}
/*
 * Returns the result of ubidi_getVisualRun() for runIndex; the reference
 * parameters rLogicalStart and rLength are passed by address as its
 * output arguments.
 */
inline UBiDiDirection
BiDi::getVisualRun(UTextOffset runIndex, UTextOffset &rLogicalStart, UTextOffset &rLength) {
return ubidi_getVisualRun(pBiDi, runIndex, &rLogicalStart, &rLength);
}
/*
 * Returns the result of ubidi_getVisualIndex() for logicalIndex;
 * the address of rErrorCode is handed to the C function.
 */
inline UTextOffset
BiDi::getVisualIndex(UTextOffset logicalIndex, UErrorCode &rErrorCode) {
return ubidi_getVisualIndex(pBiDi, logicalIndex, &rErrorCode);
}
/*
 * Returns the result of ubidi_getLogicalIndex() for visualIndex;
 * the address of rErrorCode is handed to the C function.
 */
inline UTextOffset
BiDi::getLogicalIndex(UTextOffset visualIndex, UErrorCode &rErrorCode) {
return ubidi_getLogicalIndex(pBiDi, visualIndex, &rErrorCode);
}
/*
 * Forwards to ubidi_getLogicalMap() for the wrapped UBiDi object;
 * indexMap is the caller-supplied array that is handed to the C function.
 */
inline void
BiDi::getLogicalMap(UTextOffset *indexMap, UErrorCode &rErrorCode) {
ubidi_getLogicalMap(pBiDi, indexMap, &rErrorCode);
}
/*
 * Forwards to ubidi_getVisualMap() for the wrapped UBiDi object;
 * indexMap is the caller-supplied array that is handed to the C function.
 */
inline void
BiDi::getVisualMap(UTextOffset *indexMap, UErrorCode &rErrorCode) {
ubidi_getVisualMap(pBiDi, indexMap, &rErrorCode);
}
/* Static forwarder to ubidi_reorderLogical(); does not use any BiDi object. */
inline void
BiDi::reorderLogical(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap) {
ubidi_reorderLogical(levels, length, indexMap);
}
/* Static forwarder to ubidi_reorderVisual(); does not use any BiDi object. */
inline void
BiDi::reorderVisual(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap) {
ubidi_reorderVisual(levels, length, indexMap);
}
/* Static forwarder to ubidi_invertMap(); does not use any BiDi object. */
inline void
BiDi::invertMap(const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length) {
ubidi_invertMap(srcMap, destMap, length);
}
#endif

View file

@ -0,0 +1,194 @@
/*
********************************************************************
*
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
********************************************************************
*/
#ifndef CHARITER_H
#define CHARITER_H
#include "unicode/utypes.h"
#include "unicode/unistr.h"
/**
* Abstract class defining a protocol for accessing characters in a text-storage object.
<P>Examples:<P>
Function processing characters, in this example simple output
<pre>
. void processChar( UChar c )
. {
. cout &lt;&lt; " " &lt;&lt; c;
. }
</pre>
Traverse the text from start to finish
<pre>
. void traverseForward(CharacterIterator& iter)
. {
. for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
. processChar(c);
. }
. }
</pre>
Traverse the text backwards, from end to start
<pre>
. void traverseBackward(CharacterIterator& iter)
. {
. for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
. processChar(c);
. }
. }
</pre>
Traverse both forward and backward from a given position in the text.
Calls to notBoundary() in this example represents some additional stopping criteria.
<pre>
. void traverseOut(CharacterIterator& iter, UTextOffset pos)
. {
. UChar c;
. for (c = iter.setIndex(pos);
. c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
. c = iter.next()) {}
. UTextOffset end = iter.getIndex();
. for (c = iter.setIndex(pos);
. c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
. c = iter.previous()) {}
. UTextOffset start = iter.getIndex() + 1;
.
. cout &lt;&lt; "start: " &lt;&lt; start &lt;&lt; " end: " &lt;&lt; end &lt;&lt; endl;
. for (c = iter.setIndex(start); iter.getIndex() &lt; end; c = iter.next() ) {
. processChar(c);
. }
. }
</pre>
Creating a StringCharacterIterator and calling the test functions
<pre>
. void CharacterIterator_Example( void )
. {
. cout &lt;&lt; endl &lt;&lt; "===== CharacterIterator_Example: =====" &lt;&lt; endl;
. UnicodeString text("Ein kleiner Satz.");
. StringCharacterIterator iterator(text);
. cout &lt;&lt; "----- traverseForward: -----------" &lt;&lt; endl;
. traverseForward( iterator );
. cout &lt;&lt; endl &lt;&lt; endl &lt;&lt; "----- traverseBackward: ----------" &lt;&lt; endl;
. traverseBackward( iterator );
. cout &lt;&lt; endl &lt;&lt; endl &lt;&lt; "----- traverseOut: ---------------" &lt;&lt; endl;
. traverseOut( iterator, 7 );
. cout &lt;&lt; endl &lt;&lt; endl &lt;&lt; "-----" &lt;&lt; endl;
. }
</pre>
*/
class U_COMMON_API CharacterIterator
{
public:
/**
* Value returned by most of CharacterIterator's functions
* when the iterator has reached the limits of its iteration. */
static const UChar DONE;
/**
* Destructor. */
virtual ~CharacterIterator();
/**
* Returns true when both iterators refer to the same
* character in the same character-storage object. */
virtual bool_t operator==(const CharacterIterator& that) const = 0;
/**
* Returns true when the iterators refer to different
* text-storage objects, or to different characters in the
* same text-storage object.
* Implemented here as the negation of operator==(), so
* subclasses only need to provide operator==(). */
bool_t operator!=(const CharacterIterator& that) const { return !operator==(that); }
/**
* Returns a pointer to a new CharacterIterator of the same
* concrete class as this one, and referring to the same
* character in the same text-storage object as this one. The
* caller is responsible for deleting the new clone. */
virtual CharacterIterator*
clone(void) const = 0;
/**
* Generates a hash code for this iterator. */
virtual int32_t hashCode(void) const = 0;
/**
* Sets the iterator to refer to the first character in its
* iteration range, and returns that character. */
virtual UChar first(void) = 0;
/**
* Sets the iterator to refer to the last character in its
* iteration range, and returns that character. */
virtual UChar last(void) = 0;
/**
* Sets the iterator to refer to the "position"-th character
* in the text-storage object the iterator refers to, and
* returns that character. */
virtual UChar setIndex(UTextOffset position) = 0;
/**
* Returns the character the iterator currently refers to. */
virtual UChar current(void) const = 0;
/**
* Advances to the next character in the iteration range
* (toward last()), and returns that character. If there are
* no more characters to return, returns DONE. */
virtual UChar next(void) = 0;
/**
* Advances to the previous character in the iteration range
* (toward first()), and returns that character. If there are
* no more characters to return, returns DONE. */
virtual UChar previous(void) = 0;
/**
* Returns the numeric index in the underlying text-storage
* object of the character returned by first(). Since it's
* possible to create an iterator that iterates across only
* part of a text-storage object, this number isn't
* necessarily 0. */
virtual UTextOffset startIndex(void) const = 0;
/**
* Returns the numeric index in the underlying text-storage
* object of the position immediately BEYOND the character
* returned by last(). */
virtual UTextOffset endIndex(void) const = 0;
/**
* Returns the numeric index in the underlying text-storage
* object of the character the iterator currently refers to
* (i.e., the character returned by current()). */
virtual UTextOffset getIndex(void) const = 0;
/**
* Copies the text under iteration into the UnicodeString
* referred to by "result". @param result Receives a copy of
* the text under iteration. */
virtual void getText(UnicodeString& result) = 0;
/**
* Returns a UClassID for this CharacterIterator ("poor man's
* RTTI").<P> Despite the fact that this function is public,
* DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! */
virtual UClassID getDynamicClassID(void) const = 0;
protected:
/*
* Construction, copying and assignment are protected, so they can only
* be used by derived classes. This class declares no non-static data
* members, which is why all three bodies have nothing to copy.
*/
CharacterIterator() {}
CharacterIterator(const CharacterIterator&) {}
CharacterIterator& operator=(const CharacterIterator&) { return *this; }
};
#endif

View file

@ -0,0 +1,325 @@
/*******************************************************************************
*
* Copyright (C) 1998-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************/
#ifndef CONVERT_H
#define CONVERT_H
#include "unicode/unistr.h"
#include "unicode/ucnv.h"
class U_COMMON_API UnicodeConverterCPP
{
private:
/*Internal Data representation of the Converter*/
UConverter* myUnicodeConverter;
/*Debug method*/
void printRef(void) const;
/* list of converter and alias names */
static const char **availableConverterNames;
static int32_t availableConverterNamesCount;
public:
//Constructors and a destructor
/**
* Creates Unicode Conversion Object will default to LATIN1 <-> encoding
* @return An object Handle if successful or a NULL if the creation failed
*/
UnicodeConverterCPP();
/**
* Creates Unicode Conversion Object by specifying the codepage name. The name
* string is in ASCII format.
* @param code_set the pointer to a char[] object containing a codepage name. (I)
* @param UErrorCode Error status (I/O) IILLEGAL_ARGUMENT_ERROR will be returned if the string is empty.
* If the internal program does not work correctly, for example, if there's no such codepage,
* U_INTERNAL_PROGRAM_ERROR will be returned.
* @return An object Handle if successful or a NULL if the creation failed
*/
UnicodeConverterCPP(const char* name,
UErrorCode& err);
/**
*Creates a UnicodeConverter object with the names specified as unicode strings. The name should be limited to
*the ASCII-7 alphanumerics. Dash and underscore characters are allowed for readability, but are ignored in the
*search.
*@param code_set name of the uconv table in Unicode string (I)
*@param err error status (I/O) IILLEGAL_ARGUMENT_ERROR will be returned if the string is empty. If the internal
*program does not work correctly, for example, if there's no such codepage, U_INTERNAL_PROGRAM_ERROR will be
*returned.
*@return the created Unicode converter object
*/
UnicodeConverterCPP(const UnicodeString& name,
UErrorCode& err);
/**
* Creates Unicode Conversion Object using the codepage ID number.
* @param code_set a codepage # (I)
* @UErrorCode Error status (I/O) IILLEGAL_ARGUMENT_ERROR will be returned if the string is empty.
* If the internal program does not work correctly, for example, if there's no such codepage,
* U_INTERNAL_PROGRAM_ERROR will be returned.
* @return An object Handle if successful or a NULL if failed
*
*/
UnicodeConverterCPP(int32_t codepageNumber,
UConverterPlatform platform,
UErrorCode& err);
~UnicodeConverterCPP();
/**
* Transcodes the source UnicodeString to the target string in a codepage encoding
* with the specified Unicode converter. For example, if a Unicode to/from JIS
* converter is specified, the source string in Unicode will be transcoded to JIS
* encoding. The result will be stored in JIS encoding.
*
* @param source the source Unicode string
* @param target the target string in codepage encoding
* @param targetSize Input the number of bytes available in the "target" buffer, Output the number of bytes copied to it
* @param err the error status code. U_MEMORY_ALLOCATION_ERROR will be returned if the
* the internal process buffer cannot be allocated for transcoding. U_ILLEGAL_ARGUMENT_ERROR
* is returned if the converter is null or the source or target string is empty.
*/
void fromUnicodeString(char* target,
int32_t& targetSize,
const UnicodeString& source,
UErrorCode& err) const;
/**
* Transcode the source string in codepage encoding to the target string in
* Unicode encoding. For example, if a Unicode to/from JIS
* converter is specified, the source string in JIS encoding will be transcoded
* to Unicode encoding. The result will be stored in Unicode encoding.
* @param source the source string in codepage encoding
* @param target the target string in Unicode encoding
* @param targetSize : I/O parameter, Input size buffer, Output # of bytes copied to it
* @param err the error status code U_MEMORY_ALLOCATION_ERROR will be returned if the
* the internal process buffer cannot be allocated for transcoding. U_ILLEGAL_ARGUMENT_ERROR
* is returned if the converter is null or the source or target string is empty.
*/
void toUnicodeString(UnicodeString& target,
const char* source,
int32_t sourceSize,
UErrorCode& err) const;
/**
* Transcodes an array of unicode characters to an array of codepage characters.
* The source pointer is an I/O parameter, it starts out pointing at the place
* to begin translating, and ends up pointing after the first sequence of the bytes
* that it encounters that are semantically invalid.
* if T_UnicodeConverter_setMissingCharAction is called with an action other than STOP
* before a call is made to this API, consumed and source should point to the same place
* (unless target ends with an imcomplete sequence of bytes and flush is FALSE).
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
* codepage characters to. Output : points to after the last codepage character copied
* to target.
* @param targetLimit the pointer to the end of the target array
* @param source the source Unicode character array
* @param sourceLimit the pointer to the end of the source array
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
* in this call, FALSE otherwise. (future feature pending)
* @param UErrorCode the error status. U_ILLEGAL_ARGUMENT_ERROR will be returned if the
* converter is null.
*/
void fromUnicode(char*& target,
const char* targetLimit,
const UChar*& source,
const UChar* sourceLimit,
int32_t * offsets,
bool_t flush,
UErrorCode& err);
/**
* Converts an array of codepage characters into an array of unicode characters.
* The source pointer is an I/O parameter, it starts out pointing at the place
* to begin translating, and ends up pointing after the first sequence of the bytes
* that it encounters that are semantically invalid.
* if T_UnicodeConverter_setMissingUnicodeAction is called with an action other than STOP
* before a call is made to this API, consumed and source should point to the same place
* (unless target ends with an imcomplete sequence of bytes and flush is FALSE).
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
* Unicode characters to. Output : points to after the last UChar copied to target.
* @param targetLimit the pointer to the end of the target array
* @param source the source codepage character array
* @param sourceLimit the pointer to the end of the source array
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
* in this call, FALSE otherwise. (future feature pending)
* @param err the error code status U_ILLEGAL_ARGUMENT_ERROR will be returned if the
* converter is null, targetLimit < target, sourceLimit < source
*/
void toUnicode(UChar*& target,
const UChar* targetLimit,
const char*& source,
const char* sourceLimit,
int32_t * offsets,
bool_t flush,
UErrorCode& err);
/*
* Returns the maximum length of bytes used by a character. This varies between 1 and 4
* @return the max number of bytes per codepage character * converter is null, targetLimit < target, sourceLimit < source
*/
int8_t getMaxBytesPerChar(void) const;
/**
* Returns the minimum byte length for characters in this codepage. This is either
* 1 or 2 for all supported codepages.
* @return the minimum number of byte per codepage character
*/
int8_t getMinBytesPerChar(void) const;
/**
*Gets the type of conversion associated with the converter
* e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, EBCDIC_STATEFUL, LATIN_1
* @return the type of the converter
*/
UConverterType getType(void) const;
/**
*Gets the "starter" bytes for the converters of type MBCS
*will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
*is not MBCS.
*fills in an array of boolean, with the value of the byte as offset to the array.
*At return, if TRUE is found in at offset 0x20, it means that the byte 0x20 is a starter byte
*in this converter.
* @param starters: an array of size 256 to be filled in
* @param err: an array of size 256 to be filled in
* @see ucnv_getType
*/
void getStarters(bool_t starters[256],
UErrorCode& err) const;
/**
* Fills in the output parameter, subChars, with the substitution characters
* as multiple bytes.
* @param subChars the subsitution characters
* @param len the number of bytes of the substitution character array
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR will be returned if
* the converter is null. If the substitution character array is too small, an
* U_INDEX_OUTOFBOUNDS_ERROR will be returned.
*/
void getSubstitutionChars(char* subChars,
int8_t& len,
UErrorCode& err) const;
/**
* Sets the substitution chars when converting from unicode to a codepage. The
* substitution is specified as a string of 1-4 bytes, and may contain null byte.
* The fill-in parameter err will get the error status on return.
* @param cstr the substitution character array to be set with
* @param len the number of bytes of the substitution character array and upon return will contain the
* number of bytes copied to that buffer
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR if the converter is
* null. or if the number of bytes provided are not in the codepage's range (e.g length 1 for ucs-2)
*/
void setSubstitutionChars(const char* subChars,
int8_t len,
UErrorCode& err);
/**
* Resets the state of stateful conversion to the default state. This is used
* in the case of error to restart a conversion from a known default state.
*/
void resetState(void);
/**
* Gets the name of the converter (zero-terminated).
* the name will be the internal name of the converter
* @param converter the Unicode converter
* @param err the error status code. U_INDEX_OUTOFBOUNDS_ERROR in the converterNameLen is too
* small to contain the name.
*/
const char* getName( UErrorCode& err) const;
/**
* Gets a codepage number associated with the converter. This is not guaranteed
* to be the one used to create the converter. Some converters do not represent
* IBM registered codepages and return zero for the codepage number.
* The error code fill-in parameter indicates if the codepage number is available.
* @param err the error status code. U_ILLEGAL_ARGUMENT_ERROR will returned if
* the converter is null or if converter's data table is null.
* @return If any error occurrs, null will be returned.
*/
int32_t getCodepage(UErrorCode& err) const;
/**
* Returns the current setting action taken when a character from a codepage
* is missing. (Currently STOP or SUBSTITUTE).
* @return the action constant when a Unicode character cannot be converted to a
* codepage equivalent
*/
UConverterToUCallback getMissingCharAction(void) const;
/**
* Return the current setting action taken when a unicode character is missing.
* (Currently STOP or SUBSTITUTE).
* @return the action constant when a codepage character cannot be converted to a
* Unicode eqivalent
*/
UConverterFromUCallback getMissingUnicodeAction(void) const;
/**
* Sets the current setting action taken when a character from a codepage is
* missing. (Currently STOP or SUBSTITUTE).
* @param action the action constant if an equivalent codepage character is missing
*/
void setMissingCharAction(UConverterToUCallback action,
UErrorCode& err);
/**
* Sets the current setting action taken when a unicode character is missing.
* (currently T_UnicodeConverter_MissingUnicodeAction is either STOP or SUBSTITUTE,
* SKIP, CLOSEST_MATCH, ESCAPE_SEQ may be added in the future).
* @param action the action constant if an equivalent Unicode character is missing
* @param err the error status code
*/
void setMissingUnicodeAction(UConverterFromUCallback action,
UErrorCode& err);
/**
* Returns the localized name of the UnicodeConverter, if for any reason it is
* available, the internal name will be returned instead.
* @param displayLocale the valid Locale, from which we want to localize
* @param displayString a UnicodeString that is going to be filled in.
*/
void getDisplayName(const Locale& displayLocale,
UnicodeString& displayName) const;
/**
* Returns the codepage platform (the ICU-defined UConverterPlatform enum) of this
* UnicodeConverter.
* @param err the error code status
* @return the codepages platform
*/
UConverterPlatform getCodepagePlatform(UErrorCode& err) const;
UnicodeConverterCPP& operator=(const UnicodeConverterCPP& that);
bool_t operator==(const UnicodeConverterCPP& that) const;
bool_t operator!=(const UnicodeConverterCPP& that) const;
UnicodeConverterCPP(const UnicodeConverterCPP& that);
/**
* Returns the available names. Lazy evaluated, Library owns the storage
* @param num the number of available converters
* @param err the error code status
* @return the name array
*/
static const char* const* getAvailableNames(int32_t& num,
UErrorCode& err);
/**
* Iterates through every cached converter and frees all the unused ones
* @return the number of cached converters successfully deleted
*/
static int32_t flushCache(void);
};
#endif

View file

@ -0,0 +1,570 @@
/*
*****************************************************************************************
*
* Copyright (C) 1996-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*****************************************************************************************
*
* File locid.h
*
* Created by: Helena Shih
*
* Modification History:
*
* Date Name Description
* 02/11/97 aliu Changed gLocPath to fgLocPath and added methods to
* get and set it.
* 04/02/97 aliu Made operator!= inline; fixed return value of getName().
* 04/15/97 aliu Cleanup for AIX/Win32.
* 04/24/97 aliu Numerous changes per code review.
* 08/18/98 stephen Added tokenizeString(),changed getDisplayName()
* 09/08/98 stephen Moved definition of kEmptyString for Mac Port
* 11/09/99 weiv Added const char * getName() const;
*****************************************************************************************
*/
#ifndef LOCID_H
#define LOCID_H
#include "unicode/unistr.h"
/*
* Forward declarations of C-level structs. This header only names the types;
* UHashtable is used through a pointer in Locale's private static data.
*/
typedef struct ULocale ULocale;
typedef struct UHashtable UHashtable;
/*
* Size, in chars, of the fixed Locale::language buffer.
* NOTE(review): presumably a two-letter ISO-639 code plus a terminating NUL - confirm.
*/
#define ULOC_LANG_CAPACITY 3
/*
* Size, in chars, of the fixed Locale::country buffer.
* NOTE(review): presumably a two-letter ISO-3166 code plus a terminating NUL - confirm.
*/
#define ULOC_COUNTRY_CAPACITY 3
/* Size, in chars, of the inline Locale::fullNameBuffer. */
#define ULOC_FULLNAME_CAPACITY 50
/**
*
* A <code>Locale</code> object represents a specific geographical, political,
* or cultural region. An operation that requires a <code>Locale</code> to perform
* its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
* to tailor information for the user. For example, displaying a number
* is a locale-sensitive operation--the number should be formatted
* according to the customs/conventions of the user's native country,
* region, or culture.
*
* <P>
* You create a <code>Locale</code> object using one of the three constructors in
* this class:
* <blockquote>
* <pre>
* . Locale( const UnicodeString& newLanguage);
* .
* . Locale( const UnicodeString& language,
* . const UnicodeString& country);
* .
* . Locale( const UnicodeString& language,
* . const UnicodeString& country,
* . const UnicodeString& variant);
* </pre>
* </blockquote>
* The first argument to the constructors is a valid <STRONG>ISO
* Language Code.</STRONG> These codes are the lower-case two-letter
* codes as defined by ISO-639.
* You can find a full list of these codes at a number of sites, such as:
* <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
* <code>http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</code></a>
*
* <P>
* The second argument to the constructors is a valid <STRONG>ISO Country
* Code.</STRONG> These codes are the upper-case two-letter codes
* as defined by ISO-3166.
* You can find a full list of these codes at a number of sites, such as:
* <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
* <code>http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</code></a>
*
* <P>
* The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
* The Variant codes are vendor and browser-specific.
* For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
* Where there are two variants, separate them with an underscore, and
* put the most important one first. For
* example, a Traditional Spanish collation might be referenced, with
* "ES", "ES", "Traditional_WIN".
*
* <P>
* Because a <code>Locale</code> object is just an identifier for a region,
* no validity check is performed when you construct a <code>Locale</code>.
* If you want to see whether particular resources are available for the
* <code>Locale</code> you construct, you must query those resources. For
* example, ask the <code>NumberFormat</code> for the locales it supports
* using its <code>getAvailableLocales</code> method.
* <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
* locale, you get back the best available match, not necessarily
* precisely what you asked for. For more information, look at
* <a href="java.util.ResourceBundle.html"><code>ResourceBundle</code></a>.
*
* <P>
* The <code>Locale</code> class provides a number of convenient constants
* that you can use to create <code>Locale</code> objects for commonly used
* locales. For example, the following refers to a <code>Locale</code> object
* for the United States:
* <blockquote>
* <pre>
* . Locale::US
* </pre>
* </blockquote>
*
* <P>
* Once you've created a <code>Locale</code> you can query it for information about
* itself. Use <code>getCountry</code> to get the ISO Country Code and
* <code>getLanguage</code> to get the ISO Language Code. You can
* use <code>getDisplayCountry</code> to get the
* name of the country suitable for displaying to the user. Similarly,
* you can use <code>getDisplayLanguage</code> to get the name of
* the language suitable for displaying to the user. Interestingly,
* the <code>getDisplayXXX</code> methods are themselves locale-sensitive
* and have two versions: one that uses the default locale and one
* that takes a locale as an argument and displays the name or country in
* a language appropriate to that locale.
*
* <P>
* The TIFC provides a number of classes that perform locale-sensitive
* operations. For example, the <code>NumberFormat</code> class formats
* numbers, currency, or percentages in a locale-sensitive manner. Classes
* such as <code>NumberFormat</code> have a number of convenience methods
* for creating a default object of that type. For example, the
* <code>NumberFormat</code> class provides these three convenience methods
* for creating a default <code>NumberFormat</code> object:
* <blockquote>
* <pre>
* . UErrorCode success = U_ZERO_ERROR;
* . Locale myLocale;
* . NumberFormat *nf;
* .
* . nf = NumberFormat::createInstance( success ); delete nf;
* . nf = NumberFormat::createCurrencyInstance( success ); delete nf;
* . nf = NumberFormat::createPercentInstance( success ); delete nf;
* </pre>
* </blockquote>
* Each of these methods has two variants; one with an explicit locale
* and one without; the latter using the default locale.
* <blockquote>
* <pre>
* . nf = NumberFormat::createInstance( myLocale, success ); delete nf;
* . nf = NumberFormat::createCurrencyInstance( myLocale, success ); delete nf;
* . nf = NumberFormat::createPercentInstance( myLocale, success ); delete nf;
* </pre>
* </blockquote>
* A <code>Locale</code> is the mechanism for identifying the kind of object
* (<code>NumberFormat</code>) that you would like to get. The locale is
* <STRONG>just</STRONG> a mechanism for identifying objects,
* <STRONG>not</STRONG> a container for the objects themselves.
*
* <P>
* Each class that performs locale-sensitive operations allows you
* to get all the available objects of that type. You can sift
* through these objects by language, country, or variant,
* and use the display names to present a menu to the user.
* For example, you can create a menu of all the collation objects
* suitable for a given language. Such classes implement these
* three class methods:
* <blockquote>
* <pre>
* . static Locale* getAvailableLocales(int32_t& numLocales)
* . static UnicodeString& getDisplayName(const Locale& objectLocale,
* . const Locale& displayLocale,
* . UnicodeString& displayName)
* . static UnicodeString& getDisplayName(const Locale& objectLocale,
* . UnicodeString& displayName)
* </pre>
* </blockquote>
*/
class U_COMMON_API Locale
{
public:
/**
* Useful constants for language.
*/
static const Locale ENGLISH;
static const Locale FRENCH;
static const Locale GERMAN;
static const Locale ITALIAN;
static const Locale JAPANESE;
static const Locale KOREAN;
static const Locale CHINESE;
static const Locale SIMPLIFIED_CHINESE;
static const Locale TRADITIONAL_CHINESE;
/**
* Useful constants for country.
*/
static const Locale FRANCE;
static const Locale GERMANY;
static const Locale ITALY;
static const Locale JAPAN;
static const Locale KOREA;
static const Locale CHINA; // Alias for PRC
static const Locale PRC; // Peoples Republic of China
static const Locale TAIWAN; // Republic of China
static const Locale UK;
static const Locale US;
static const Locale CANADA;
static const Locale CANADA_FRENCH;
/**
* Construct an empty locale. It's only used when a fill-in parameter is
* needed.
*/
Locale();
/**
* Construct a locale from language, country, variant.
* The two shorter overloads below take the same leading arguments and simply
* omit the trailing optional ones.
*
* @param language Lowercase two-letter ISO-639 code.
* @param country Uppercase two-letter ISO-3166 code. (optional)
* @param variant Uppercase vendor and browser specific code. See class
* description. (optional)
*/
Locale( const UnicodeString& language,
const UnicodeString& country ,
const UnicodeString& variant );
// Overload taking a language and a country only (no variant argument).
Locale( const UnicodeString& language,
const UnicodeString& country );
// Overload taking a language only. Not declared explicit, so a UnicodeString
// converts implicitly to a Locale.
Locale( const UnicodeString& language);
/**
* Initializes a Locale object from another Locale object.
*
* @param other The Locale object being copied in.
*/
Locale(const Locale& other);
/**
* Destructor
*/
~Locale() ;
/**
* Replaces the entire contents of *this with the specified value.
*
* @param other The Locale object being copied in.
* @return *this
*/
Locale& operator=(const Locale& other);
/**
* Checks if two locale keys are the same.
*
* @param other The locale key object to be compared with this.
* @return True if the two locale keys are the same, false otherwise.
*/
bool_t operator==(const Locale& other) const;
/**
* Checks if two locale keys are not the same.
*
* @param other The locale key object to be compared with this.
* @return True if the two locale keys are not the same, false
* otherwise.
*/
bool_t operator!=(const Locale& other) const;
/**
* Common methods of getting the current default Locale. Used for the
* presentation: menus, dialogs, etc. Generally set once when your applet or
* application is initialized, then never reset. (If you do reset the
* default locale, you probably want to reload your GUI, so that the change
* is reflected in your interface.)
*
* More advanced programs will allow users to use different locales for
* different fields, e.g. in a spreadsheet.
*
* Note that the initial setting will match the host system.
*
* @return A non-const reference to the default Locale object.
*/
static Locale& getDefault(void);
/**
* Sets the default. Normally set once at the beginning of applet or
* application, then never reset. setDefault does NOT reset the host locale.
*
* @param newLocale Locale to set to.
* @param success An UErrorCode fill-in parameter.
*/
static void setDefault(const Locale& newLocale,
UErrorCode& success);
/**
* Fills in "lang" with the locale's two-letter ISO-639 language code.
* @param lang Receives the language code.
* @return A reference to "lang".
*/
UnicodeString& getLanguage( UnicodeString& lang) const;
/**
* Fills in "cntry" with the locale's two-letter ISO-3166 country code.
* @param cntry Receives the country code.
* @return A reference to "cntry".
*/
UnicodeString& getCountry( UnicodeString& cntry) const;
/**
* Fills in "var" with the locale's variant code.
* @param var Receives the variant code.
* @return A reference to "var".
*/
UnicodeString& getVariant( UnicodeString& var) const;
/**
* Fills in "name" with the programmatic name of the entire locale, with the language,
* country and variant separated by underbars. If a field is missing, at
* most one underbar will occur. Example: "en", "de_DE", "en_US_WIN",
* "de_POSIX", "fr_MAC"
* @param name Receives the programmatic locale name.
* @return A reference to "name".
*/
UnicodeString& getName( UnicodeString& name) const;
/**
* Returns the programmatic name of the entire locale, with the language,
* country and variant separated by underbars. If a field is missing, at
* most one underbar will occur. Example: "en", "de_DE", "en_US_WIN",
* "de_POSIX", "fr_MAC"
* @return A pointer to the programmatic name as a C string.
* NOTE(review): the lifetime of the returned buffer is not stated in
* this header - presumably it is owned by this Locale; confirm.
*/
const char * getName() const;
/**
* Fills in "name" with the locale's three-letter language code, as specified
* in ISO draft standard ISO-639-2.
* @param name Receives the three-letter language code.
* @param status An UErrorCode to receive any MISSING_RESOURCE_ERRORs
* @return A reference to "name".
*/
UnicodeString& getISO3Language(UnicodeString& name, UErrorCode& status) const;
// this version is deprecated, use getISO3Language(UnicodeString&, UErrorCode&)
UnicodeString& getISO3Language(UnicodeString& name) const;
/**
* Fills in "name" with the locale's three-letter ISO-3166 country code.
* @param name Receives the three-letter country code.
* @param status An UErrorCode to receive any MISSING_RESOURCE_ERRORs
* @return A reference to "name".
*/
UnicodeString& getISO3Country( UnicodeString& name, UErrorCode& status) const;
// this version is deprecated, use getISO3Country(UnicodeString&, UErrorCode&);
UnicodeString& getISO3Country( UnicodeString& name) const;
/**
* Returns the Windows LCID value corresponding to this locale.
* This value is stored in the resource data for the locale as a one-to-four-digit
* hexadecimal number. If the resource is missing, in the wrong format, or
* there is no Windows LCID value that corresponds to this locale, returns 0.
*/
uint32_t getLCID(void) const;
/**
* Fills in "dispLang" with the name of this locale's language in a format suitable for
* user display in the default locale. For example, if the locale's language code is
* "fr" and the default locale's language code is "en", this function would set
* dispLang to "French".
* @param dispLang Receives the language's display name.
* @return A reference to "dispLang".
*/
UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const;
/**
* Fills in "dispLang" with the name of this locale's language in a format suitable for
* user display in the locale specified by "inLocale". For example, if the locale's
* language code is "en" and inLocale's language code is "fr", this function would set
* dispLang to "Anglais".
* @param inLocale Specifies the locale to be used to display the name. In other words,
* if the locale's language code is "en", passing Locale::FRENCH for
* inLocale would result in "Anglais", while passing Locale::GERMAN
* for inLocale would result in "Englisch".
* @param dispLang Receives the language's display name.
* @return A reference to "dispLang".
*/
UnicodeString& getDisplayLanguage( const Locale& inLocale,
UnicodeString& dispLang) const;
/**
* Fills in "dispCountry" with the name of this locale's country in a format suitable
* for user display in the default locale. For example, if the locale's country code
* is "FR" and the default locale's language code is "en", this function would set
* dispCountry to "France".
* @param dispCountry Receives the country's display name.
* @return A reference to "dispCountry".
*/
UnicodeString& getDisplayCountry( UnicodeString& dispCountry) const;
/**
* Fills in "dispCountry" with the name of this locale's country in a format suitable
* for user display in the locale specified by "inLocale". For example, if the locale's
* country code is "US" and inLocale's language code is "fr", this function would set
* dispCountry to "Etats-Unis".
* @param inLocale Specifies the locale to be used to display the name. In other
* words, if the locale's country code is "US", passing
* Locale::FRENCH for inLocale would result in "États-Unis", while
* passing Locale::GERMAN for inLocale would result in
* "Vereinigte Staaten".
* @param dispCountry Receives the country's display name.
* @return A reference to "dispCountry".
*/
UnicodeString& getDisplayCountry( const Locale& inLocale,
UnicodeString& dispCountry) const;
/**
* Fills in "dispVar" with the name of this locale's variant code in a format suitable
* for user display in the default locale.
* @param dispVar Receives the variant's name.
* @return A reference to "dispVar".
*/
UnicodeString& getDisplayVariant( UnicodeString& dispVar) const;
/**
* Fills in "dispVar" with the name of this locale's variant code in a format
* suitable for user display in the locale specified by "inLocale".
* @param inLocale Specifies the locale to be used to display the name.
* @param dispVar Receives the variant's display name.
* @return A reference to "dispVar".
*/
UnicodeString& getDisplayVariant( const Locale& inLocale,
UnicodeString& dispVar) const;
/**
* Fills in "name" with the name of this locale in a format suitable for user display
* in the default locale. This function uses getDisplayLanguage(), getDisplayCountry(),
* and getDisplayVariant() to do its work, and outputs the display name in the format
* "language (country[,variant])". For example, if the default locale is en_US, then
* fr_FR's display name would be "French (France)", and es_MX_Traditional's display name
* would be "Spanish (Mexico,Traditional)".
* @param name Receives the locale's display name.
* @return A reference to "name".
*/
UnicodeString& getDisplayName( UnicodeString& name) const;
/**
* Fills in "name" with the name of this locale in a format suitable for user display
* in the locale specified by "inLocale". This function uses getDisplayLanguage(),
* getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
* name in the format "language (country[,variant])". For example, if inLocale is
* fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's
* display name would be "norvégien (Norvège,NY)".
* @param inLocale Specifies the locale to be used to display the name.
* @param name Receives the locale's display name.
* @return A reference to "name".
*/
UnicodeString& getDisplayName( const Locale& inLocale,
UnicodeString& name) const;
/**
* Generates a hash code for the locale. Since Locales are often used in hashtables,
* caches the value for speed.
*/
int32_t hashCode(void) const;
/**
* Returns a list of all installed locales.
* @param count Receives the number of locales in the list.
* @return A pointer to an array of Locale objects. This array is the list
* of all locales with installed resource files. The caller does NOT
* get ownership of this list, and must NOT delete it.
*/
static const Locale* getAvailableLocales(int32_t& count);
/**
* Returns a list of all 2-letter country codes defined in ISO 3166.
* Can be used to create Locales.
* @param count Receives the number of countries in the list.
* @return A pointer to an array of UnicodeString objects. The caller does NOT
* get ownership of this list, and must NOT delete it.
*/
static const UnicodeString* getISOCountries(int32_t& count);
/**
* Returns a list of all 2-letter language codes defined in ISO 639.
* Can be used to create Locales.
* [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed.
* The list this function returns includes both the new and the old codes for the
* languages whose codes have changed.]
* @param count Receives the number of languages in the list.
* @return A pointer to an array of UnicodeString objects. The caller does NOT
* get ownership of this list, and must NOT delete it.
*/
static const UnicodeString* getISOLanguages(int32_t& count);
/**
* Deprecated 1999dec14 - Get the path to the ResourceBundle locale files. This path will be a
* platform-specific path name ending in a directory separator, so that file
* names may be concatenated to it. This path may be changed by calling
* setDataDirectory(). If setDataDirectory() has not been called yet,
* getDataDirectory() will return a platform-dependent default path as
* specified by TPlatformUtilities::getDefaultDataDirectory().
*
* @return Current data path.
*/
static const char* getDataDirectory(void);
/**
* Deprecated 1999dec14 - Set the path to the ResourceBundle locale files. After making this call,
* all objects in the Unicode Analytics package will read ResourceBundle
* data files in the specified directory in order to obtain locale data.
*
* @param path The new data path to be set to.
*/
static void setDataDirectory(const char* path);
/**
* NOTE(review): undocumented in the original header. Judging by the signature,
* this presumably (re)initializes this object from a C-string locale ID and
* returns *this - confirm against the implementation before relying on it.
*
* @param cLocaleID The locale ID as a C string.
* @return A reference to a Locale (presumably *this - confirm).
*/
Locale& init(const char* cLocaleID);
protected: // only protected for testing purposes. DO NOT USE.
void setFromPOSIXID(const UnicodeString& posixID); // set it from a single string.
void setFromPOSIXID(const char *posixID); // set it from a single string.
/**
* Given an ISO country code, returns an array of Strings containing the ISO
* codes of the languages spoken in that country. Official languages are listed
* in the returned table before unofficial languages, but other than that, the
* order of the returned list is indeterminate. If the value the user passes in
* for "country" is not a valid ISO 3166 country code, or if we don't have language
* information for the specified country, this function returns an empty array.
*
* [This function is not currently part of Locale's API, but is needed in the
* implementation. We hope to add it to the API in a future release.]
* @param country The ISO 2-letter country code of the desired country
* @param count Receives the number of languages in the list.
* @return A pointer to an array of UnicodeString objects. The caller does NOT
* get ownership of this list, and must NOT delete it.
*/
static const UnicodeString* getLanguagesForCountry( const UnicodeString& country,
int32_t& count);
private:
/**
* NOTE(review): the comment previously attached here described initializing a
* Locale from a ULocale struct, which does not match this declaration.
* Presumably this recomputes the value stored in khashCode - confirm against
* the implementation.
*/
void setHashCode(void);
// Fixed-size storage for the language code (ULOC_LANG_CAPACITY chars).
char language[ULOC_LANG_CAPACITY];
// Fixed-size storage for the country code (ULOC_COUNTRY_CAPACITY chars).
char country[ULOC_COUNTRY_CAPACITY];
// NOTE(review): ownership and lifetime of the pointee are not visible in this header.
char* variant;
// NOTE(review): presumably points at fullNameBuffer when the name fits in
// ULOC_FULLNAME_CAPACITY chars and at separately allocated storage otherwise - confirm.
char* fullName;
char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
// NOTE(review): presumably the cached value returned by hashCode() - confirm.
int32_t khashCode;
// NOTE(review): the static lists below presumably back getAvailableLocales(),
// getISOLanguages() and getISOCountries() respectively - confirm.
static Locale *localeList;
static int32_t localeListCount;
static UnicodeString *isoLanguages;
static int32_t isoLanguagesCount;
static UnicodeString *isoCountries;
static int32_t isoCountriesCount;
// NOTE(review): presumably the country-to-languages data used by
// getLanguagesForCountry() - confirm.
static UHashtable *ctry2LangMapping;
static const UnicodeString compressedCtry2LangMapping;
// NOTE(review): presumably the object returned by getDefault() - confirm.
static Locale fgDefaultLocale;
};
/**
* Inequality test, defined as the logical negation of operator==.
*
* @param other The locale to compare with this one.
* @return The negated result of operator==(other).
*/
inline bool_t
Locale::operator!=(const Locale& other) const
{
    // Delegate to the member operator== so the two comparisons cannot diverge.
    const bool_t isSame = this->operator==(other);
    return !isSame;
}
#endif

View file

@ -0,0 +1,717 @@
/*
********************************************************************
* COPYRIGHT:
* Copyright (c) 1996-1999, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
*/
#ifndef NORMLZR_H
#define NORMLZR_H
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/chariter.h"
/**
* <tt>Normalizer</tt> transforms Unicode text into an equivalent composed or
* decomposed form, allowing for easier sorting and searching of text.
* <tt>Normalizer</tt> supports the standard normalization forms described in
* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
* Unicode Technical Report #15</a>.
* <p>
* Characters with accents or other adornments can be encoded in
* several different ways in Unicode. For example, take the character "Á"
* (A-acute). In Unicode, this can be encoded as a single character (the
* "composed" form):
* <pre>
* 00C1 LATIN CAPITAL LETTER A WITH ACUTE</pre>
* or as two separate characters (the "decomposed" form):
* <pre>
* 0041 LATIN CAPITAL LETTER A
* 0301 COMBINING ACUTE ACCENT</pre>
* <p>
* To a user of your program, however, both of these sequences should be
* treated as the same "user-level" character "Á". When you are searching or
* comparing text, you must ensure that these two sequences are treated
* equivalently. In addition, you must handle characters with more than one
* accent. Sometimes the order of a character's combining accents is
* significant, while in other cases accent sequences in different orders are
* really equivalent.
* <p>
* Similarly, the string "ffi" can be encoded as three separate letters:
* <pre>
* 0066 LATIN SMALL LETTER F
* 0066 LATIN SMALL LETTER F
* 0069 LATIN SMALL LETTER I</pre>
* or as the single character
* <pre>
* FB03 LATIN SMALL LIGATURE FFI</pre>
* <p>
* The ffi ligature is not a distinct semantic character, and strictly speaking
* it shouldn't be in Unicode at all, but it was included for compatibility
* with existing character sets that already provided it. The Unicode standard
* identifies such characters by giving them "compatibility" decompositions
* into the corresponding semantic characters. When sorting and searching, you
* will often want to use these mappings.
* <p>
* <tt>Normalizer</tt> helps solve these problems by transforming text into the
* canonical composed and decomposed forms as shown in the first example above.
* In addition, you can have it perform compatibility decompositions so that
* you can treat compatibility characters the same as their equivalents.
* Finally, <tt>Normalizer</tt> rearranges accents into the proper canonical
* order, so that you do not have to worry about accent rearrangement on your
* own.
* <p>
* <tt>Normalizer</tt> adds one optional behavior, {@link #IGNORE_HANGUL},
* that differs from
* the standard Unicode Normalization Forms. This option can be passed
* to the {@link #Normalizer constructors} and to the static
* {@link #compose compose} and {@link #decompose decompose} methods. This
* option, and any that are added in the future, will be turned off by default.
* <p>
* There are three common usage models for <tt>Normalizer</tt>. In the first,
* the static {@link #normalize normalize()} method is used to process an
* entire input string at once. Second, you can create a <tt>Normalizer</tt>
* object and use it to iterate through the normalized form of a string by
* calling {@link #first} and {@link #next}. Finally, you can use the
* {@link #setIndex setIndex()} and {@link #getIndex} methods to perform
* random-access iteration, which is very useful for searching.
* <p>
* <b>Note:</b> <tt>Normalizer</tt> objects behave like iterators and have
* methods such as <tt>setIndex</tt>, <tt>next</tt>, <tt>previous</tt>, etc.
* You should note that while the <tt>setIndex</tt> and <tt>getIndex</tt> refer
* to indices in the underlying <em>input</em> text being processed, the
* <tt>next</tt> and <tt>previous</tt> methods iterate through characters
* in the normalized <em>output</em>. This means that there is not
* necessarily a one-to-one correspondence between characters returned
* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
* returned from <tt>setIndex</tt> and <tt>getIndex</tt>. It is for this
* reason that <tt>Normalizer</tt> does not implement the
* {@link CharacterIterator} interface.
* <p>
* <b>Note:</b> <tt>Normalizer</tt> is currently based on version 2.1.8
* of the <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.
* It will be updated as later versions of Unicode are released. If you are
* using this class on a JDK that supports an earlier version of Unicode, it
* is possible that <tt>Normalizer</tt> may generate composed or decomposed
* characters for which your JDK's {@link java.lang.Character} class does not
* have any data.
* <p>
* @author Laura Werner, Mark Davis
*/
class U_COMMON_API Normalizer
{
public:
// This tells us what the bits in the "mode" mean.
enum {
COMPAT_BIT = 1,
DECOMP_BIT = 2,
COMPOSE_BIT = 4
};
/** */
static const UChar DONE;
/** The mode of a Normalizer object */
enum EMode {
/**
* Null operation for use with the {@link #Normalizer constructors}
* and the static {@link #normalize normalize} method. This value tells
* the <tt>Normalizer</tt> to do nothing but return unprocessed characters
* from the underlying String or CharacterIterator. If you have code which
* requires raw text at some times and normalized text at others, you can
* use <tt>NO_OP</tt> for the cases where you want raw text, rather
* than having a separate code path that bypasses <tt>Normalizer</tt>
* altogether.
* <p>
* @see #setMode
*/
NO_OP = 0,
/**
* Canonical decomposition followed by canonical composition. Used with
* the {@link #Normalizer constructors} and the static
* {@link #normalize normalize}
* method to determine the operation to be performed.
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>C</b>.
* <p>
* @see #setMode
*/
COMPOSE = COMPOSE_BIT,
/**
* Compatibility decomposition followed by canonical composition.
* Used with the {@link #Normalizer constructors} and the static
* {@link #normalize normalize} method to determine the operation to be
* performed.
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>KC</b>.
* <p>
* @see #setMode
*/
COMPOSE_COMPAT = COMPOSE_BIT | COMPAT_BIT,
/**
* Canonical decomposition. This value is passed to the
* {@link #Normalizer constructors} and the static
* {@link #normalize normalize}
* method to determine the operation to be performed.
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>D</b>.
* <p>
* @see #setMode
*/
DECOMP = DECOMP_BIT,
/**
* Compatibility decomposition. This value is passed to the
* {@link #Normalizer constructors} and the static
* {@link #normalize normalize}
* method to determine the operation to be performed.
* <p>
* If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
* off, this operation produces output that is in
* <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
* Form</a>
* <b>KD</b>.
* <p>
* @see #setMode
*/
DECOMP_COMPAT = DECOMP_BIT | COMPAT_BIT
};
/** The options for a Normalizer object */
enum {
/**
* Option to disable Hangul/Jamo composition and decomposition.
* This option applies to Korean text,
* which can be represented either in the Jamo alphabet or in Hangul
* characters, which are really just two or three Jamo combined
* into one visual glyph. Since Jamo takes up more storage space than
* Hangul, applications that process only Hangul text may wish to turn
* this option on when decomposing text.
* <p>
* The Unicode standard treats Hangul to Jamo conversion as a
* canonical decomposition, so this option must be turned <b>off</b> if you
* wish to transform strings into one of the standard
* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
* Unicode Normalization Forms</a>.
* <p>
* @see #setOption
*/
IGNORE_HANGUL = 0x001
};
// Constructors
/**
* Creates a new <tt>Normalizer</tt> object for iterating over the
* normalized form of a given string.
* <p>
* @param str The string to be normalized. The normalization
* will start at the beginning of the string.
*
* @param mode The normalization mode.
*/
Normalizer(const UnicodeString& str,
EMode mode);
/**
* Creates a new <tt>Normalizer</tt> object for iterating over the
* normalized form of a given string.
* <p>
* The <tt>options</tt> parameter specifies which optional
* <tt>Normalizer</tt> features are to be enabled for this object.
* <p>
* @param str The string to be normalized. The normalization
* will start at the beginning of the string.
*
* @param mode The normalization mode.
*
* @param opt Any optional features to be enabled.
* Currently the only available option is {@link #IGNORE_HANGUL}
* If you want the default behavior corresponding to one of the
* standard Unicode Normalization Forms, use 0 for this argument
*/
Normalizer(const UnicodeString& str,
EMode mode,
int32_t opt);
/**
* Creates a new <tt>Normalizer</tt> object for iterating over the
* normalized form of a given UChar string.
* <p>
* @param str The string to be normalized. The normalization
* will start at the beginning of the string.
*
* @param length Length of the string.
*
* @param mode The normalization mode.
*/
Normalizer(const UChar* str,
int32_t length,
EMode mode);
/**
* Creates a new <tt>Normalizer</tt> object for iterating over the
* normalized form of the given text.
* <p>
* @param iter The input text to be normalized. The normalization
* will start at the beginning of the string.
*
* @param mode The normalization mode.
*
*/
Normalizer(const CharacterIterator& iter,
EMode mode);
/**
* Creates a new <tt>Normalizer</tt> object for iterating over the
* normalized form of the given text.
* <p>
* @param iter The input text to be normalized. The normalization
* will start at the beginning of the string.
*
* @param mode The normalization mode.
*
* @param opt Any optional features to be enabled.
* Currently the only available option is {@link #IGNORE_HANGUL}
* If you want the default behavior corresponding to one of the
* standard Unicode Normalization Forms, use 0 for this argument
*/
Normalizer(const CharacterIterator& iter,
EMode mode,
int32_t opt);
/**
* Copy constructor.
*/
Normalizer(const Normalizer& copy);
/**
* Destructor
*/
~Normalizer();
//-------------------------------------------------------------------------
// Static utility methods
//-------------------------------------------------------------------------
/**
* Normalizes a <tt>String</tt> using the given normalization operation.
* <p>
* The <tt>options</tt> parameter specifies which optional
* <tt>Normalizer</tt> features are to be enabled for this operation.
* Currently the only available option is {@link #IGNORE_HANGUL}.
* If you want the default behavior corresponding to one of the standard
* Unicode Normalization Forms, use 0 for this argument.
* <p>
* @param source the input string to be normalized.
*
* @param mode the normalization mode
*
* @param options the optional features to be enabled.
*
* @param result The normalized string (on output).
*
* @param status The error code.
*/
static void normalize(const UnicodeString& source,
EMode mode,
int32_t options,
UnicodeString& result,
UErrorCode &status);
/**
* Compose a <tt>String</tt>.
* <p>
* The <tt>options</tt> parameter specifies which optional
* <tt>Normalizer</tt> features are to be enabled for this operation.
* Currently the only available option is {@link #IGNORE_HANGUL}.
* If you want the default behavior corresponding
* to Unicode Normalization Form <b>C</b> or <b>KC</b>,
* use 0 for this argument.
* <p>
* @param source the string to be composed.
*
* @param compat Perform compatibility decomposition before composition.
* If this argument is <tt>false</tt>, only canonical
* decomposition will be performed.
*
* @param options the optional features to be enabled.
*
* @param result The composed string (on output).
*
* @param status The error code.
*/
static void compose(const UnicodeString& source,
bool_t compat,
int32_t options,
UnicodeString& result,
UErrorCode &status);
/**
* Static method to decompose a <tt>String</tt>.
* <p>
* The <tt>options</tt> parameter specifies which optional
* <tt>Normalizer</tt> features are to be enabled for this operation.
* Currently the only available option is {@link #IGNORE_HANGUL}.
* The desired options should be OR'ed together to determine the value
* of this argument. If you want the default behavior corresponding
* to Unicode Normalization Form <b>D</b> or <b>KD</b>,
* use 0 for this argument.
* <p>
* @param source the string to be decomposed.
*
* @param compat Perform compatibility decomposition.
* If this argument is <tt>false</tt>, only canonical
* decomposition will be performed.
*
* @param options the optional features to be enabled.
*
* @param result The decomposed string (on output).
*
* @param status The error code.
*
* This method has no return value; the decomposed text is delivered
* through <tt>result</tt>.
*/
static void decompose(const UnicodeString& source,
bool_t compat,
int32_t options,
UnicodeString& result,
UErrorCode &status);
//-------------------------------------------------------------------------
// CharacterIterator overrides
//-------------------------------------------------------------------------
/**
* Return the current character in the normalized text.
*/
UChar current(void) const;
/**
* Return the first character in the normalized text. This resets
* the <tt>Normalizer's</tt> position to the beginning of the text.
*/
UChar first(void);
/**
* Return the last character in the normalized text. This resets
* the <tt>Normalizer's</tt> position to be just before the
* input text corresponding to that normalized character.
*/
UChar last(void);
/**
* Return the next character in the normalized text and advance
* the iteration position by one. If the end
* of the text has already been reached, {@link #DONE} is returned.
*/
UChar next(void);
/**
* Return the previous character in the normalized text and decrement
* the iteration position by one. If the beginning
* of the text has already been reached, {@link #DONE} is returned.
*/
UChar previous(void);
/**
* Set the iteration position in the input text that is being normalized
* and return the first normalized character at that position.
* <p>
* <b>Note:</b> This method sets the position in the <em>input</em> text,
* while {@link #next} and {@link #previous} iterate through characters
* in the normalized <em>output</em>. This means that there is not
* necessarily a one-to-one correspondence between characters returned
* by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
* returned from <tt>setIndex</tt> and {@link #getIndex}.
* <p>
* @param index the desired index in the input text.
*
* @return the first normalized character that is the result of iterating
* forward starting at the given index.
*
* @throws IllegalArgumentException if the given index is less than
* {@link #getBeginIndex} or greater than {@link #getEndIndex}.
*/
UChar setIndex(UTextOffset index);
/**
* Reset the iterator so that it is in the same state that it was just after
* it was constructed. A subsequent call to <tt>next</tt> will return the first
* character in the normalized text. In contrast, calling <tt>setIndex(0)</tt> followed
* by <tt>next</tt> will return the <em>second</em> character in the normalized text,
* because <tt>setIndex</tt> itself returns the first character
*/
void reset(void);
/**
* Retrieve the current iteration position in the input text that is
* being normalized. This method is useful in applications such as
* searching, where you need to be able to determine the position in
* the input text that corresponds to a given normalized output character.
* <p>
* <b>Note:</b> This method returns the position in the <em>input</em>, while
* {@link #next} and {@link #previous} iterate through characters in the
* <em>output</em>. This means that there is not necessarily a one-to-one
* correspondence between characters returned by <tt>next</tt> and
* <tt>previous</tt> and the indices passed to and returned from
* <tt>setIndex</tt> and {@link #getIndex}.
*
*/
UTextOffset getIndex(void) const;
/**
* Retrieve the index of the start of the input text. This is the begin index
* of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
* over which this <tt>Normalizer</tt> is iterating
*/
UTextOffset startIndex(void) const;
/**
* Retrieve the index of the end of the input text. This is the end index
* of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
* over which this <tt>Normalizer</tt> is iterating
*/
UTextOffset endIndex(void) const;
/**
* Returns true when both iterators refer to the same character in the same
* character-storage object.
*/
// virtual bool_t operator==(const CharacterIterator& that) const;
bool_t operator==(const Normalizer& that) const;
inline bool_t operator!=(const Normalizer& that) const;
/**
* Returns a pointer to a new Normalizer that is a clone of this one.
* The caller is responsible for deleting the new clone.
*/
Normalizer* clone(void) const;
/**
* Generates a hash code for this iterator.
*/
int32_t hashCode(void) const;
//-------------------------------------------------------------------------
// Property access methods
//-------------------------------------------------------------------------
/**
* Set the normalization mode for this object.
* <p>
* <b>Note:</b> If the normalization mode is changed while iterating
* over a string, calls to {@link #next} and {@link #previous} may
* return previously buffered characters in the old normalization mode
* until the iteration is able to re-sync at the next base character.
* It is safest to call {@link #setText setText()}, {@link #first},
* {@link #last}, etc. after calling <tt>setMode</tt>.
* <p>
* @param newMode the new mode for this <tt>Normalizer</tt>.
* The supported modes are:
* <ul>
* <li>{@link #COMPOSE} - Unicode canonical decomposition
* followed by canonical composition.
* <li>{@link #COMPOSE_COMPAT} - Unicode compatibility decomposition
* followed by canonical composition.
* <li>{@link #DECOMP} - Unicode canonical decomposition
* <li>{@link #DECOMP_COMPAT} - Unicode compatibility decomposition.
* <li>{@link #NO_OP} - Do nothing but return characters
* from the underlying input text.
* </ul>
*
* @see #getMode
*/
void setMode(EMode newMode);
/**
* Return the basic operation performed by this <tt>Normalizer</tt>
*
* @see #setMode
*/
EMode getMode(void) const;
/**
* Set options that affect this <tt>Normalizer</tt>'s operation.
* Options do not change the basic composition or decomposition operation
* that is being performed, but they control whether
* certain optional portions of the operation are done.
* Currently the only available option is:
* <p>
* <ul>
* <li>{@link #IGNORE_HANGUL} - Do not decompose Hangul syllables into the
* Jamo alphabet and vice-versa. This option is off by default
* (<i>i.e.</i> Hangul processing is enabled) since the Unicode
* standard specifies that Hangul to Jamo is a canonical decomposition.
* For any of the standard Unicode Normalization
* Forms, you should leave this option off.
* </ul>
* <p>
* @param option the option whose value is to be set.
* @param value the new setting for the option. Use <tt>true</tt> to
* turn the option on and <tt>false</tt> to turn it off.
*
* @see #getOption
*/
void setOption(int32_t option,
bool_t value);
/**
* Determine whether an option is turned on or off.
* <p>
* @see #setOption
*/
bool_t getOption(int32_t option) const;
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning.
*/
void setText(const UnicodeString& newText,
UErrorCode &status);
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning.
*/
void setText(const CharacterIterator& newText,
UErrorCode &status);
/**
* Set the input text over which this <tt>Normalizer</tt> will iterate.
* The iteration position is set to the beginning.
*/
void setText(const UChar* newText,
int32_t length,
UErrorCode &status);
/**
* Copies the text under iteration into the UnicodeString referred to by
* "result".
* @param result Receives a copy of the text under iteration.
*/
void getText(UnicodeString& result);
private:
// Private utility methods for iteration
// For documentation, see the source code
UChar nextCompose(void);
UChar prevCompose(void);
UChar nextDecomp(void);
UChar prevDecomp(void);
UChar curForward(void);
UChar curBackward(void);
void init(CharacterIterator* iter,
EMode mode,
int32_t option);
void initBuffer(void);
void clearBuffer(void);
// Utilities used by Compose
static void bubbleAppend(UnicodeString& target,
UChar ch,
uint32_t cclass);
static uint32_t getComposeClass(UChar ch);
static uint16_t composeLookup(UChar ch);
static uint16_t composeAction(uint16_t baseIndex,
uint16_t comIndex);
static void explode(UnicodeString& target,
uint16_t index);
static UChar pairExplode(UnicodeString& target,
uint16_t action);
// Utilities used by Decompose
static void fixCanonical(UnicodeString& result); // Reorders combining marks
static uint8_t getClass(UChar ch); // Gets char's combining class
// Other static utility methods
static void doAppend(const UChar source[],
uint16_t offset,
UnicodeString& dest);
static void doInsert(const UChar source[],
uint16_t offset,
UnicodeString& dest,
UTextOffset pos);
static void hangulToJamo(UChar ch,
UnicodeString& result,
uint16_t decompLimit);
static void jamoAppend(UChar ch,
uint16_t decompLimit,
UnicodeString& dest);
static void jamoToHangul(UnicodeString& buffer,
UTextOffset start);
//-------------------------------------------------------------------------
// Private data
//-------------------------------------------------------------------------
EMode fMode;
int32_t fOptions;
int16_t minDecomp;
// The input text and our position in it
CharacterIterator* text;
// A buffer for holding intermediate results
UnicodeString buffer;
UTextOffset bufferPos;
UTextOffset bufferLimit;
UChar currentChar;
// Another buffer for use during iterative composition
UnicodeString explodeBuf;
enum {
EMPTY = -1,
STR_INDEX_SHIFT = 2, //Must agree with the constants used in NormalizerBuilder
STR_LENGTH_MASK = 0x0003
};
static const UChar HANGUL_BASE;
static const UChar HANGUL_LIMIT;
static const UChar JAMO_LBASE;
static const UChar JAMO_VBASE;
static const UChar JAMO_TBASE;
static const int16_t JAMO_LCOUNT;
static const int16_t JAMO_VCOUNT;
static const int16_t JAMO_TCOUNT;
static const int16_t JAMO_NCOUNT;
friend class ComposedCharIter;
};
// Inequality is defined purely as the negation of operator==, so the two
// comparisons can never disagree with each other.
inline bool_t
Normalizer::operator!= (const Normalizer& other) const
{
    const bool_t isEqual = this->operator==(other);
    return !isEqual;
}
#endif // _NORMLZR

View file

@ -0,0 +1,90 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* FILE NAME : platform.h
*
* Date Name Description
* 05/13/98 nos Creation (content moved here from ptypes.h).
* 03/02/99 stephen Added AS400 support.
* 03/30/99 stephen Added Linux support.
* 04/13/99 stephen Reworked for autoconf.
*******************************************************************************
*/
/* Define the platform we're on. */
#ifndef OS2
#define OS2 1
#endif
/* Define whether inttypes.h is available */
#define HAVE_INTTYPES_H 0
/* Determines whether specific types are available */
#define HAVE_INT8_T 0
#define HAVE_UINT8_T 0
#define HAVE_INT16_T 0
#define HAVE_UINT16_T 0
#define HAVE_INT32_T 0
#define HAVE_UINT32_T 0
#define HAVE_BOOL_T 0
/* Determines the endianness of the platform */
#define U_IS_BIG_ENDIAN 0
/*===========================================================================*/
/* Generic data types */
/*===========================================================================*/
/* If your platform does not have the <inttypes.h> header, you may
need to edit the typedefs below.
Each fixed-width type is typedef'd here only when <inttypes.h> is not
used and the matching HAVE_*_T macro defined earlier in this file is 0. */
#if HAVE_INTTYPES_H
#include <inttypes.h>
#else
#if ! HAVE_INT8_T
typedef signed char int8_t;
#endif
#if ! HAVE_UINT8_T
typedef unsigned char uint8_t;
#endif
#if ! HAVE_INT16_T
typedef signed short int16_t;
#endif
#if ! HAVE_UINT16_T
typedef unsigned short uint16_t;
#endif
#if ! HAVE_INT32_T
/* NOTE(review): the 32-bit types are spelled as long, which assumes that
long is exactly 32 bits wide with this platform's compiler - confirm
before reusing this header elsewhere. */
typedef signed long int32_t;
#endif
#if ! HAVE_UINT32_T
typedef unsigned long uint32_t;
#endif
#endif
/* <limits.h> supplies LONG_MAX, used for T_INT32_MAX below. */
#include <limits.h>
/* Largest int32_t value; LONG_MAX matches the fallback typedef of int32_t
as signed long above. */
#define T_INT32_MAX (LONG_MAX)
/*===========================================================================*/
/* Character data types */
/*===========================================================================*/
#define U_SIZEOF_WCHAR_T 2
/*===========================================================================*/
/* Symbol import-export control */
/*===========================================================================*/
#define U_EXPORT
#define U_EXPORT2
#define U_IMPORT

View file

@ -0,0 +1,92 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* FILE NAME : platform.h
*
* Date Name Description
* 05/13/98 nos Creation (content moved here from ptypes.h).
* 03/02/99 stephen Added AS400 support.
* 03/30/99 stephen Added Linux support.
* 04/13/99 stephen Reworked for autoconf.
* 09/21/99 barry Created new for OS/400 platform.
*******************************************************************************
*/
/* Define the platform we're on. */
#ifndef OS400
#define OS400
#endif
/* Define whether inttypes.h is available */
#define HAVE_INTTYPES_H 0
/* Determines whether specific types are available */
#define HAVE_INT8_T 0
#define HAVE_UINT8_T 0
#define HAVE_INT16_T 0
#define HAVE_UINT16_T 0
#define HAVE_INT32_T 0
#define HAVE_UINT32_T 0
#define HAVE_BOOL_T 0
/* Determines the endianness of the platform */
#define U_IS_BIG_ENDIAN 1
/*===========================================================================*/
/* Generic data types */
/*===========================================================================*/
/* If your platform does not have the <inttypes.h> header, you may
need to edit the typedefs below.
Each fixed-width type is typedef'd here only when <inttypes.h> is not
used and the matching HAVE_*_T macro defined earlier in this file is 0. */
#if HAVE_INTTYPES_H
#include <inttypes.h>
#else
#if ! HAVE_INT8_T
typedef signed char int8_t;
#endif
#if ! HAVE_UINT8_T
typedef unsigned char uint8_t;
#endif
#if ! HAVE_INT16_T
typedef signed short int16_t;
#endif
#if ! HAVE_UINT16_T
typedef unsigned short uint16_t;
#endif
#if ! HAVE_INT32_T
/* NOTE(review): the 32-bit types are spelled as long, which assumes that
long is exactly 32 bits wide with this platform's compiler - confirm
before reusing this header elsewhere. */
typedef signed long int32_t;
#endif
#if ! HAVE_UINT32_T
typedef unsigned long uint32_t;
#endif
#endif
/* <limits.h> supplies LONG_MAX, used for T_INT32_MAX below. */
#include <limits.h>
/* Largest int32_t value; LONG_MAX matches the fallback typedef of int32_t
as signed long above. */
#define T_INT32_MAX (LONG_MAX)
/*===========================================================================*/
/* Character data types */
/*===========================================================================*/
#define U_CHARSET_FAMILY 1
#define U_SIZEOF_WCHAR_T 2
/*===========================================================================*/
/* Symbol import-export control */
/*===========================================================================*/
#define U_EXPORT
#define U_EXPORT2
#define U_IMPORT

View file

@ -0,0 +1,215 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* FILE NAME : putil.h
*
* Date Name Description
* 05/14/98 nos Creation (content moved here from utypes.h).
* 06/17/99 erm Added IEEE_754
* 07/22/98 stephen Added IEEEremainder, max, min, trunc
* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
* 08/24/98 stephen Added longBitsFromDouble
* 03/02/99 stephen Removed openFile(). Added AS400 support.
* 04/15/99 stephen Converted to C
* 11/15/99 helena Integrated S/390 changes for IEEE support.
*******************************************************************************
*/
#ifndef PUTIL_H
#define PUTIL_H
#include "unicode/utypes.h"
/* Define this if your platform supports IEEE 754 floating point */
#define IEEE_754
/*===========================================================================*/
/* Platform utilities */
/*===========================================================================*/
/**
* Platform utilities isolate the platform dependencies of the
* library. For each platform which this code is ported to, these
* functions may have to be re-implemented. */
/* Floating point utilities */
U_CAPI bool_t U_EXPORT2 uprv_isNaN(double);
U_CAPI bool_t U_EXPORT2 uprv_isInfinite(double);
U_CAPI bool_t U_EXPORT2 uprv_isPositiveInfinity(double);
U_CAPI bool_t U_EXPORT2 uprv_isNegativeInfinity(double);
U_CAPI double U_EXPORT2 uprv_getNaN(void);
U_CAPI double U_EXPORT2 uprv_getInfinity(void);
U_CAPI double U_EXPORT2 uprv_floor(double x);
U_CAPI double U_EXPORT2 uprv_ceil(double x);
U_CAPI double U_EXPORT2 uprv_fabs(double x);
U_CAPI double U_EXPORT2 uprv_modf(double x, double* y);
U_CAPI double U_EXPORT2 uprv_fmod(double x, double y);
U_CAPI double U_EXPORT2 uprv_pow10(int32_t x);
U_CAPI double U_EXPORT2 uprv_IEEEremainder(double x, double y);
U_CAPI double U_EXPORT2 uprv_fmax(double x, double y);
U_CAPI double U_EXPORT2 uprv_fmin(double x, double y);
U_CAPI int32_t U_EXPORT2 uprv_max(int32_t x, int32_t y);
U_CAPI int32_t U_EXPORT2 uprv_min(int32_t x, int32_t y);
U_CAPI double U_EXPORT2 uprv_trunc(double d);
U_CAPI void U_EXPORT2 uprv_longBitsFromDouble(double d, int32_t *hi, uint32_t *lo);
/*
* uprv_isNegative(number) tests the sign of `number` by looking only at
* its most significant byte, read as a signed char: the first byte of the
* object on big-endian platforms, the last byte (offset sizeof(number)-1)
* otherwise. The result is nonzero when that byte is negative, i.e. when
* its top bit is set.
* The macro takes the address of its argument, so `number` must be an
* lvalue.
* NOTE(review): presumably meant for floating-point values, where a raw
* sign-bit test also catches negative zero - confirm against callers.
*/
#if U_IS_BIG_ENDIAN
# define uprv_isNegative(number) (*((signed char *)&(number))<0)
#else
# define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0)
#endif
/*
* Conversion from a digit to the character with radix base from 2-19.
* Digit values 0-9 yield the codes starting at the digit zero (0x30, or
* 0xF0 when OS390 is defined); values of 10 and above yield the codes
* starting at 'A' (0x41, or 0xC1 when OS390 is defined).
*
* Both the argument and the whole expansion are parenthesized so that the
* macro can be used inside a larger expression (e.g. 2 * macro(x)) and
* with a non-trivial argument (e.g. macro(n & 0xF)) without the
* conditional operator capturing neighbouring operands.
*
* Note: the argument is evaluated more than once, so it must not have
* side effects.
*/
#ifndef OS390
#define T_CString_itosOffset(a) ((a)<=9?(0x30+(a)):(0x30+(a)+7))
#else
#define T_CString_itosOffset(a) ((a)<=9?(0xF0+(a)):(0xC1+(a)-10)) /* C1 is EBCDIC 'A' */
#endif
/*
* Return the floor of the log base 10 of a given double.
* This method compensates for inaccuracies which arise naturally when
* computing logs, and always gives the correct value. The parameter
* must be positive and finite.
* (Thanks to Alan Liu for supplying this function.)
*/
/**
* Returns the common log of the double value d.
*
* @param d the double value to apply the common log function for.
* @return the log of value d.
*/
U_CAPI int16_t U_EXPORT2 uprv_log10(double d);
/**
* Returns the number of digits after the decimal point in a double number x.
*
* @param x the double number
*/
U_CAPI int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x);
/**
* Time zone utilities
*
* Wrappers for C runtime library functions relating to timezones.
* The uprv_tzset() function (similar to tzset) uses the current setting
* of the environment variable TZ to assign values to three global
* variables: daylight, timezone, and tzname. These variables have the
* following meanings, and are declared in &lt;time.h>.
*
* daylight Nonzero if daylight-saving-time zone (DST) is specified
* in TZ; otherwise, 0. Default value is 1.
* timezone Difference in seconds between coordinated universal
* time and local time. E.g., -28,800 for PST (GMT-8hrs)
* tzname(0) Three-letter time-zone name derived from TZ environment
* variable. E.g., "PST".
* tzname(1) Three-letter DST zone name derived from TZ environment
* variable. E.g., "PDT". If DST zone is omitted from TZ,
* tzname(1) is an empty string.
*
* Notes: For example, to set the TZ environment variable to correspond
* to the current time zone in Germany, you can use one of the
* following statements:
*
* set TZ=GST1GDT
* set TZ=GST+1GDT
*
* If the TZ value is not set, uprv_tzset() attempts to use the time zone
* information specified by the operating system. Under Windows NT
* and Windows 95, this information is specified in the Control Panels
* Date/Time application.
*/
U_CAPI void U_EXPORT2 uprv_tzset(void);
U_CAPI int32_t U_EXPORT2 uprv_timezone(void);
U_CAPI char* U_EXPORT2 uprv_tzname(int index);
/* Get UTC (GMT) time measured in seconds since 0:00 on 1/1/70. */
U_CAPI int32_t U_EXPORT2 uprv_getUTCtime(void);
/* Return the data directory for this platform. */
U_CAPI const char* U_EXPORT2 u_getDataDirectory(void);
/* Set the data directory. */
U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory);
/* Return the default codepage for this platform and locale */
U_CAPI const char* U_EXPORT2 uprv_getDefaultCodepage(void);
/* Return the default locale ID string by querying the system, or
zero if one cannot be found. */
U_CAPI const char* U_EXPORT2 uprv_getDefaultLocaleID(void);
/*
* Finds the least double greater than d (if positive == true),
* or the greatest double less than d (if positive == false).
*
* This is a special purpose function defined by the ChoiceFormat API
* documentation.
* It is not a general purpose function and not defined for NaN or Infinity
*/
U_CAPI double U_EXPORT2 uprv_nextDouble(double d, bool_t positive);
/**
* Filesystem file and path separator characters.
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
*
* Each separator is provided twice: as a character constant (*_CHAR)
* and as a string literal (*_STRING) holding the same character.
* The set is selected at compile time from the platform macros below.
*/
#ifdef XP_MAC
/* XP_MAC builds: ':' between file path components, ';' between paths. */
# define U_FILE_SEP_CHAR ':'
# define U_PATH_SEP_CHAR ';'
# define U_FILE_SEP_STRING ":"
# define U_PATH_SEP_STRING ";"
#elif defined(WIN32) || defined(OS2)
/* WIN32 and OS2 builds: backslash between file path components, ';' between paths. */
# define U_FILE_SEP_CHAR '\\'
# define U_PATH_SEP_CHAR ';'
# define U_FILE_SEP_STRING "\\"
# define U_PATH_SEP_STRING ";"
#else
/* Every other platform: '/' between file path components, ':' between paths. */
# define U_FILE_SEP_CHAR '/'
# define U_PATH_SEP_CHAR ':'
# define U_FILE_SEP_STRING "/"
# define U_PATH_SEP_STRING ":"
#endif
/**
* Convert char characters to UChar characters.
* This utility function is useful only for "invariant characters"
* that are encoded in the platform default encoding.
* They are a small, constant subset of the encoding and include
* just the latin letters, digits, and some punctuation.
* For details, see utypes.h .
*
* @param cs Input string, points to <code>length</code>
* character bytes from a subset of the platform encoding.
* @param us Output string, points to memory for <code>length</code>
* Unicode characters.
* @param length The number of characters to convert; this may
* include the terminating <code>NUL</code>.
*/
U_CAPI void U_EXPORT2
u_charsToUChars(const char *cs, UChar *us, UTextOffset length);
/**
* Convert UChar characters to char characters.
* This utility function is useful only for "invariant characters"
* that can be encoded in the platform default encoding.
* They are a small, constant subset of the encoding and include
* just the latin letters, digits, and some punctuation.
* For details, see utypes.h .
*
* @param us Input string, points to <code>length</code>
* Unicode characters that can be encoded with the
* codepage-invariant subset of the platform encoding.
* @param cs Output string, points to memory for <code>length</code>
* character bytes.
* @param length The number of characters to convert; this may
* include the terminating <code>NUL</code>.
*/
U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar *us, char *cs, UTextOffset length);
#endif

View file

@ -0,0 +1,90 @@
/*
*******************************************************************************
*
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* FILE NAME : platform.h
*
* Date Name Description
* 05/13/98 nos Creation (content moved here from ptypes.h).
* 03/02/99 stephen Added AS400 support.
* 03/30/99 stephen Added Linux support.
* 04/13/99 stephen Reworked for autoconf.
*******************************************************************************
*/
/* Define the platform we're on. */
#ifndef WIN32
#define WIN32
#endif
/* Define whether inttypes.h is available */
#define HAVE_INTTYPES_H 0
/* Determines whether specific types are available */
#define HAVE_INT8_T 0
#define HAVE_UINT8_T 0
#define HAVE_INT16_T 0
#define HAVE_UINT16_T 0
#define HAVE_INT32_T 0
#define HAVE_UINT32_T 0
#define HAVE_BOOL_T 0
/* Determines the endianness of the platform */
#define U_IS_BIG_ENDIAN 0
/*===========================================================================*/
/* Generic data types */
/*===========================================================================*/
/* If your platform does not have the <inttypes.h> header, you may
need to edit the typedefs below.
Each fixed-width type is typedef'd here only when <inttypes.h> is not
used and the matching HAVE_*_T macro defined earlier in this file is 0. */
#if HAVE_INTTYPES_H
#include <inttypes.h>
#else
#if ! HAVE_INT8_T
typedef signed char int8_t;
#endif
#if ! HAVE_UINT8_T
typedef unsigned char uint8_t;
#endif
#if ! HAVE_INT16_T
typedef signed short int16_t;
#endif
#if ! HAVE_UINT16_T
typedef unsigned short uint16_t;
#endif
#if ! HAVE_INT32_T
/* NOTE(review): the 32-bit types are spelled as long, which assumes that
long is exactly 32 bits wide with this platform's compiler - confirm
before reusing this header elsewhere. */
typedef signed long int32_t;
#endif
#if ! HAVE_UINT32_T
typedef unsigned long uint32_t;
#endif
#endif
/* <limits.h> supplies LONG_MAX, used for T_INT32_MAX below. */
#include <limits.h>
/* Largest int32_t value; LONG_MAX matches the fallback typedef of int32_t
as signed long above. */
#define T_INT32_MAX (LONG_MAX)
/*===========================================================================*/
/* Character data types */
/*===========================================================================*/
#define U_SIZEOF_WCHAR_T 2
/*===========================================================================*/
/* Symbol import-export control */
/*===========================================================================*/
#define U_EXPORT __declspec(dllexport)
#define U_EXPORT2
#define U_IMPORT __declspec(dllimport)

View file

@ -0,0 +1,123 @@
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines Corporation and
* others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/17/99 aliu Creation. Ported from java. Modified to
* match current UnicodeString API. Forced
* to use name "handleReplaceBetween" because
* of existing methods in UnicodeString.
**********************************************************************
*/
#ifndef REP_H
#define REP_H
#include "unicode/utypes.h"
class UnicodeString;
/**
* <code>Replaceable</code> is an abstract base class representing a
* string of characters that supports the replacement of a range of
* itself with a new string of characters. It is used by APIs that
* change a piece of text while retaining style attributes. In other
* words, an implicit aspect of the <code>Replaceable</code> API is
* that during a replace operation, new characters take on the
* attributes, if any, of the old characters. For example, if the
* string "the <b>bold</b> font" has range (4, 8) replaced with
* "strong", then it becomes "the <b>strong</b> font".
*
* <p><code>Replaceable</code> specifies ranges using an initial
* offset and a limit offset. The range of characters thus specified
* includes the characters at offset initial..limit-1. That is, the
* start offset is inclusive, and the limit offset is exclusive.
*
* <p><code>Replaceable</code> also includes API to access characters
* in the string: <code>length()</code>, <code>charAt()</code>, and
* <code>extractBetween()</code>.
*
* @author Alan Liu
*/
class U_COMMON_API Replaceable {
public:
/**
* Destructor.
* Declared virtual so that an object of a derived class can be
* destroyed through a <code>Replaceable</code> pointer.
*/
virtual ~Replaceable();
/**
* Return the number of characters in the text.
* @return number of characters in text
*/
virtual int32_t length() const = 0;
/**
* Return the character at the given offset into the text.
* @param offset an integer between 0 and <code>length()</code>-1
* inclusive
* @return character of text at given offset
*/
virtual UChar charAt(UTextOffset offset) const = 0;
/**
* Copy characters from this object into the destination character
* array. The first character to be copied is at index
* <code>srcStart</code>; the last character to be copied is at
* index <code>srcLimit-1</code> (thus the total number of
* characters to be copied is <code>srcLimit-srcStart</code>). The
* characters are copied into the subarray of <code>dst</code>
* starting at index <code>dstStart</code> and ending at index
* <code>dstStart + (srcLimit-srcStart) - 1</code>.
*
* @param srcStart the beginning index to copy, inclusive; <code>0
* <= srcStart <= srcLimit</code>.
* @param srcLimit the ending index to copy, exclusive;
* <code>srcStart <= srcLimit <= length()</code>.
* @param dst the destination array.
* @param dstStart the start offset in the destination array;
* defaults to 0. */
virtual void extractBetween(UTextOffset srcStart,
UTextOffset srcLimit,
UChar* dst,
UTextOffset dstStart = 0) const = 0;
/**
* Replace a substring of this object with the given text. If the
* characters being replaced have attributes, the new characters
* that replace them should be given the same attributes.
*
* @param start the beginning index, inclusive; <code>0 <= start
* <= limit</code>.
* @param limit the ending index, exclusive; <code>start <= limit
* <= length()</code>.
* @param text the text to replace characters <code>start</code>
* to <code>limit - 1</code> */
virtual void handleReplaceBetween(UTextOffset start,
UTextOffset limit,
const UnicodeString& text) = 0;
// Note: All other methods in this class take the names of
// existing UnicodeString methods. This method is the exception.
// It is named differently because all replace methods of
// UnicodeString return a UnicodeString&. The 'between' is
// required in order to conform to the UnicodeString naming
// convention; API taking start/length are named <operation>, and
// those taking start/limit are named <operationBetween>. The
// 'handle' is added because 'replaceBetween' and
// 'doReplaceBetween' are already taken.
protected:
/**
* Default constructor.
* Protected: it can only be invoked from derived classes, which
* must also implement the pure virtual methods above.
*/
Replaceable();
};
inline Replaceable::Replaceable() {}
inline Replaceable::~Replaceable() {}
#endif

View file

@ -0,0 +1,633 @@
/*
*******************************************************************************
*
* Copyright (C) 1996-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* File resbund.h
*
* CREATED BY
* Richard Gillam
*
* Modification History:
*
* Date Name Description
* 2/5/97 aliu Added scanForLocaleInFile. Added
* constructor which attempts to read resource bundle
* from a specific file, without searching other files.
* 2/11/97 aliu Added UErrorCode return values to constructors. Fixed
* infinite loops in scanForFile and scanForLocale.
* Modified getRawResourceData to not delete storage in
* localeData and resourceData which it doesn't own.
* Added Mac compatibility #ifdefs for tellp() and
* ios::nocreate.
* 2/18/97 helena Updated with 100% documentation coverage.
* 3/13/97 aliu Rewrote to load in entire resource bundle and store
* it as a Hashtable of ResourceBundleData objects.
* Added state table to govern parsing of files.
* Modified to load locale index out of new file distinct
* from default.txt.
* 3/25/97 aliu Modified to support 2-d arrays, needed for timezone data.
* Added support for custom file suffixes. Again, needed to
* support timezone data.
* 4/7/97 aliu Cleaned up.
* 03/02/99 stephen Removed dependency on FILE*.
* 03/29/99 helena Merged Bertrand and Stephen's changes.
* 06/11/99 stephen Removed parsing of .txt files.
* Reworked to use new binary format.
* Cleaned up.
* 06/14/99 stephen Removed methods taking a filename suffix.
* 11/09/99 weiv Added getLocale(), fRealLocale, removed fRealLocaleID
*******************************************************************************
*/
#ifndef RESBUND_H
#define RESBUND_H
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/locid.h"
#include <wchar.h>
class RBHashtable;
class ResourceBundleData;
class ResourceBundleCache;
class VisitedFileCache;
#ifndef _FILESTRM
typedef struct _FileStream FileStream;
#endif
/**
* A class representing a collection of resource information pertaining to a given
* locale. A resource bundle provides a way of accessing locale-specific information in
* a data file. You create a resource bundle that manages the resources for a given
* locale and then ask it for individual resources.
* <P>
* The resource bundle file is a text (ASCII or Unicode) file with the format:
* <pre>
* . locale {
* . tag1 {...}
* . tag2 {...}
* . }
* </pre>
* The tags are used to retrieve the data later. You may not have multiple instances of
* the same tag.
* <P>
* Four data types are supported. These are solitary strings, comma-delimited lists of
* strings, 2-dimensional arrays of strings, and tagged lists of strings.
* <P>
* Note that all data is textual. Adjacent strings are merged by the low-level
* tokenizer, so that the following effects occur:
* <pre>
* . foo bar, baz // 2 elements, "foo bar", and "baz"
* . "foo" "bar", baz // 2 elements, "foobar", and "baz"
* </pre>
* Note that a single intervening space is added between merged strings, unless they
* are both double quoted. This extends to more than two strings in a row.
* <P>
* Whitespace is ignored, as in a C source file.
* <P>
* Solitary strings have the format:
* <pre>
* . Tag { Data }
* </pre>
* This is indistinguishable from a comma-delimited list with only one element, and in
* fact may be retrieved as such (as an array, or as element 0 of an array).
* <P>
* Comma-delimited lists have the format:
* <pre>
* . Tag { Data, Data, Data }
* </pre>
* Parsing is lenient; a final string, after the last element, is allowed.
* <P>
* Tagged lists have the format:
* <pre>
* . Tag { Subtag { Data } Subtag {Data} }
* </pre>
* Data is retrieved by specifying the subtag.
* <P>
* Two-dimensional arrays have the format:
* <pre>
* . TwoD {
* . { r1c1, r1c2, ..., r1cm },
* . { r2c1, r2c2, ..., r2cm },
* . ...
* . { rnc1, rnc2, ..., rncm }
* . }
* </pre>
* where n is the number of rows, and m is the number of columns. Parsing is lenient (as
* in other data types). A final comma is always allowed after the last element; either
* the last string in a row, or the last row itself. Furthermore, since there is no
* ambiguity, the commas between the rows are entirely optional. (However, if a comma is
* present, there can only be one comma, no more.) It is possible to have zero columns,
* as follows:
* <pre>
* . Odd { {} {} {} } // 3 x 0 array
* </pre>
* But it is impossible to have zero rows. The smallest array is thus a 1 x 0 array,
* which looks like this:
* <pre>
* . Smallest { {} } // 1 x 0 array
* </pre>
* The array must be strictly rectangular; that is, each row must have the same number
* of elements.
* <P>
* This is an example for using a possible custom resource:
* <pre>
* . Locale currentLocale;
* . UErrorCode success = U_ZERO_ERROR;
* . ResourceBundle myResources("MyResources", currentLocale, success );
* .
* . UnicodeString button1Title, button2Title;
* . myResources.getString("OkKey", button1Title, success );
* . myResources.getString("CancelKey", button2Title, success );
* </pre>
*/
class U_COMMON_API ResourceBundle {
// NOTE(review): this class declares a destructor and holds raw pointer members
// (fLocaleIterator, fVersionID), but no copy constructor or assignment operator
// is declared here. Confirm against the implementation whether copying a
// ResourceBundle is safe before relying on it.
public:
/**
* Constructor
*
* @param path This is a full pathname in the platform-specific format for the
* directory containing the resource data files we want to load
* resources from. We use locale IDs to generate filenames, and the
* filenames have this string prepended to them before being passed
* to the C++ I/O functions. Therefore, this string must always end
* with a directory delimiter (whatever that is for the target OS)
* for this class to work correctly.
* @param locale This is the locale this resource bundle is for. To get resources
* for the French locale, for example, you would create a
* ResourceBundle passing Locale::FRENCH for the "locale" parameter,
* and all subsequent calls to that resource bundle will return
* resources that pertain to the French locale. If the caller doesn't
* pass a locale parameter, the default locale for the system (as
* returned by Locale::getDefault()) will be used.
* @param err Used to return status information to the user. To check whether
* the construction succeeded or not, you should check the value of
* U_SUCCESS(err). If you wish more detailed information, you can
* check for informational error results which still indicate
* success. U_USING_FALLBACK_ERROR indicates that a fall back locale
* was used. For example, 'de_CH' was requested, but nothing was
* found there, so 'de' was used. U_USING_DEFAULT_ERROR indicates
* that the default locale data was used; neither the requested
* locale nor any of its fall back locales could be found.
*/
ResourceBundle( const UnicodeString& path,
const Locale& locale,
UErrorCode& err);
/**
* Constructor taking no locale argument.
* NOTE(review): presumably this uses the system default locale, as described
* for the "locale" parameter of the constructor above -- confirm against the
* implementation.
*
* @param path The directory containing the resource data files (see above).
* @param err Receives status information (see above).
*/
ResourceBundle( const UnicodeString& path,
UErrorCode& err);
/**
* Constructor taking the path as a wide-character string.
*
* @param path The directory containing the resource data files (see above).
* @param locale The locale this resource bundle is for (see above).
* @param err Receives status information (see above).
*/
ResourceBundle( const wchar_t* path,
const Locale& locale,
UErrorCode& err);
/**
* Destructor.
*/
~ResourceBundle();
/**
* Returns the contents of a string resource. Resource data is undifferentiated
* Unicode text. The resource file may contain quoted strings or escape sequences;
* these will be parsed prior to the data's return.
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
*
* @param resourceTag The resource tag of the string resource the caller wants
* @param theString Receives the actual data in the resource
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found.
*/
void getString( const char *resourceTag,
UnicodeString& theString,
UErrorCode& err) const;
/**
* Returns the contents of a string resource. Resource data is undifferentiated
* Unicode text. The resource file may contain quoted strings or escape sequences;
* these will be parsed prior to the data's return.
*
* @param resourceTag The resource tag of the string resource the caller wants
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found.
* @return A pointer to the string from the resource bundle, or NULL if there was
* an error.
*/
const UnicodeString* getString( const char *resourceTag,
UErrorCode& err) const;
/**
* Returns the contents of a string-array resource. This will return the contents of
* a string-array (comma-delimited-list) resource as a C++ array of UnicodeString
* objects. The number of elements in the array is returned in numArrayItems.
* Calling getStringArray on a resource of type string will return an array with one
* element; calling it on a resource of type tagged-array results in a
* U_MISSING_RESOURCE_ERROR error.
*
* @param resourceTag The resource tag of the string-array resource the caller
* wants
* @param numArrayItems Receives the number of items in the array the function
* returns.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found.
* @return The resource requested, as a pointer to an array of
* UnicodeStrings. The caller does not own the storage and
* must not delete it.
*/
const UnicodeString* getStringArray( const char *resourceTag,
int32_t& numArrayItems,
UErrorCode& err) const;
/**
* Returns a single item from a string-array resource. This will return the contents
* of a single item in a resource of string-array (comma-delimited-list) type. If
* the resource is not an array, a U_MISSING_RESOURCE_ERROR will be returned in err.
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param index The index (zero-based) of the particular array item the user
* wants to extract from the resource.
* @param theArrayItem Receives the actual text of the desired array item.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found, or if the index was out of range.
*/
void getArrayItem( const char *resourceTag,
int32_t index,
UnicodeString& theArrayItem,
UErrorCode& err) const;
/**
* Returns a single item from a string-array resource. This will return the contents
* of a single item in a resource of string-array (comma-delimited-list) type. If
* the resource is not an array, a U_MISSING_RESOURCE_ERROR will be returned in err.
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param index The index (zero-based) of the particular array item the user
* wants to extract from the resource.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found, or if the index was out of range.
* @return A pointer to the text of the array item, or NULL if there was an error.
*/
const UnicodeString* getArrayItem( const char *resourceTag,
int32_t index,
UErrorCode& err) const;
/**
* Return the contents of a 2-dimensional array resource. The return value will be a
* UnicodeString** array. (This is really an array of pointers; each pointer is a
* ROW of the data.) The number of rows and columns is returned. If the resource is
* of the wrong type, or not present, U_MISSING_RESOURCE_ERROR is placed in err.
*
* @param resourceTag The resource tag of the string-array resource the caller
* wants
* @param rowCount Receives the number of rows in the array the function
* returns.
* @param columnCount Receives the number of columns in the array the function
* returns.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found.
* @return The resource requested, as a UnicodeString**. The caller
* does not own the storage and must not delete it.
*/
const UnicodeString** get2dArray(const char *resourceTag,
int32_t& rowCount,
int32_t& columnCount,
UErrorCode& err) const;
/**
* Return a single string from a 2-dimensional array resource. If the resource does
* not exist, or if it is not a 2-d array, or if the row or column indices are out
* of bounds, err is set to U_MISSING_RESOURCE_ERROR.
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param rowIndex The row index (zero-based) of the array item the user wants
* to extract from the resource.
* @param columnIndex The column index (zero-based) of the array item the user
* wants to extract from the resource.
* @param theArrayItem Receives the actual text of the desired array item.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found, if the resource data was in
* the wrong format, or if either index is out of bounds.
*/
void get2dArrayItem(const char *resourceTag,
int32_t rowIndex,
int32_t columnIndex,
UnicodeString& theArrayItem,
UErrorCode& err) const;
/**
* Return a single string from a 2-dimensional array resource. If the resource does
* not exist, or if it is not a 2-d array, or if the row or column indices are out
* of bounds, err is set to U_MISSING_RESOURCE_ERROR.
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param rowIndex The row index (zero-based) of the array item the user wants
* to extract from the resource.
* @param columnIndex The column index (zero-based) of the array item the user
* wants to extract from the resource.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found, if the resource data was in
* the wrong format, or if either index is out of bounds.
* @return A pointer to the text of the array item, or NULL if there was an error.
*/
const UnicodeString* get2dArrayItem( const char *resourceTag,
int32_t rowIndex,
int32_t columnIndex,
UErrorCode& err) const;
/**
* Returns a single item from a tagged-array resource. This will return the contents
* of a single item in a resource of type tagged-array. If this function is called
* for a resource that is not of type tagged-array, it will set err to
* U_MISSING_RESOURCE_ERROR.
* [THIS FUNCTION IS DEPRECATED; USE THE OVERLOAD BELOW INSTEAD]
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param itemTag The item tag for the item the caller wants to extract.
* @param theArrayItem Receives the text of the desired array item.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified resource tag couldn't be found, or if an item
* with the specified item tag couldn't be found in the resource.
*/
void getTaggedArrayItem( const char *resourceTag,
const UnicodeString& itemTag,
UnicodeString& theArrayItem,
UErrorCode& err) const;
/**
* Returns a single item from a tagged-array resource. This will return the contents
* of a single item in a resource of type tagged-array. If this function is called
* for a resource that is not of type tagged-array, it will set err to
* U_MISSING_RESOURCE_ERROR.
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param itemTag The item tag for the item the caller wants to extract.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified resource tag couldn't be found, or if an item
* with the specified item tag couldn't be found in the resource.
* @return A pointer to the text of the array item, or NULL if there was an error.
*/
const UnicodeString* getTaggedArrayItem( const char *resourceTag,
const UnicodeString& itemTag,
UErrorCode& err) const;
/**
* Returns a tagged-array resource. The contents of the resource is returned as two
* separate arrays of UnicodeStrings, the addresses of which are placed in "itemTags"
* and "items". After calling this function, the items in the resource will be in the
* list pointed to by "items", and for each items[i], itemTags[i] will be the tag that
* corresponds to it. The total number of entries in both arrays is returned in
* numItems.
*
* @param resourceTag The resource tag of the resource the caller wants to extract
* an item from.
* @param itemTags Set to point to an array of UnicodeStrings representing the
* tags in the specified resource. The caller DOES own this array,
* and must delete it.
* @param items Set to point to an array of UnicodeStrings containing the
* individual resource items themselves. itemTags[i] will
* contain the tag corresponding to items[i]. The caller DOES
* own this array, and must delete it.
* @param numItems Receives the number of items in the arrays pointed to by
* items and itemTags.
* @param err Set to U_MISSING_RESOURCE_ERROR if a resource with the
* specified tag couldn't be found.
*/
void getTaggedArray( const char *resourceTag,
UnicodeString*& itemTags,
UnicodeString*& items,
int32_t& numItems,
UErrorCode& err) const;
/**
* Return the version number associated with this ResourceBundle. This version
* number is a string of the form MAJOR.MINOR, where MAJOR is the version number of
* the current analytic code package, and MINOR is the version number contained in
* the resource file as the value of the tag "Version". A change in the MINOR
* version indicates an updated data file. A change in the MAJOR version indicates a
* new version of the code which is not binary-compatible with the previous version.
* If no "Version" tag is present in a resource file, the MINOR version "0" is assigned.
*
* For example, if the Collation sort key algorithm changes, the MAJOR version
* increments. If the collation data in a resource file changes, the MINOR version
* for that file increments.
*
* @return A string of the form N.n, where N is the major version number,
* representing the code version, and n is the minor version number,
* representing the resource data file. The caller does not own this
* string.
*/
const char* getVersionNumber(void) const;
/**
* Return the Locale associated with this ResourceBundle.
*
* @return a Locale object
*/
const Locale &getLocale(void) const ;
private:
/**
* Private helper class that stores a path prefix and suffix, both as
* UnicodeStrings (fPrefix, fSuffix) and as wide-character strings
* (fWPrefix, fWSuffix).
* NOTE(review): judging by the method names only, it checks for, opens and
* builds cache/hash keys for the data file of a given locale name -- confirm
* against the implementation.
*/
class U_COMMON_API PathInfo {
public:
PathInfo();
PathInfo(const PathInfo& source);
PathInfo(const UnicodeString& path);
PathInfo(const UnicodeString& path, const UnicodeString& suffix);
PathInfo(const wchar_t* path, const wchar_t* suffix);
~PathInfo();
PathInfo& operator=(const PathInfo& source);
bool_t fileExists(const UnicodeString& localeName) const;
UnicodeString makeCacheKey(const UnicodeString& localeName) const;
UnicodeString makeHashkey(const UnicodeString& localeName) const;
FileStream* openFile(const UnicodeString& localeName) const;
private:
static const UChar kSeparator;
UnicodeString fPrefix;
UnicodeString fSuffix;
wchar_t* fWPrefix;
wchar_t* fWSuffix;
};
private:
// Classes and free functions that are granted access to the private members.
friend class Locale;
friend class RuleBasedCollator;
friend int32_t T_ResourceBundle_countArrayItemsImplementation(const ResourceBundle* resourceBundle,
const char* resourceKey,
UErrorCode& err) ;
friend const UnicodeString** listInstalledLocalesImplementation(const char* path,
int32_t* numInstalledLocales);
friend void getTaggedArrayUCharsImplementation(
const ResourceBundle* bundle,
const char *resourceTag,
UChar const** itemTags,
UChar const** items,
int32_t maxItems,
int32_t& numItems,
UErrorCode& err);
/**
* This constructor is used by Collation to load a resource bundle from a specific
* file, without trying other files. This is used by the Collation caching
* mechanism.
*/
ResourceBundle( const UnicodeString& path,
const UnicodeString& localeName,
UErrorCode& status);
/**
* Return a list of all installed locales. This function returns a list of the IDs
* of all locales represented in the directory specified by this ResourceBundle. It
* depends on that directory having an "Index" tagged-list item in its "index.txt"
* file; it parses that list to determine its return value (therefore, that list
* also has to be up to date). This function is static.
*
* This function is the implementation of the Locale::listInstalledLocales()
* function. It's private because the API for it really belongs in Locale.
*
* @param path The path to the locale data files. The function will
* look here for "index.txt".
* @param numInstalledLocales Receives the number of installed locales, according
* to the Index resource in index.txt.
* @return A list of the installed locales, as a pointer to an
* array of UnicodeStrings. This storage is not owned by
* the caller, who must not delete it. The information
* in this list is derived from the Index resource in
* index.txt, which must be kept up to date.
*/
static const UnicodeString* listInstalledLocales(const UnicodeString& path,
int32_t& numInstalledLocales);
/**
* Retrieve a ResourceBundle from the cache. Return NULL if not found.
*/
static const UHashtable* getFromCache(const PathInfo& path,
const UnicodeString& localeName,
ResourceBundleCache* someCache);
/**
* Variant of getFromCache() that also reports a locale name through
* returnedLocale and a status through error.
* NOTE(review): presumably falls back to less specific locales when
* desiredLocale is not found -- confirm against the implementation.
*/
static const UHashtable* getFromCacheWithFallback(const PathInfo& path,
const UnicodeString& desiredLocale,
UnicodeString& returnedLocale,
ResourceBundleCache* someCache,
UErrorCode& error);
/**
* Handlers which are passed to parse() have this signature.
*/
typedef void (*Handler)(const UnicodeString& localeName,
UHashtable* hashtable,
void* context,
ResourceBundleCache* someCache);
/**
* Parse a file, storing the resource data in the cache.
*/
static void parse(const PathInfo& path,
const UnicodeString& localeName,
Handler handler,
void* context,
ResourceBundleCache* someCache,
UErrorCode &error);
/**
* If the given file exists and has not been parsed, then parse it (caching the
* resultant data) and return true.
*/
static bool_t parseIfUnparsed(const PathInfo& path,
const UnicodeString& locale,
ResourceBundleCache* fCache,
VisitedFileCache* vCache,
UErrorCode& error);
// Internal (non-static) lookup helpers; their contracts are not documented in
// this header -- see the implementation.
const UHashtable* getHashtableForLocale(const UnicodeString& localeName,
UnicodeString& returnedLocale,
UErrorCode& err);
const UHashtable* getHashtableForLocale(const UnicodeString& desiredLocale,
UErrorCode& error);
const ResourceBundleData* getDataForTag(const char *tag,
UErrorCode& err) const;
void constructForLocale(const PathInfo& path,
const Locale& locale,
UErrorCode& error);
// The next two static functions have the same parameter list as the Handler
// typedef above, so they can be passed to parse() as handlers.
static void addToCache(const UnicodeString& localeName,
UHashtable* hashtable,
void* context,
ResourceBundleCache* someCache);
static void saveCollationHashtable(const UnicodeString& localeName,
UHashtable* hashtable,
void* context,
ResourceBundleCache* cache);
private:
/**
* This internal class iterates over the fallback and/or default locales. It
* progresses as follows:
* Specific: language+country+variant, language+country, language
* Default: language+country+variant, language+country, language
* Root
*/
class LocaleFallbackIterator
{
public:
LocaleFallbackIterator(const UnicodeString& startingLocale,
const UnicodeString& root,
bool_t useDefaultLocale);
/** Returns a reference to the stored locale name (fLocale). */
const UnicodeString& getLocale(void) const { return fLocale; }
bool_t nextLocale(UErrorCode& status);
private:
void chopLocale(void);
UnicodeString fLocale;
UnicodeString fDefaultLocale;
UnicodeString fRoot;
bool_t fUseDefaultLocale;
bool_t fTriedDefaultLocale;
bool_t fTriedRoot;
};
private:
// Class-wide constants; their values are defined in the implementation file.
static const char* kDefaultSuffix;
static const int32_t kDefaultSuffixLen;
static const char* kDefaultFilename;
static const char* kDefaultLocaleName;
static const char* kIndexLocaleName;
static const char* kIndexFilename;
static const char* kIndexTag;
static const char* kDefaultMinorVersion;
static const char* kVersionSeparator;
static const char* kVersionTag;
static ResourceBundleCache* fgUserCache;
static VisitedFileCache* fgUserVisitedFiles;
// NOTE(review): despite the "fg" prefix used for the static members above,
// the next two members are per-object (they are not declared static).
ResourceBundleCache* fgCache;
VisitedFileCache* fgVisitedFiles;
/**
* Data members. The ResourceBundle object is kept lightweight by having the fData[]
* array entries be non-owned pointers. The cache (fgCache) owns the entries and
* will delete them at static destruction time.
*/
PathInfo fPath;
enum { kDataCount = 4 };
const UHashtable* fData[kDataCount]; // These aren't const if fIsDataOwned is true
bool_t fLoaded[kDataCount];
UErrorCode fDataStatus[kDataCount]; // Returns the appropriate error code for each data table.
bool_t fIsDataOwned;
Locale fRealLocale;
LocaleFallbackIterator* fLocaleIterator;
char* fVersionID;
};
#endif

View file

@ -0,0 +1,174 @@
/*
*******************************************************************************
*
* Copyright (C) 1998-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* File schriter.h
*
* Modification History:
*
* Date Name Description
* 05/05/99 stephen Cleaned up.
*******************************************************************************
*/
#ifndef SCHRITER_H
#define SCHRITER_H
#include "unicode/utypes.h"
#include "unicode/chariter.h"
/**
* A concrete subclass of CharacterIterator that iterates over the
* characters in a UnicodeString. It's possible not only to create an
* iterator that iterates over an entire UnicodeString, but also to
* create one that iterates over only a subrange of a UnicodeString
* (iterators over different subranges of the same UnicodeString don't
* compare equal). */
class U_COMMON_API StringCharacterIterator : public CharacterIterator {
public:
/**
* Create an iterator over the UnicodeString referred to by "text".
* The iteration range is the whole string, and the starting position is 0.
*/
StringCharacterIterator(const UnicodeString& text);
/**
* Create an iterator over the UnicodeString referred to by "text".
* The iteration range is the whole string, and the starting
* position is specified by "pos". If "pos" is outside the valid
* iteration range, the behavior of this object is undefined. */
StringCharacterIterator(const UnicodeString& text,
UTextOffset pos);
/**
* Create an iterator over the UnicodeString referred to by "text".
* The iteration range begins with the character specified by
* "begin" and ends with the character BEFORE the character specified
* by "end". The starting position is specified by "pos". If
* "begin" and "end" don't form a valid range on "text" (i.e., begin
* >= end or either is negative or greater than text.size()), or
* "pos" is outside the range defined by "begin" and "end", the
* behavior of this iterator is undefined. */
StringCharacterIterator(const UnicodeString& text,
UTextOffset begin,
UTextOffset end,
UTextOffset pos);
/**
* Copy constructor. The new iterator iterates over the same range
* of the same string as "that", and its initial position is the
* same as "that"'s current position. */
StringCharacterIterator(const StringCharacterIterator& that);
/**
* Destructor. */
virtual ~StringCharacterIterator();
/**
* Assignment operator. *this is altered to iterate over the same
* range of the same string as "that", and refers to the same
* character within that string as "that" does. */
StringCharacterIterator&
operator=(const StringCharacterIterator& that);
/**
* Returns true if the iterators iterate over the same range of the
* same string and are pointing at the same character. */
virtual bool_t operator==(const CharacterIterator& that) const;
/**
* Generates a hash code for this iterator. */
virtual int32_t hashCode(void) const;
/**
* Returns a new StringCharacterIterator referring to the same
* character in the same range of the same string as this one. The
* caller must delete the new iterator. */
virtual CharacterIterator* clone(void) const;
/**
* Sets the iterator to refer to the first character in its
* iteration range, and returns that character. */
virtual UChar first(void);
/**
* Sets the iterator to refer to the last character in its iteration
* range, and returns that character. */
virtual UChar last(void);
/**
* Sets the iterator to refer to the "position"-th character in the
* UnicodeString the iterator refers to, and returns that character.
* If the index is outside the iterator's iteration range, the
* behavior of the iterator is undefined. */
virtual UChar setIndex(UTextOffset pos);
/**
* Returns the character the iterator currently refers to. */
virtual UChar current(void) const;
/**
* Advances to the next character in the iteration range (toward
* last()), and returns that character. If there are no more
* characters to return, returns DONE. */
virtual UChar next(void);
/**
* Advances to the previous character in the iteration range (toward
* first()), and returns that character. If there are no more
* characters to return, returns DONE. */
virtual UChar previous(void);
/**
* Returns the numeric index of the first character in this
* iterator's iteration range. */
virtual UTextOffset startIndex(void) const;
/**
* Returns the numeric index of the character immediately BEYOND the
* last character in this iterator's iteration range. */
virtual UTextOffset endIndex(void) const;
/**
* Returns the numeric index in the underlying UnicodeString of the
* character the iterator currently refers to (i.e., the character
* returned by current()). */
virtual UTextOffset getIndex(void) const;
/**
* Copies the UnicodeString under iteration into the UnicodeString
* referred to by "result". Even if this iterator iterates across
* only a part of this string, the whole string is copied.
* @param result Receives a copy of the text under iteration. */
virtual void getText(UnicodeString& result);
/**
* Return a class ID for this object (not really public).
* Forwards to getStaticClassID(). */
virtual UClassID getDynamicClassID(void) const
{ return getStaticClassID(); }
/**
* Return a class ID for this class (not really public).
* The ID is the address of the static member fgClassID. */
static UClassID getStaticClassID(void)
{ return (UClassID)(&fgClassID); }
private:
// Default constructor; private, so it is not available to clients.
StringCharacterIterator();
// The string under iteration, held by value.
UnicodeString text;
// Position and range bounds; see the "pos", "begin" and "end" constructor
// parameters above.
UTextOffset pos;
UTextOffset begin;
UTextOffset end;
// Static member whose address is returned by getStaticClassID().
static UClassID fgClassID;
};
#endif

View file

@ -0,0 +1,142 @@
/*
*******************************************************************************
*
* Copyright (C) 1998-1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* File scsu.h
*
* Modification History:
*
* Date Name Description
* 05/17/99 stephen Creation (ported from java UnicodeCompressor.java)
* 09/21/99 stephen Updated to handle data splits on decompression.
*******************************************************************************
*/
#ifndef SCSU_H
#define SCSU_H 1
#include "unicode/utypes.h"
/* Window counts */
#define USCSU_NUM_WINDOWS        8
#define USCSU_NUM_STATIC_WINDOWS 8

/* Largest value a window index may take */
#define USCSU_MAX_INDEX          0xFF

/* Capacity, in bytes, of the internal buffer of a UnicodeCompressor */
#define USCSU_BUFSIZE            3

/**
 * The UnicodeCompressor state record.
 * The struct tag and the typedef name are the same, so both
 * <TT>struct UnicodeCompressor</TT> and <TT>UnicodeCompressor</TT> name this type.
 */
typedef struct UnicodeCompressor {
    /** Alias to the dynamic window currently in use */
    int32_t fCurrentWindow;

    /** Offsets of the dynamic compression windows */
    int32_t fOffsets[USCSU_NUM_WINDOWS];

    /** Compression mode currently in effect */
    int32_t fMode;

    /** Per-index tally of how often character indices are encountered */
    int32_t fIndexCount[USCSU_MAX_INDEX + 1];

    /** Per-window time stamp recording when the window was last defined */
    int32_t fTimeStamps[USCSU_NUM_WINDOWS];

    /** The current time stamp */
    int32_t fTimeStamp;

    /** Internal buffer used for saving state */
    uint8_t fBuffer[USCSU_BUFSIZE];

    /** Number of characters held in the internal buffer */
    int32_t fBufferLength;
} UnicodeCompressor;
/**
 * Initialize a UnicodeCompressor.
 * Sets all windows to their default values.
 *
 * @param comp The UnicodeCompressor to initialize.
 *
 * @see scsu_reset
 */
U_CAPI void U_EXPORT2 scsu_init(UnicodeCompressor *comp);
/**
 * Reset the compressor to its initial state.
 *
 * @param comp The UnicodeCompressor to reset.
 *
 * @see scsu_init
 */
U_CAPI void U_EXPORT2 scsu_reset(UnicodeCompressor *comp);
/**
 * Compress a Unicode character array into a byte array.
 *
 * This function is not guaranteed to completely fill the output buffer, nor
 * is it guaranteed to compress the entire input.
 * If the source data is completely compressed, <TT>status</TT> will be set
 * to <TT>U_ZERO_ERROR</TT>.
 * If the source data is not completely compressed, <TT>status</TT> will be
 * set to <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT>. If this occurs, larger buffers
 * should be allocated, or data flushed, and the function should be called
 * again with the new buffers.
 *
 * @param comp A pointer to a previously-initialized UnicodeCompressor
 * @param target I/O parameter. On input, a pointer to a buffer of bytes to
 * receive the compressed data. On output, points to the byte following
 * the last byte written. This buffer must be at least 4 bytes.
 * @param targetLimit A pointer to the end of the array <TT>target</TT>.
 * @param source I/O parameter. On input, a pointer to a buffer of
 * Unicode characters to be compressed. On output, points to the character
 * following the last character compressed.
 * @param sourceLimit A pointer to the end of the array <TT>source</TT>.
 * @param status A pointer to a UErrorCode to receive any errors.
 *
 * @see scsu_decompress
 */
U_CAPI void U_EXPORT2 scsu_compress(UnicodeCompressor *comp,
uint8_t **target,
const uint8_t *targetLimit,
const UChar **source,
const UChar *sourceLimit,
UErrorCode *status);
/**
 * Decompress a byte array into a Unicode character array.
 *
 * This function will either completely fill the output buffer, or
 * consume the entire input.
 * If the source data is completely decompressed, <TT>status</TT> will be set
 * to <TT>U_ZERO_ERROR</TT>.
 * If the source data is not completely decompressed, <TT>status</TT> will be
 * set to <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT>. If this occurs, larger buffers
 * should be allocated, or data flushed, and the function should be called
 * again with the new buffers.
 *
 * This function returns nothing; the number of Unicode characters written
 * can be derived from how far <TT>*target</TT> was advanced.
 *
 * @param comp A pointer to a previously-initialized UnicodeCompressor
 * @param target I/O parameter. On input, a pointer to a buffer of Unicode
 * characters to receive the decompressed data. On output, points to the
 * character following the last character written. This buffer must be
 * at least 2 bytes.
 * @param targetLimit A pointer to the end of the array <TT>target</TT>.
 * @param source I/O parameter. On input, a pointer to a buffer of
 * bytes to be decompressed. On output, points to the byte following the
 * last byte decompressed.
 * @param sourceLimit A pointer to the end of the array <TT>source</TT>.
 * @param status A pointer to a UErrorCode to receive any errors.
 *
 * @see scsu_compress
 */
U_CAPI void U_EXPORT2 scsu_decompress(UnicodeCompressor *comp,
UChar **target,
const UChar *targetLimit,
const uint8_t **source,
const uint8_t *sourceLimit,
UErrorCode *status);
#endif

View file

@ -0,0 +1,892 @@
/*
*******************************************************************************
*
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: ubidi.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999jul27
* created by: Markus W. Scherer
*/
#ifndef UBIDI_H
#define UBIDI_H
#include "unicode/utypes.h"
#include "unicode/uchar.h"
/*
* javadoc-style comments are intended to be transformed into HTML
* using DOC++ - see
* http://www.zib.de/Visual/software/doc++/index.html .
*
* The HTML documentation is created with
* doc++ -H ubidi.h
*
* The following #define trick allows us to do it all in one file
* and still be able to compile it.
*/
#define DOCXX_TAG
#define BIDI_SAMPLE_CODE
/**
* @name BIDI algorithm for ICU
*
* <h2>BIDI algorithm for ICU</h2>
*
* This is an implementation of the Unicode Bidirectional algorithm.
* The algorithm is defined in the
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
* version 5, also described in The Unicode Standard, Version 3.0 .<p>
*
* <h3>General remarks about the API:</h3>
*
* In functions with an error code parameter,
* the <code>pErrorCode</code> pointer must be valid
* and the value that it points to must not indicate a failure before
* the function call. Otherwise, the function returns immediately.
* After the function call, the value indicates success or failure.<p>
*
* The <quote>limit</quote> of a sequence of characters is the position just after their
* last character, i.e., one more than that position.<p>
*
* Some of the API functions provide access to <quote>runs</quote>.
* Such a <quote>run</quote> is defined as a sequence of characters
* that are at the same embedding level
* after performing the BIDI algorithm.<p>
*
* @author Markus W. Scherer
* @version 1.0
*/
DOCXX_TAG
/*@{*/
/**
 * UBiDiLevel is the type of the level values in this
 * BiDi implementation.
 * It holds an embedding level and indicates the visual direction
 * by its bit&nbsp;0 (even/odd value).<p>
 *
 * It can also hold non-level values for the
 * <code>paraLevel</code> and <code>embeddingLevels</code>
 * arguments of <code>ubidi_setPara()</code>; there:
 * <ul>
 * <li>bit&nbsp;7 of an <code>embeddingLevels[]</code>
 * value indicates whether the using application is
 * specifying the level of a character to <i>override</i> whatever the
 * BiDi implementation would resolve it to.</li>
 * <li><code>paraLevel</code> can be set to the
 * pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
 * and <code>UBIDI_DEFAULT_RTL</code>.</li>
 * </ul>
 *
 * @see ubidi_setPara
 *
 * <p>The related constants are not real, valid level values.
 * <code>UBIDI_DEFAULT_XXX</code> can be used to specify
 * a default for the paragraph level for
 * when the <code>ubidi_setPara()</code> function
 * shall determine it but there is no
 * strongly typed character in the input.<p>
 *
 * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
 * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
 * just like with normal LTR and RTL level values -
 * these special values are designed that way. Also, the implementation
 * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
 *
 * @see UBIDI_DEFAULT_LTR
 * @see UBIDI_DEFAULT_RTL
 * @see UBIDI_LEVEL_OVERRIDE
 * @see UBIDI_MAX_EXPLICIT_LEVEL
 */
typedef uint8_t UBiDiLevel;
/** Paragraph level setting.
* If there is no strong character, then set the paragraph level to 0 (left-to-right).
*/
#define UBIDI_DEFAULT_LTR 0xfe
/** Paragraph level setting.
* If there is no strong character, then set the paragraph level to 1 (right-to-left).
*/
#define UBIDI_DEFAULT_RTL 0xff
/**
* Maximum explicit embedding level.
* (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
*
*/
#define UBIDI_MAX_EXPLICIT_LEVEL 61
/** Bit flag for level input.
* Overrides directional properties.
*/
#define UBIDI_LEVEL_OVERRIDE 0x80
/**
* @memo <code>UBiDiDirection</code> values indicate the text direction.
*/
enum UBiDiDirection {
/** @memo All left-to-right text. This is a 0 value. */
UBIDI_LTR,
/** @memo All right-to-left text. This is a 1 value. */
UBIDI_RTL,
/** @memo Mixed-directional text. */
UBIDI_MIXED
};
typedef enum UBiDiDirection UBiDiDirection;
/**
* Forward declaration of the <code>UBiDi</code> structure for the declaration of
* the API functions. Its fields are implementation-specific.<p>
* This structure holds information about a paragraph of text
* with BiDi-algorithm-related details, or about one line of
* such a paragraph.<p>
* Reordering can be done on a line, or on a paragraph which is
* then interpreted as one single line.
*/
struct UBiDi;
typedef struct UBiDi UBiDi;
/**
 * Allocate a <code>UBiDi</code> structure.
 * Such an object is initially empty. It is assigned
 * the BiDi properties of a paragraph by <code>ubidi_setPara()</code>
 * or the BiDi properties of a line of a paragraph by
 * <code>ubidi_setLine()</code>.<p>
 * This object can be reused for as long as it is not deallocated
 * by calling <code>ubidi_close()</code>.<p>
 * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will
 * allocate additional memory for internal structures as necessary.
 *
 * @return An empty <code>UBiDi</code> object.
 */
U_CAPI UBiDi * U_EXPORT2
ubidi_open(void);
/**
 * Allocate a <code>UBiDi</code> structure with preallocated memory
 * for internal structures.
 * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
 * with no arguments, but it also preallocates memory for internal structures
 * according to the sizings supplied by the caller.<p>
 * Subsequent functions will not allocate any more memory, and are thus
 * guaranteed not to fail because of lack of memory.<p>
 * The preallocation can be limited to some of the internal memory
 * by setting some values to 0 here. That means that if, e.g.,
 * <code>maxRunCount</code> cannot be reasonably predetermined and should not
 * be set to <code>maxLength</code> (the only failproof value) to avoid
 * wasting memory, then <code>maxRunCount</code> could be set to 0 here
 * and the internal structures that are associated with it will be allocated
 * on demand, just like with <code>ubidi_open()</code>.
 *
 * @param maxLength is the maximum paragraph or line length that internal memory
 * will be preallocated for. An attempt to associate this object with a
 * longer text will fail, unless this value is 0, which leaves the allocation
 * up to the implementation.
 *
 * @param maxRunCount is the maximum anticipated number of same-level runs
 * that internal memory will be preallocated for. An attempt to access
 * visual runs on an object that was not preallocated for as many runs
 * as the text was actually resolved to will fail,
 * unless this value is 0, which leaves the allocation up to the implementation.<p>
 * The number of runs depends on the actual text and may be anywhere between
 * 1 and <code>maxLength</code>. It is typically small.<p>
 *
 * @param pErrorCode must be a valid pointer to an error code value,
 * which must not indicate a failure before the function call.
 *
 * @return An empty <code>UBiDi</code> object with preallocated memory.
 */
U_CAPI UBiDi * U_EXPORT2
ubidi_openSized(UTextOffset maxLength, UTextOffset maxRunCount, UErrorCode *pErrorCode);
/**
* <code>ubidi_close()</code> must be called to free the memory
* associated with a UBiDi object.<p>
*
* <strong>Important: </strong>
* If a <code>UBiDi</code> object is the <quote>child</quote>
* of another one (its <quote>parent</quote>), after calling
* <code>ubidi_setLine()</code>, then the child object must
* be destroyed (closed) or reused (by calling
* <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
* before the parent object.
*
* @param pBiDi is a <code>UBiDi</code> object.
*
* @see ubidi_setPara
* @see ubidi_setLine
*/
U_CAPI void U_EXPORT2
ubidi_close(UBiDi *pBiDi);
/**
* Perform the Unicode BiDi algorithm. It is defined in the
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>,
* version 5,
* also described in The Unicode Standard, Version 3.0 .<p>
*
* This function takes a single plain text paragraph with or without
* externally specified embedding levels from <quote>styled</quote> text
* and computes the left-right-directionality of each character.<p>
*
* If the entire paragraph consists of text of only one direction, then
* the function may not perform all the steps described by the algorithm,
* i.e., some levels may not be the same as if all steps were performed.
* This is not relevant for unidirectional text.<br>
* For example, in pure LTR text with numbers the numbers would get
* a resolved level of 2 higher than the surrounding text according to
* the algorithm. This implementation may set all resolved levels to
* the same value in such a case.<p>
*
* The text must be externally split into separate paragraphs (rule P1).
* Paragraph separators (B) should appear at most at the very end.
*
* @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
* which will be set to contain the reordering information,
* especially the resolved levels for all the characters in <code>text</code>.
*
* @param text is a pointer to the single-paragraph text that the
* BiDi algorithm will be performed on
* (step (P1) of the algorithm is performed externally).
* <strong>The text must be (at least) <code>length</code> long.</strong>
*
* @param length is the length of the text; if <code>length==-1</code> then
* the text must be zero-terminated.
*
* @param paraLevel specifies the default level for the paragraph;
* it is typically 0 (LTR) or 1 (RTL).
* If the function shall determine the paragraph level from the text,
* then <code>paraLevel</code> can be set to
* either <code>UBIDI_DEFAULT_LTR</code>
* or <code>UBIDI_DEFAULT_RTL</code>;
* if there is no strongly typed character, then
* the desired default is used (0 for LTR or 1 for RTL).
* Any other value between 0 and <code>UBIDI_MAX_EXPLICIT_LEVEL</code> is also valid,
* with odd levels indicating RTL.
*
* @param embeddingLevels (in) may be used to preset the embedding and override levels,
* ignoring characters like LRE and PDF in the text.
* A level overrides the directional property of its corresponding
* (same index) character if the level has the
* <code>UBIDI_LEVEL_OVERRIDE</code> bit set.<p>
* Except for that bit, it must be
* <code>paraLevel&lt;=embeddingLevels[]&lt;=UBIDI_MAX_EXPLICIT_LEVEL</code>.<p>
* <strong>Caution: </strong>A copy of this pointer, not of the levels,
* will be stored in the <code>UBiDi</code> object;
* the <code>embeddingLevels</code> array must not be
* deallocated before the <code>UBiDi</code> structure is destroyed or reused,
* and the <code>embeddingLevels</code>
* should not be modified to avoid unexpected results on subsequent BiDi operations.
* However, the <code>ubidi_setPara()</code> and
* <code>ubidi_setLine()</code> functions may modify some or all of the levels.<p>
* After the <code>UBiDi</code> object is reused or destroyed, the caller
* must take care of the deallocation of the <code>embeddingLevels</code> array.<p>
* <strong>The <code>embeddingLevels</code> array must be
* at least <code>length</code> long.</strong>
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*/
U_CAPI void U_EXPORT2
ubidi_setPara(UBiDi *pBiDi, const UChar *text, UTextOffset length,
UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
UErrorCode *pErrorCode);
/**
 * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
 * contain the reordering information, especially the resolved levels,
 * for all the characters in a line of text. This line of text is
 * specified by referring to a <code>UBiDi</code> object representing
 * this information for a paragraph of text, and by specifying
 * a range of indexes in this paragraph.<p>
 * In the new line object, the indexes will range from 0 to <code>limit-start</code>.<p>
 *
 * This is used after calling <code>ubidi_setPara()</code>
 * for a paragraph, and after line-breaking on that paragraph.
 * It is not necessary if the paragraph is treated as a single line.<p>
 *
 * After line-breaking, rules (L1) and (L2) for the treatment of
 * trailing WS and for reordering are performed on
 * a <code>UBiDi</code> object that represents a line.<p>
 *
 * <strong>Important: </strong><code>pLineBiDi</code> shares data with
 * <code>pParaBiDi</code>.
 * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
 * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
 * before the object for its parent paragraph.
 *
 * @param pParaBiDi is the parent paragraph object.
 *
 * @param start is the line's first index into the paragraph text.
 *
 * @param limit is just behind the line's last index into the paragraph text
 * (its last index +1).<br>
 * It must be <code>0&lt;=start&lt;=limit&lt;=</code>paragraph length.
 *
 * @param pLineBiDi is the object that will now represent a line of the paragraph.
 *
 * @param pErrorCode must be a valid pointer to an error code value,
 * which must not indicate a failure before the function call.
 *
 * @see ubidi_setPara
 */
U_CAPI void U_EXPORT2
ubidi_setLine(const UBiDi *pParaBiDi,
UTextOffset start, UTextOffset limit,
UBiDi *pLineBiDi,
UErrorCode *pErrorCode);
/**
* Get the directionality of the text.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @return A <code>UBIDI_XXX</code> value that indicates if the entire text
* represented by this object is unidirectional,
* and which direction, or if it is mixed-directional.
*
* @see UBiDiDirection
*/
U_CAPI UBiDiDirection U_EXPORT2
ubidi_getDirection(const UBiDi *pBiDi);
/**
* Get the length of the text.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @return The length of the text that the UBiDi object was created for.
*/
U_CAPI UTextOffset U_EXPORT2
ubidi_getLength(const UBiDi *pBiDi);
/**
* Get the paragraph level of the text.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @return The paragraph level.
*
* @see UBiDiLevel
*/
U_CAPI UBiDiLevel U_EXPORT2
ubidi_getParaLevel(const UBiDi *pBiDi);
/**
* Get the level for one character.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param charIndex the index of a character.
*
* @return The level for the character at charIndex.
*
* @see UBiDiLevel
*/
U_CAPI UBiDiLevel U_EXPORT2
ubidi_getLevelAt(const UBiDi *pBiDi, UTextOffset charIndex);
/**
* Get an array of levels for each character.<p>
*
* Note that this function may allocate memory under some
* circumstances, unlike <code>ubidi_getLevelAt()</code>.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @return The levels array for the text,
* or <code>NULL</code> if an error occurs.
*
* @see UBiDiLevel
*/
U_CAPI const UBiDiLevel * U_EXPORT2
ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
/**
* Get a logical run.
* This function returns information about a run and is used
* to retrieve runs in logical order.<p>
* This is especially useful for line-breaking on a paragraph.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param logicalStart is the first character of the run.
*
* @param pLogicalLimit will receive the limit of the run.
* The l-value that you point to here may be the
* same expression (variable) as the one for
* <code>logicalStart</code>.
* This pointer can be <code>NULL</code> if this
* value is not necessary.
*
* @param pLevel will receive the level of the run.
* This pointer can be <code>NULL</code> if this
* value is not necessary.
*/
U_CAPI void U_EXPORT2
ubidi_getLogicalRun(const UBiDi *pBiDi, UTextOffset logicalStart,
UTextOffset *pLogicalLimit, UBiDiLevel *pLevel);
/**
* Get the number of runs.
* This function may invoke the actual reordering on the
* <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
* may have resolved only the levels of the text. Therefore,
* <code>ubidi_countRuns()</code> may have to allocate memory,
* and may fail doing so.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @return The number of runs.
*/
U_CAPI UTextOffset U_EXPORT2
ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
/**
* Get one run's logical start, length, and directionality,
* which can be 0 for LTR or 1 for RTL.
* In an RTL run, the character at the logical start is
* visually on the right of the displayed run.
* The length is the number of characters in the run.<p>
* <code>ubidi_countRuns()</code> should be called
* before the runs are retrieved.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param runIndex is the number of the run in visual order, in the
* range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
*
* @param pLogicalStart is the first logical character index in the text.
* The pointer may be <code>NULL</code> if this index is not needed.
*
* @param pLength is the number of characters (at least one) in the run.
* The pointer may be <code>NULL</code> if this is not needed.
*
* @return the directionality of the run,
* <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
* never <code>UBIDI_MIXED</code>.
*
* @see ubidi_countRuns
*
* Example:
* <pre>
*&nbsp; UTextOffset i, count=ubidi_countRuns(pBiDi),
*&nbsp; logicalStart, visualIndex=0, length;
*&nbsp; for(i=0; i&lt;count; ++i) {
*&nbsp; if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
*&nbsp; do { // LTR
*&nbsp; show_char(text[logicalStart++], visualIndex++);
*&nbsp; } while(--length>0);
*&nbsp; } else {
*&nbsp; logicalStart+=length; // logicalLimit
*&nbsp; do { // RTL
*&nbsp; show_char(text[--logicalStart], visualIndex++);
*&nbsp; } while(--length>0);
*&nbsp; }
*&nbsp; }
* </pre>
*
* Note that in right-to-left runs, code like this places
* modifier letters before base characters and second surrogates
* before first ones.
*/
U_CAPI UBiDiDirection U_EXPORT2
ubidi_getVisualRun(UBiDi *pBiDi, UTextOffset runIndex,
UTextOffset *pLogicalStart, UTextOffset *pLength);
/**
* Get the visual position from a logical text position.
* If such a mapping is used many times on the same
* <code>UBiDi</code> object, then calling
* <code>ubidi_getLogicalMap()</code> is more efficient.<p>
*
* Note that in right-to-left runs, this mapping places
* modifier letters before base characters and second surrogates
* before first ones.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param logicalIndex is the index of a character in the text.
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @return The visual position of this character.
*
* @see ubidi_getLogicalMap
* @see ubidi_getLogicalIndex
*/
U_CAPI UTextOffset U_EXPORT2
ubidi_getVisualIndex(UBiDi *pBiDi, UTextOffset logicalIndex, UErrorCode *pErrorCode);
/**
* Get the logical text position from a visual position.
* If such a mapping is used many times on the same
* <code>UBiDi</code> object, then calling
* <code>ubidi_getVisualMap()</code> is more efficient.<p>
*
* This is the inverse function to <code>ubidi_getVisualIndex()</code>.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param visualIndex is the visual position of a character.
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @return The index of this character in the text.
*
* @see ubidi_getVisualMap
* @see ubidi_getVisualIndex
*/
U_CAPI UTextOffset U_EXPORT2
ubidi_getLogicalIndex(UBiDi *pBiDi, UTextOffset visualIndex, UErrorCode *pErrorCode);
/**
* Get a logical-to-visual index map (array) for the characters in the UBiDi
* (paragraph or line) object.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param indexMap is a pointer to an array of <code>ubidi_getLength()</code>
* indexes which will reflect the reordering of the characters.
* The array does not need to be initialized.<p>
* The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.<p>
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @see ubidi_getVisualMap
* @see ubidi_getVisualIndex
*/
U_CAPI void U_EXPORT2
ubidi_getLogicalMap(UBiDi *pBiDi, UTextOffset *indexMap, UErrorCode *pErrorCode);
/**
* Get a visual-to-logical index map (array) for the characters in the UBiDi
* (paragraph or line) object.
*
* @param pBiDi is the paragraph or line <code>UBiDi</code> object.
*
* @param indexMap is a pointer to an array of <code>ubidi_getLength()</code>
* indexes which will reflect the reordering of the characters.
* The array does not need to be initialized.<p>
* The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.<p>
*
* @param pErrorCode must be a valid pointer to an error code value,
* which must not indicate a failure before the function call.
*
* @see ubidi_getLogicalMap
* @see ubidi_getLogicalIndex
*/
U_CAPI void U_EXPORT2
ubidi_getVisualMap(UBiDi *pBiDi, UTextOffset *indexMap, UErrorCode *pErrorCode);
/**
* This is a convenience function that does not use a UBiDi object.
* It is intended to be used for when an application has determined the levels
* of objects (character sequences) and just needs to have them reordered (L2).
* This is equivalent to using <code>ubidi_getLogicalMap</code> on a
* <code>UBiDi</code> object.
*
* @param levels is an array with <code>length</code> levels that have been determined by
* the application.
*
* @param length is the number of levels in the array, or, semantically,
* the number of objects to be reordered.
* It must be <code>length&gt;0</code>.
*
* @param indexMap is a pointer to an array of <code>length</code>
* indexes which will reflect the reordering of the characters.
* The array does not need to be initialized.<p>
* The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
*/
U_CAPI void U_EXPORT2
ubidi_reorderLogical(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
/**
* This is a convenience function that does not use a UBiDi object.
* It is intended to be used for when an application has determined the levels
* of objects (character sequences) and just needs to have them reordered (L2).
* This is equivalent to using <code>ubidi_getVisualMap</code> on a
* <code>UBiDi</code> object.
*
* @param levels is an array with <code>length</code> levels that have been determined by
* the application.
*
* @param length is the number of levels in the array, or, semantically,
* the number of objects to be reordered.
* It must be <code>length&gt;0</code>.
*
* @param indexMap is a pointer to an array of <code>length</code>
* indexes which will reflect the reordering of the characters.
* The array does not need to be initialized.<p>
* The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
*/
U_CAPI void U_EXPORT2
ubidi_reorderVisual(const UBiDiLevel *levels, UTextOffset length, UTextOffset *indexMap);
/**
* Invert an index map.
* The one-to-one index mapping of the first map is inverted and written to
* the second one.
*
* @param srcMap is an array with <code>length</code> indexes
* which define the original mapping.
*
* @param destMap is an array with <code>length</code> indexes
* which will be filled with the inverse mapping.
*
* @param length is the length of each array.
*/
U_CAPI void U_EXPORT2
ubidi_invertMap(const UTextOffset *srcMap, UTextOffset *destMap, UTextOffset length);
/**
* @name Sample code for the ICU BIDI API
*
* <h2>Rendering a paragraph with the ICU BiDi API</h2>
*
* This is (hypothetical) sample code that illustrates
* how the ICU BiDi API could be used to render a paragraph of text.
* Rendering code depends highly on the graphics system,
* therefore this sample code must make a lot of assumptions,
* which may or may not match any existing graphics system's properties.
*
* <p>The basic assumptions are:</p>
* <ul>
* <li>Rendering is done from left to right on a horizontal line.</li>
* <li>A run of single-style, unidirectional text can be rendered at once.</li>
* <li>Such a run of text is passed to the graphics system with
* characters (code units) in logical order.</li>
* <li>The line-breaking algorithm is very complicated
* and Locale-dependent -
* and therefore its implementation omitted from this sample code.</li>
* </ul>
*
* <pre>
*&nbsp; #include "unicode/ubidi.h"
*&nbsp;
*&nbsp; typedef enum {
*&nbsp; styleNormal=0, styleSelected=1,
*&nbsp; styleBold=2, styleItalics=4,
*&nbsp; styleSuper=8, styleSub=16
*&nbsp; } Style;
*&nbsp;
*&nbsp; typedef struct { UTextOffset limit; Style style; } StyleRun;
*&nbsp;
*&nbsp; int getTextWidth(const UChar *text, UTextOffset start, UTextOffset limit,
*&nbsp; const StyleRun *styleRuns, int styleRunCount);
*&nbsp;
*&nbsp; // set *pLimit and *pStyleRunLimit for a line
*&nbsp; // from text[start] and from styleRuns[styleRunStart]
*&nbsp; // using ubidi_getLogicalRun(para, ...)
*&nbsp; void getLineBreak(const UChar *text, UTextOffset start, UTextOffset *pLimit,
*&nbsp; UBiDi *para,
*&nbsp; const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
*&nbsp; int *pLineWidth);
*&nbsp;
*&nbsp; // render runs on a line sequentially, always from left to right
*&nbsp;
*&nbsp; // prepare rendering a new line
*&nbsp; void startLine(UBiDiDirection textDirection, int lineWidth);
*&nbsp;
*&nbsp; // render a run of text and advance to the right by the run width
*&nbsp; // the text[start..limit-1] is always in logical order
*&nbsp; void renderRun(const UChar *text, UTextOffset start, UTextOffset limit,
*&nbsp; UBiDiDirection textDirection, Style style);
*&nbsp;
*&nbsp; // We could compute a cross-product
*&nbsp; // from the style runs with the directional runs
*&nbsp; // and then reorder it.
*&nbsp; // Instead, here we iterate over each run type
*&nbsp; // and render the intersections -
*&nbsp; // with shortcuts in simple (and common) cases.
*&nbsp; // renderParagraph() is the main function.
*&nbsp;
*&nbsp; // render a directional run with
*&nbsp; // (possibly) multiple style runs intersecting with it
*&nbsp; void renderDirectionalRun(const UChar *text,
*&nbsp; UTextOffset start, UTextOffset limit,
*&nbsp; UBiDiDirection direction,
*&nbsp; const StyleRun *styleRuns, int styleRunCount) {
*&nbsp; int i;
*&nbsp;
*&nbsp; // iterate over style runs
*&nbsp; if(direction==UBIDI_LTR) {
*&nbsp; int styleLimit;
*&nbsp;
*&nbsp; for(i=0; i&lt;styleRunCount; ++i) {
*&nbsp; styleLimit=styleRuns[i].limit;
*&nbsp; if(start&lt;styleLimit) {
*&nbsp; if(styleLimit>limit) { styleLimit=limit; }
*&nbsp; renderRun(text, start, styleLimit,
*&nbsp; direction, styleRuns[i].style);
*&nbsp; if(styleLimit==limit) { break; }
*&nbsp; start=styleLimit;
*&nbsp; }
*&nbsp; }
*&nbsp; } else {
*&nbsp; int styleStart;
*&nbsp;
*&nbsp; for(i=styleRunCount-1; i>=0; --i) {
*&nbsp; if(i>0) {
*&nbsp; styleStart=styleRuns[i-1].limit;
*&nbsp; } else {
*&nbsp; styleStart=0;
*&nbsp; }
*&nbsp; if(limit>=styleStart) {
*&nbsp; if(styleStart&lt;start) { styleStart=start; }
*&nbsp; renderRun(text, styleStart, limit,
*&nbsp; direction, styleRuns[i].style);
*&nbsp; if(styleStart==start) { break; }
*&nbsp; limit=styleStart;
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp;
*&nbsp; // the line object represents text[start..limit-1]
*&nbsp; void renderLine(UBiDi *line, const UChar *text,
*&nbsp; UTextOffset start, UTextOffset limit,
*&nbsp; const StyleRun *styleRuns, int styleRunCount) {
*&nbsp; UBiDiDirection direction=ubidi_getDirection(line);
*&nbsp; if(direction!=UBIDI_MIXED) {
*&nbsp; // unidirectional
*&nbsp; if(styleRunCount&lt;=1) {
*&nbsp; renderRun(text, start, limit, direction, styleRuns[0].style);
*&nbsp; } else {
*&nbsp; renderDirectionalRun(text, start, limit,
*&nbsp; direction, styleRuns, styleRunCount);
*&nbsp; }
*&nbsp; } else {
*&nbsp; // mixed-directional
*&nbsp; UTextOffset count, i, length;
*&nbsp; UBiDiLevel level;
*&nbsp;
*&nbsp; count=ubidi_countRuns(line, pErrorCode);
*&nbsp; if(U_SUCCESS(*pErrorCode)) {
*&nbsp; if(styleRunCount&lt;=1) {
*&nbsp; Style style=styleRuns[0].style;
*&nbsp;
*&nbsp; // iterate over directional runs
*&nbsp; for(i=0; i&lt;count; ++i) {
*&nbsp; direction=ubidi_getVisualRun(line, i, &start, &length);
*&nbsp; renderRun(text, start, start+length, direction, style);
*&nbsp; }
*&nbsp; } else {
*&nbsp; UTextOffset j;
*&nbsp;
*&nbsp; // iterate over both directional and style runs
*&nbsp; for(i=0; i&lt;count; ++i) {
*&nbsp; direction=ubidi_getVisualRun(line, i, &start, &length);
*&nbsp; renderDirectionalRun(text, start, start+length,
*&nbsp; direction, styleRuns, styleRunCount);
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp;
*&nbsp; void renderParagraph(const UChar *text, UTextOffset length,
*&nbsp; UBiDiDirection textDirection,
*&nbsp; const StyleRun *styleRuns, int styleRunCount,
*&nbsp; int lineWidth,
*&nbsp; UErrorCode *pErrorCode) {
*&nbsp; UBiDi *para;
*&nbsp;
*&nbsp; if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length&lt;=0) {
*&nbsp; return;
*&nbsp; }
*&nbsp;
*&nbsp; para=ubidi_openSized(length, 0, pErrorCode);
*&nbsp; if(para==NULL) { return; }
*&nbsp;
*&nbsp; ubidi_setPara(para, text, length,
*&nbsp; textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
*&nbsp; NULL, pErrorCode);
*&nbsp; if(U_SUCCESS(*pErrorCode)) {
*&nbsp; UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
*&nbsp; StyleRun styleRun={ length, styleNormal };
*&nbsp; int width;
*&nbsp;
*&nbsp; if(styleRuns==NULL || styleRunCount&lt;=0) {
*&nbsp; styleRunCount=1;
*&nbsp; styleRuns=&styleRun;
*&nbsp; }
*&nbsp;
*&nbsp; // assume styleRuns[styleRunCount-1].limit>=length
*&nbsp;
*&nbsp; width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
*&nbsp; if(width&lt;=lineWidth) {
*&nbsp; // everything fits onto one line
*&nbsp;
*&nbsp; // prepare rendering a new line from either left or right
*&nbsp; startLine(paraLevel, width);
*&nbsp;
*&nbsp; renderLine(para, text, 0, length,
*&nbsp; styleRuns, styleRunCount);
*&nbsp; } else {
*&nbsp; UBiDi *line;
*&nbsp;
*&nbsp; // we need to render several lines
*&nbsp; line=ubidi_openSized(length, 0, pErrorCode);
*&nbsp; if(line!=NULL) {
*&nbsp; UTextOffset start=0, limit;
*&nbsp; int styleRunStart=0, styleRunLimit;
*&nbsp;
*&nbsp; for(;;) {
*&nbsp; limit=length;
*&nbsp; styleRunLimit=styleRunCount;
*&nbsp; getLineBreak(text, start, &limit, para,
*&nbsp; styleRuns, styleRunStart, &styleRunLimit,
*&nbsp; &width);
*&nbsp; ubidi_setLine(para, start, limit, line, pErrorCode);
*&nbsp; if(U_SUCCESS(*pErrorCode)) {
*&nbsp; // prepare rendering a new line
*&nbsp; // from either left or right
*&nbsp; startLine(paraLevel, width);
*&nbsp;
*&nbsp; renderLine(line, text, start, limit,
*&nbsp; styleRuns+styleRunStart,
*&nbsp; styleRunLimit-styleRunStart);
*&nbsp; }
*&nbsp; if(limit==length) { break; }
*&nbsp; start=limit;
*&nbsp; styleRunStart=styleRunLimit-1;
*&nbsp; if(start>=styleRuns[styleRunStart].limit) {
*&nbsp; ++styleRunStart;
*&nbsp; }
*&nbsp; }
*&nbsp;
*&nbsp; ubidi_close(line);
*&nbsp; }
*&nbsp; }
*&nbsp; }
*&nbsp;
*&nbsp; ubidi_close(para);
*&nbsp; }
* </pre>
*/
BIDI_SAMPLE_CODE
/*@{*/
/*@}*/
/*@}*/
#endif

View file

@ -0,0 +1,841 @@
/*
**********************************************************************
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File UCHAR.H
*
* Modification History:
*
* Date Name Description
* 04/02/97 aliu Creation.
* 03/29/99 helena Updated for C APIs.
* 4/15/99 Madhu Updated for C Implementation and Javadoc
* 5/20/99 Madhu Added the function u_getVersion()
* 8/19/1999 srl Upgraded scripts to Unicode 3.0
* 8/27/1999 schererm UCharDirection constants: U_...
* 11/11/1999 weiv added u_isalnum(), cleaned comments
********************************************************************************
*/
#ifndef UCHAR_H
#define UCHAR_H
#include "unicode/utypes.h"
/*===========================================================================*/
/* Unicode version number */
/*===========================================================================*/
#define UNICODE_VERSION "3.0.0"
/**
* The Unicode C API allows you to query the properties associated with individual
* Unicode character values.
* <p>
* The Unicode character information, provided implicitly by the
* Unicode character encoding standard, includes information about the script
* (for example, symbols or control characters) to which the character belongs,
* as well as semantic information such as whether a character is a digit or
* uppercase, lowercase, or uncased.
* <P>
*/
/**
* Constants.
*/
/**
* The minimum value a UChar can have. The lowest value a
* UChar can have is 0x0000.
* <p>
* Defined as an initialized <code>static const</code> so that every
* translation unit including this header sees the documented value.
*/
static const UChar UCHAR_MIN_VALUE = 0x0000;
/**
* The maximum value a UChar can have. The greatest value a
* UChar can have is 0xffff.
* <p>
* Defined as an initialized <code>static const</code>: an uninitialized
* file-scope <code>static</code> would be zero in every includer, which
* contradicts the value documented above.
*/
static const UChar UCHAR_MAX_VALUE = 0xffff;
/**
* Data for enumerated Unicode general category types.
* <p>
* Every enumerator except the last carries an explicit value (0 through 30).
* U_CHAR_CATEGORY_COUNT has no initializer, so it takes the next value, 31.
*
* @see u_charType()
*/
enum UCharCategory
{
/** */
U_UNASSIGNED = 0,
/** */
U_UPPERCASE_LETTER = 1,
/** */
U_LOWERCASE_LETTER = 2,
/** */
U_TITLECASE_LETTER = 3,
/** */
U_MODIFIER_LETTER = 4,
/** */
U_OTHER_LETTER = 5,
/** */
U_NON_SPACING_MARK = 6,
/** */
U_ENCLOSING_MARK = 7,
/** */
U_COMBINING_SPACING_MARK = 8,
/** */
U_DECIMAL_DIGIT_NUMBER = 9,
/** */
U_LETTER_NUMBER = 10,
/** */
U_OTHER_NUMBER = 11,
/** */
U_SPACE_SEPARATOR = 12,
/** */
U_LINE_SEPARATOR = 13,
/** */
U_PARAGRAPH_SEPARATOR = 14,
/** */
U_CONTROL_CHAR = 15,
/** */
U_FORMAT_CHAR = 16,
/** */
U_PRIVATE_USE_CHAR = 17,
/** */
U_SURROGATE = 18,
/** */
U_DASH_PUNCTUATION = 19,
/** */
U_START_PUNCTUATION = 20,
/** */
U_END_PUNCTUATION = 21,
/** */
U_CONNECTOR_PUNCTUATION = 22,
/** */
U_OTHER_PUNCTUATION = 23,
/** */
U_MATH_SYMBOL = 24,
/** */
U_CURRENCY_SYMBOL = 25,
/** */
U_MODIFIER_SYMBOL = 26,
/** */
U_OTHER_SYMBOL = 27,
/** */
U_INITIAL_PUNCTUATION = 28,
/** */
U_FINAL_PUNCTUATION = 29,
/** */
U_GENERAL_OTHER_TYPES = 30,
/** Not a category: one past the last explicit value above (31). */
U_CHAR_CATEGORY_COUNT
};
/* Lets C code use the name UCharCategory without the enum keyword. */
typedef enum UCharCategory UCharCategory;
/**
* This specifies the language directional property of a character set.
* <p>
* Every enumerator except the last carries an explicit value (0 through 18).
* U_CHAR_DIRECTION_COUNT has no initializer, so it takes the next value, 19.
*
* @see u_charDirection()
*/
enum UCharDirection {
/** */
U_LEFT_TO_RIGHT = 0,
/** */
U_RIGHT_TO_LEFT = 1,
/** */
U_EUROPEAN_NUMBER = 2,
/** */
U_EUROPEAN_NUMBER_SEPARATOR = 3,
/** */
U_EUROPEAN_NUMBER_TERMINATOR = 4,
/** */
U_ARABIC_NUMBER = 5,
/** */
U_COMMON_NUMBER_SEPARATOR = 6,
/** */
U_BLOCK_SEPARATOR = 7,
/** */
U_SEGMENT_SEPARATOR = 8,
/** */
U_WHITE_SPACE_NEUTRAL = 9,
/** */
U_OTHER_NEUTRAL = 10,
/** */
U_LEFT_TO_RIGHT_EMBEDDING = 11,
/** */
U_LEFT_TO_RIGHT_OVERRIDE = 12,
/** */
U_RIGHT_TO_LEFT_ARABIC = 13,
/** */
U_RIGHT_TO_LEFT_EMBEDDING = 14,
/** */
U_RIGHT_TO_LEFT_OVERRIDE = 15,
/** */
U_POP_DIRECTIONAL_FORMAT = 16,
/** */
U_DIR_NON_SPACING_MARK = 17,
/** */
U_BOUNDARY_NEUTRAL = 18,
/** Not a direction: one past the last explicit value above (19). */
U_CHAR_DIRECTION_COUNT
};
/* Lets C code use the name UCharDirection without the enum keyword. */
typedef enum UCharDirection UCharDirection;
/**
* Script range as defined in the Unicode standard.
* <p>
* No enumerator before U_NO_SCRIPT has an initializer, so they are numbered
* from 0 in declaration order; inserting or reordering entries changes
* every following value.
*
* @see u_charScript()
*/
/* Generated from Unicode Data files */
enum UCharScript {
/* Script names */
/** */
U_BASIC_LATIN,
/** */
U_LATIN_1_SUPPLEMENT,
/** */
U_LATIN_EXTENDED_A,
/** */
U_LATIN_EXTENDED_B,
/** */
U_IPA_EXTENSIONS,
/** */
U_SPACING_MODIFIER_LETTERS,
/** */
U_COMBINING_DIACRITICAL_MARKS,
/** */
U_GREEK,
/** */
U_CYRILLIC,
/** */
U_ARMENIAN,
/** */
U_HEBREW,
/** */
U_ARABIC,
/** */
U_SYRIAC,
/** */
U_THAANA,
/** */
U_DEVANAGARI,
/** */
U_BENGALI,
/** */
U_GURMUKHI,
/** */
U_GUJARATI,
/** */
U_ORIYA,
/** */
U_TAMIL,
/** */
U_TELUGU,
/** */
U_KANNADA,
/** */
U_MALAYALAM,
/** */
U_SINHALA,
/** */
U_THAI,
/** */
U_LAO,
/** */
U_TIBETAN,
/** */
U_MYANMAR,
/** */
U_GEORGIAN,
/** */
U_HANGUL_JAMO,
/** */
U_ETHIOPIC,
/** */
U_CHEROKEE,
/** */
U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
/** */
U_OGHAM,
/** */
U_RUNIC,
/** */
U_KHMER,
/** */
U_MONGOLIAN,
/** */
U_LATIN_EXTENDED_ADDITIONAL,
/** */
U_GREEK_EXTENDED,
/** */
U_GENERAL_PUNCTUATION,
/** */
U_SUPERSCRIPTS_AND_SUBSCRIPTS,
/** */
U_CURRENCY_SYMBOLS,
/** */
U_COMBINING_MARKS_FOR_SYMBOLS,
/** */
U_LETTERLIKE_SYMBOLS,
/** */
U_NUMBER_FORMS,
/** */
U_ARROWS,
/** */
U_MATHEMATICAL_OPERATORS,
/** */
U_MISCELLANEOUS_TECHNICAL,
/** */
U_CONTROL_PICTURES,
/** */
U_OPTICAL_CHARACTER_RECOGNITION,
/** */
U_ENCLOSED_ALPHANUMERICS,
/** */
U_BOX_DRAWING,
/** */
U_BLOCK_ELEMENTS,
/** */
U_GEOMETRIC_SHAPES,
/** */
U_MISCELLANEOUS_SYMBOLS,
/** */
U_DINGBATS,
/** */
U_BRAILLE_PATTERNS,
/** */
U_CJK_RADICALS_SUPPLEMENT,
/** */
U_KANGXI_RADICALS,
/** */
U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
/** */
U_CJK_SYMBOLS_AND_PUNCTUATION,
/** */
U_HIRAGANA,
/** */
U_KATAKANA,
/** */
U_BOPOMOFO,
/** */
U_HANGUL_COMPATIBILITY_JAMO,
/** */
U_KANBUN,
/** */
U_BOPOMOFO_EXTENDED,
/** */
U_ENCLOSED_CJK_LETTERS_AND_MONTHS,
/** */
U_CJK_COMPATIBILITY,
/** */
U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
/** */
U_CJK_UNIFIED_IDEOGRAPHS,
/** */
U_YI_SYLLABLES,
/** */
U_YI_RADICALS,
/** */
U_HANGUL_SYLLABLES,
/** */
U_HIGH_SURROGATES,
/** */
U_HIGH_PRIVATE_USE_SURROGATES,
/** */
U_LOW_SURROGATES,
/** */
U_PRIVATE_USE_AREA,
/** */
U_CJK_COMPATIBILITY_IDEOGRAPHS,
/** */
U_ALPHABETIC_PRESENTATION_FORMS,
/** */
U_ARABIC_PRESENTATION_FORMS_A,
/** */
U_COMBINING_HALF_MARKS,
/** */
U_CJK_COMPATIBILITY_FORMS,
/** */
U_SMALL_FORM_VARIANTS,
/** */
U_ARABIC_PRESENTATION_FORMS_B,
/** */
U_SPECIALS,
/** */
U_HALFWIDTH_AND_FULLWIDTH_FORMS,
/** Not a script: the number of enumerators declared above. */
U_CHAR_SCRIPT_COUNT,
/** Alias with the same value as U_CHAR_SCRIPT_COUNT. */
U_NO_SCRIPT=U_CHAR_SCRIPT_COUNT
};
/* Lets C code use the name UCharScript without the enum keyword. */
typedef enum UCharScript UCharScript;
/**
* Values returned by the u_charCellWidth() function.
* <p>
* U_CELL_WIDTH_COUNT has no initializer, so it takes the value after
* U_NEUTRAL_WIDTH, i.e. 4.
*
* @see u_charCellWidth()
*/
enum UCellWidth
{
/** */
U_ZERO_WIDTH = 0,
/** */
U_HALF_WIDTH = 1,
/** */
U_FULL_WIDTH = 2,
/** */
U_NEUTRAL_WIDTH = 3,
/** Not a width: one past the last explicit value above (4). */
U_CELL_WIDTH_COUNT
};
/* Lets C code use the name UCellWidth without the enum keyword. */
typedef enum UCellWidth UCellWidth;
/**
* Selector constants for u_charName().
* <code>u_charName() returns either the "modern" name of a
* Unicode character or the name that was defined in
* Unicode version 1.0, before the Unicode standard merged
* with ISO-10646.
*
* @see u_charName()
*/
enum UCharNameChoice {
U_UNICODE_CHAR_NAME,
U_UNICODE_10_CHAR_NAME,
U_CHAR_NAME_CHOICE_COUNT
};
typedef enum UCharNameChoice UCharNameChoice;
/**
* Functions to classify characters.
*/
/**
* Determines whether the specified UChar is a lowercase character
* according to Unicode 2.1.2.
* <p>
* NOTE(review): UNICODE_VERSION in this header is "3.0.0" while the
* character-property comments say 2.1.2 -- confirm which data version
* the implementation actually uses.
*
* @param c the character to be tested
* @return true if the character is lowercase; false otherwise.
* @see UNICODE_VERSION
* @see u_isupper()
* @see u_istitle()
* @see u_tolower()
*/
U_CAPI bool_t U_EXPORT2
u_islower(UChar c);
/**
* Determines whether the specified character is an uppercase character
* according to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the character is uppercase; false otherwise.
* @see u_islower()
* @see u_istitle()
* @see u_tolower()
*/
U_CAPI bool_t U_EXPORT2
u_isupper(UChar c);
/**
* Determines whether the specified character is a titlecase character
* according to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the character is titlecase; false otherwise.
* @see u_isupper()
* @see u_islower()
* @see u_totitle()
*/
U_CAPI bool_t U_EXPORT2
u_istitle(UChar c);
/**
* Determines whether the specified character is a digit according to Unicode
* 2.1.2.
*
* @param c the character to be tested
* @return true if the character is a digit; false otherwise.
*/
U_CAPI bool_t U_EXPORT2
u_isdigit(UChar c);
/**
* Determines whether the specified character is an alphanumeric character
* (letter or digit) according to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the character is a letter or a digit; false otherwise.
*/
U_CAPI bool_t U_EXPORT2
u_isalnum(UChar c);
/**
* Determines whether the specified numeric value is actually a defined character
* according to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the character has a defined Unicode meaning; false otherwise.
*
* @see u_isdigit()
* @see u_isalpha()
* @see u_isalnum()
* @see u_isupper()
* @see u_islower()
* @see u_istitle()
*/
U_CAPI bool_t U_EXPORT2
u_isdefined(UChar c);
/**
* Determines whether the specified character is a letter
* according to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the character is a letter; false otherwise.
*
* @see u_isdigit()
* @see u_isalnum()
*/
U_CAPI bool_t U_EXPORT2
u_isalpha(UChar c);
/**
* Determines whether the specified character is a space character or not.
*
* @param c the character to be tested
* @return true if the character is a space character; false otherwise.
*/
U_CAPI bool_t U_EXPORT2
u_isspace(UChar c);
/**
* Determines whether the specified character is a control character or not.
*
* @param c the character to be tested
* @return true if the Unicode character is a control character; false otherwise.
*
* @see u_isprint()
*/
U_CAPI bool_t U_EXPORT2
u_iscntrl(UChar c);
/**
* Determines whether the specified character is a printable character according
* to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the Unicode character is a printable character; false otherwise.
*
* @see u_iscntrl()
*/
U_CAPI bool_t U_EXPORT2
u_isprint(UChar c);
/**
* Determines whether the specified character is of the base form according
* to Unicode 2.1.2.
*
* @param c the character to be tested
* @return true if the Unicode character is of the base form; false otherwise.
*
* @see u_isalpha()
* @see u_isdigit()
*/
U_CAPI bool_t U_EXPORT2
u_isbase(UChar c);
/**
* Returns the linguistic direction property of a character.
* <P>
* For example, 0x0041 (letter A) has the U_LEFT_TO_RIGHT directional
* property.
*
* @param c the character to be tested
* @return the direction property of the character, as a UCharDirection value.
* @see UCharDirection
*/
U_CAPI UCharDirection U_EXPORT2
u_charDirection(UChar c);
/**
* Returns a value indicating the display-cell width of the character
* when used in Asian text, according to the Unicode standard (see p. 6-130
* of The Unicode Standard, Version 2.0). The results for various characters
* are as follows:
* <P>
* U_ZERO_WIDTH: Characters which are considered to take up no display-cell space:
* control characters
* format characters
* line and paragraph separators
* non-spacing marks
* combining Hangul jungseong
* combining Hangul jongseong
* unassigned Unicode values
* <P>
* U_HALF_WIDTH: Characters which take up half a cell in standard Asian text:
* all characters in the General Scripts Area except combining Hangul choseong
* and the characters called out specifically above as U_ZERO_WIDTH
* alphabetic and Arabic presentation forms
* halfwidth CJK punctuation
* halfwidth Katakana
* halfwidth Hangul Jamo
* halfwidth forms, arrows, and shapes
* <P>
* U_FULL_WIDTH: Characters which take up a full cell in standard Asian text:
* combining Hangul choseong
* all characters in the CJK Phonetics and Symbols Area
* all characters in the CJK Ideographs Area
* all characters in the Hangul Syllables Area
* CJK compatibility ideographs
* CJK compatibility forms
* small form variants
* fullwidth ASCII
* fullwidth punctuation and currency signs
* <P>
* U_NEUTRAL_WIDTH: Characters whose cell width is context-dependent:
* all characters in the Symbols Area, except those specifically called out above
* all characters in the Surrogates Area
* all characters in the Private Use Area
* <P>
* For Korean text, this algorithm should work properly with properly normalized Korean
* text. Precomposed Hangul syllables and non-combining jamo are all considered full-
* width characters. For combining jamo, we treat choseong (initial consonants)
* as double-width characters and jungseong (vowels) and jongseong (final consonants)
* as non-spacing marks. This will work right in text that uses the precomposed
* choseong characters instead of two choseong characters in a row, and which uses the
* choseong filler character at the beginning of syllables that don't have an initial
* consonant. The results may be slightly off with Korean text following different
* conventions.
*
* @param c the character to be tested
* @return the cell width of the character; the declared return type is
* uint16_t rather than the UCellWidth enum.
* @see UCellWidth
*/
U_CAPI uint16_t U_EXPORT2
u_charCellWidth(UChar c);
/**
* Returns a value indicating a character category according to Unicode
* 2.1.2.
*
* @param c the character to be tested
* @return the character category; the declared return type is int8_t
* rather than the UCharCategory enum.
* @see UCharCategory
*/
U_CAPI int8_t U_EXPORT2
u_charType(UChar c);
/**
* Retrieves the decimal numeric value of a digit character.
*
* @param c the digit character for which to get the numeric value
* @return the numeric value of c in decimal radix. This method returns
* -1 if c is not a valid digit character.
*/
U_CAPI int32_t U_EXPORT2
u_charDigitValue(UChar c);
/**
* Returns the script associated with a character.
*
* @param ch the character to be tested
* @return the script of the character, as a UCharScript value.
* @see UCharScript
*/
U_CAPI UCharScript U_EXPORT2
u_charScript(UChar ch);
/**
* Retrieve the name of a Unicode character.
* Depending on <code>nameChoice</code>, the character name written
* into the buffer is the "modern" name or the name that was defined
* in Unicode version 1.0.
* The name contains only "invariant" characters
* like A-Z, 0-9, space, and '-'.
*
* @param code The character (code point) for which to get the name.
* It must be <code>0&lt;=code&lt;0x10ffff</code>.
* @param nameChoice Selector for which name to get.
* @param buffer Destination address for copying the name.
* @param bufferLength <code>==sizeof(buffer)</code>
* @param pErrorCode Pointer to a UErrorCode variable;
* check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
* returns.
* @return NOTE(review): not documented in this header; presumably the
* length of the name -- confirm against the implementation.
*
* @see UCharNameChoice
*/
U_CAPI UTextOffset U_EXPORT2
u_charName(uint32_t code, UCharNameChoice nameChoice,
char *buffer, UTextOffset bufferLength,
UErrorCode *pErrorCode);
/**
* The following functions are java specific.
*/
/**
* A convenience method for determining if a Unicode character
* is allowed to start a Unicode identifier.
* A character may start a Unicode identifier if and only if
* it is a letter.
*
* @param c the Unicode character.
* @return TRUE if the character may start a Unicode identifier;
* FALSE otherwise.
* @see u_isalpha()
* @see u_isIDPart()
*/
U_CAPI bool_t U_EXPORT2
u_isIDStart(UChar c);
/**
* A convenience method for determining if a Unicode character
* may be part of a Unicode identifier other than the starting
* character.
* <P>
* A character may be part of a Unicode identifier if and only if
* it is one of the following:
* <ul>
* <li> a letter
* <li> a connecting punctuation character (such as "_").
* <li> a digit
* <li> a numeric letter (such as a Roman numeral character)
* <li> a combining mark
* <li> a non-spacing mark
* <li> an ignorable control character
* </ul>
*
* @param c the Unicode character.
* @return TRUE if the character may be part of a Unicode identifier;
* FALSE otherwise.
* @see u_isIDIgnorable()
* @see u_isIDStart()
*/
U_CAPI bool_t U_EXPORT2
u_isIDPart(UChar c);
/**
* A convenience method for determining if a Unicode character
* should be regarded as an ignorable character
* in a Unicode identifier.
* <P>
* The following Unicode characters are ignorable in a
* Unicode identifier:
* <table>
* <tr><td>0x0000 through 0x0008,</td>
* <td>ISO control characters that</td></tr>
* <tr><td>0x000E through 0x001B,</td> <td>are not whitespace</td></tr>
* <tr><td>and 0x007F through 0x009F</td></tr>
* <tr><td>0x200C through 0x200F</td> <td>join controls</td></tr>
* <tr><td>0x200A through 0x200E</td> <td>bidirectional controls</td></tr>
* <tr><td>0x206A through 0x206F</td> <td>format controls</td></tr>
* <tr><td>0xFEFF</td> <td>zero-width no-break space</td></tr>
* </table>
* <P>
* NOTE(review): the "0x200A through 0x200E" row overlaps the join-control
* row above it. The Unicode bidirectional embedding/override controls are
* U+202A through U+202E, so this row is probably a typo for
* "0x202A through 0x202E" -- confirm against the implementation.
*
* @param c the Unicode character.
* @return TRUE if the character is ignorable in a Unicode identifier;
* FALSE otherwise.
* @see u_isIDPart()
*/
U_CAPI bool_t U_EXPORT2
u_isIDIgnorable(UChar c);
/**
* A convenience method for determining if a Unicode character
* is allowed as the first character in a Java identifier.
* <P>
* A character may start a Java identifier if and only if
* it is one of the following:
* <ul>
* <li> a letter
* <li> a currency symbol (such as "$")
* <li> a connecting punctuation symbol (such as "_").
* </ul>
*
* @param c the Unicode character.
* @return TRUE if the character may start a Java identifier;
* FALSE otherwise.
* @see u_isJavaIDPart()
* @see u_isalpha()
* @see u_isIDStart()
*/
U_CAPI bool_t U_EXPORT2
u_isJavaIDStart(UChar c);
/**
* A convenience method for determining if a Unicode character
* may be part of a Java identifier other than the starting
* character.
* <P>
* A character may be part of a Java identifier if and only if
* it is one of the following:
* <ul>
* <li> a letter
* <li> a currency symbol (such as "$")
* <li> a connecting punctuation character (such as "_").
* <li> a digit
* <li> a numeric letter (such as a Roman numeral character)
* <li> a combining mark
* <li> a non-spacing mark
* <li> an ignorable control character
* </ul>
*
* @param c the Unicode character.
* @return TRUE if the character may be part of a Java identifier;
* FALSE otherwise.
* @see u_isIDIgnorable()
* @see u_isJavaIDStart()
* @see u_isalpha()
* @see u_isdigit()
* @see u_isIDPart()
*/
U_CAPI bool_t U_EXPORT2
u_isJavaIDPart(UChar c);
/**
* Functions to change character case.
*/
/**
* The given character is mapped to its lowercase equivalent according to
* Unicode 2.1.2; if the character has no lowercase equivalent, the character
* itself is returned.
* <P>
* A character has a lowercase equivalent if and only if a lowercase mapping
* is specified for the character in the Unicode 2.1.2 attribute table.
* <P>
* u_tolower() only deals with the general letter case conversion.
* For language specific case conversion behavior, use ustrToUpper().
* For example, the case conversion for dot-less i and dotted I in Turkish,
* or for final sigma in Greek.
*
* @param c the character to be converted
* @return the lowercase equivalent of the character, if any;
* otherwise the character itself.
* @see u_toupper()
* @see u_totitle()
*/
U_CAPI UChar U_EXPORT2
u_tolower(UChar c);
/**
* The given character is mapped to its uppercase equivalent according to Unicode
* 2.1.2; if the character has no uppercase equivalent, the character itself is
* returned.
* <P>
* u_toupper() only deals with the general letter case conversion.
* For language specific case conversion behavior, use ustrToUpper().
* For example, the case conversion for dot-less i and dotted I in Turkish,
* or ess-zed (i.e., "sharp S") in German.
*
* @param c the character to be converted
* @return the uppercase equivalent of the character, if any;
* otherwise the character itself.
* @see u_tolower()
* @see u_totitle()
*/
U_CAPI UChar U_EXPORT2
u_toupper(UChar c);
/**
* The given character is mapped to its titlecase equivalent according to Unicode
* 2.1.2. There are only four Unicode characters that are truly titlecase forms
* that are distinct from uppercase forms. As a rule, if a character has no
* true titlecase equivalent, its uppercase equivalent is returned.
* <P>
* A character has a titlecase equivalent if and only if a titlecase mapping
* is specified for the character in the Unicode 2.1.2 data.
*
* @param c the character to be converted
* @return the titlecase equivalent of the character, if any;
* otherwise the character itself.
* NOTE(review): the paragraph above says the uppercase equivalent is
* returned when there is no true titlecase form -- confirm which
* statement matches the implementation.
* @see u_toupper()
* @see u_istitle()
*/
U_CAPI UChar U_EXPORT2
u_totitle(UChar c);
/**
* The function is used to get the version of the Unicode standard that is used.
*
* @return the Unicode standard version number, as a C string
* @see UNICODE_VERSION
*/
U_CAPI const char* U_EXPORT2
u_getVersion(void);
#endif /* UCHAR_H */
/*eof*/

View file

@ -0,0 +1,142 @@
/*
**********************************************************************
* Copyright (C) 1998-1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#ifndef UCHRITER_H
#define UCHRITER_H
#include "unicode/utypes.h"
#include "unicode/chariter.h"
/**
* A concrete subclass of CharacterIterator that iterates over the
* characters in a UChar array (the public constructor takes a
* <code>const UChar*</code> and a length). The comments in this class
* were originally written in terms of a UnicodeString; "string" below
* means the text being iterated over. It's possible not only to create an
* iterator that iterates over an entire string, but also to
* create one that iterates over only a subrange of a string
* (iterators over different subranges of the same string don't
* compare equal). */
class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
public:
/**
* Create an iterator over the UChar array referred to by "text";
* "len" is presumably its length in UChars (confirm against the
* implementation). The iteration range is the whole string, and the
* starting position is 0. */
UCharCharacterIterator(const UChar* text, int32_t len);
/**
* Copy constructor. The new iterator iterates over the same range
* of the same string as "that", and its initial position is the
* same as "that"'s current position. */
UCharCharacterIterator(const UCharCharacterIterator& that);
/**
* Destructor. */
~UCharCharacterIterator();
/**
* Assignment operator. *this is altered to iterate over the same
* range of the same string as "that", and refers to the same
* character within that string as "that" does. */
UCharCharacterIterator&
operator=(const UCharCharacterIterator& that);
/**
* Returns true if the iterators iterate over the same range of the
* same string and are pointing at the same character. */
virtual bool_t operator==(const CharacterIterator& that) const;
/**
* Generates a hash code for this iterator. */
virtual int32_t hashCode(void) const;
/**
* Returns a new iterator referring to the same
* character in the same range of the same string as this one. The
* caller must delete the new iterator. */
virtual CharacterIterator* clone(void) const;
/**
* Sets the iterator to refer to the first character in its
* iteration range, and returns that character. */
virtual UChar first(void);
/**
* Sets the iterator to refer to the last character in its iteration
* range, and returns that character. */
virtual UChar last(void);
/**
* Sets the iterator to refer to the "pos"-th character in the
* string the iterator refers to, and returns that character.
* If the index is outside the iterator's iteration range, the
* behavior of the iterator is undefined. */
virtual UChar setIndex(UTextOffset pos);
/**
* Returns the character the iterator currently refers to. */
virtual UChar current(void) const;
/**
* Advances to the next character in the iteration range (toward
* last()), and returns that character. If there are no more
* characters to return, returns DONE. */
virtual UChar next(void);
/**
* Advances to the previous character in the iteration range (toward
* first()), and returns that character. If there are no more
* characters to return, returns DONE. */
virtual UChar previous(void);
/**
* Returns the numeric index of the first character in this
* iterator's iteration range. */
virtual UTextOffset startIndex(void) const;
/**
* Returns the numeric index of the character immediately BEYOND the
* last character in this iterator's iteration range. */
virtual UTextOffset endIndex(void) const;
/**
* Returns the numeric index in the underlying string of the
* character the iterator currently refers to (i.e., the character
* returned by current()). */
virtual UTextOffset getIndex(void) const;
/**
* Copies the text under iteration into the UnicodeString
* referred to by "result". Even if this iterator iterates across
* only a part of this string, the whole string is copied.
* @param result Receives a copy of the text under iteration. */
virtual void getText(UnicodeString& result);
/**
* Return a class ID for this object (not really public).
* Forwards to getStaticClassID(). */
virtual UClassID getDynamicClassID(void) const
{ return getStaticClassID(); }
/**
* Return a class ID for this class (not really public).
* The ID is the address of the private static member fgClassID. */
static UClassID getStaticClassID(void)
{ return (UClassID)(&fgClassID); }
private:
// Default constructor is private, so it cannot be called from outside the class.
UCharCharacterIterator();
// The UChar array being iterated over.
const UChar* text;
// NOTE(review): presumably the current index and the [begin, end)
// iteration range reported by getIndex()/startIndex()/endIndex() -- confirm.
UTextOffset pos;
UTextOffset begin;
UTextOffset end;
// Its address is what getStaticClassID() returns.
static char fgClassID;
};
#endif

View file

@ -0,0 +1,605 @@
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* ucnv.h:
* External APIs for the ICU's codeset conversion library
* Bertrand A. Damiba
*
* Modification History:
*
* Date Name Description
* 04/04/99 helena Fixed internal header inclusion.
*/
/**
* @name Character Conversion C API
*
* Character Conversion C API documentation is still under construction.
* Please check for updates soon.
*/
#ifndef UCNV_H
#define UCNV_H
#include "unicode/utypes.h"
#include "unicode/ucnv_bld.h"
#include "unicode/ucnv_err.h"
U_CDECL_BEGIN
/**
* Pointer type for a callback taking a converter, a UChar position/limit
* pair, a char position/limit pair, an offsets array, a flag and an error code.
* NOTE(review): only <TT>offsets</TT> is named here. Judging by the types
* and the type name, this is presumably the codepage-to-Unicode callback,
* with (target, targetLimit, source, sourceLimit, offsets, flush, err)
* after the converter -- confirm against the implementation.
*/
typedef void (*UConverterToUCallback) (UConverter *,
UChar **,
const UChar *,
const char **,
const char *,
int32_t* offsets,
bool_t,
UErrorCode *);
/**
* Pointer type for a callback taking a converter, a char position/limit
* pair, a UChar position/limit pair, an offsets array, a flag and an error code.
* NOTE(review): only <TT>offsets</TT> is named here. Judging by the types
* and the type name, this is presumably the Unicode-to-codepage callback,
* with (target, targetLimit, source, sourceLimit, offsets, flush, err)
* after the converter -- confirm against the implementation.
*/
typedef void (*UConverterFromUCallback) (UConverter *,
char **,
const char *,
const UChar **,
const UChar *,
int32_t* offsets,
bool_t,
UErrorCode *);
U_CDECL_END
/**
* Creates a UConverter object with the name specified as a C string.
* The actual name will be resolved with the alias file.
* If <TT>NULL</TT> is passed for the converter name, it will create one with the
* getDefaultName return value.
* @param converterName : name of the uconv table
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
* @see ucnv_openU
* @see ucnv_openCCSID
* @see ucnv_close
*/
U_CAPI
UConverter* U_EXPORT2 ucnv_open (const char *converterName,
UErrorCode * err);
/**
* Creates a Unicode converter with the name specified as a Unicode string. The name should be limited to
* the ASCII-7 alphanumerics range. The actual name will be resolved with the alias file.
* If <TT>NULL</TT> is passed for the converter name, it will create one with the
* getDefaultName return value.
* @param name : name of the uconv table in a zero terminated Unicode string
* @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
* @see ucnv_open
* @see ucnv_openCCSID
* @see ucnv_close
*/
U_CAPI UConverter* U_EXPORT2 ucnv_openU (const UChar * name,
UErrorCode * err);
/**
* Creates a UConverter object using a CCSID number.
*
* @param codepage : codepage # of the uconv table
* @param platform : codepage's platform (now only <TT>IBM</TT> supported)
* @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, TABLE_NOT_FOUND</TT>
* @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
* @see ucnv_open
* @see ucnv_openU
* @see ucnv_close
*/
U_CAPI UConverter* U_EXPORT2 ucnv_openCCSID (int32_t codepage,
UConverterPlatform platform,
UErrorCode * err);
/**
* Deletes the Unicode converter.
*
* @param converter the converter object to be deleted
* @see ucnv_open
* @see ucnv_openU
* @see ucnv_openCCSID
*/
U_CAPI void U_EXPORT2 ucnv_close (UConverter * converter);
/**
* Fills in the output parameter, subChars, with the substitution characters
* as multiple bytes.
*
* @param converter: the Unicode converter
* @param subChars: the substitution characters
* @param len: on input the capacity of subChars, on output the number of bytes copied to it
* @param err: the outgoing error status code.
* If the substitution character array is too small, an
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
* @see ucnv_setSubstChars
*/
U_CAPI void U_EXPORT2
ucnv_getSubstChars (const UConverter * converter,
char *subChars,
int8_t * len,
UErrorCode * err);
/**
* Sets the substitution chars when converting from Unicode to a codepage. The
* substitution is specified as a string of 1-4 bytes, and may contain a <TT>NULL</TT> byte.
* The fill-in parameter err will get the error status on return.
* @param converter the Unicode converter
* @param subChars the substitution character byte sequence we want set
* @param len the number of bytes in subChars
* @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
* len is bigger than the maximum number of bytes allowed in subChars
* @see ucnv_getSubstChars
*/
U_CAPI void U_EXPORT2
ucnv_setSubstChars (UConverter * converter,
const char *subChars,
int8_t len,
UErrorCode * err);
/**
* Fills in the output parameter, errBytes, with the error characters from the
* last failing conversion.
*
* @param converter: the Unicode converter
* @param errBytes: the bytes in error
* @param len: on input the capacity of errBytes, on output the number of bytes copied to it
* @param err: the outgoing error status code.
* If the errBytes array is too small, an
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
* @see ucnv_getInvalidUChars
*/
U_CAPI void U_EXPORT2
ucnv_getInvalidChars (const UConverter * converter,
char *errBytes,
int8_t * len,
UErrorCode * err);
/**
* Fills in the output parameter, errUChars, with the error characters from the
* last failing conversion.
* <P>
* NOTE(review): errUChars is declared as <TT>char *</TT> although the name
* and the description of len speak of UChars -- confirm whether the
* parameter was meant to be <TT>UChar *</TT>.
*
* @param converter: the Unicode converter
* @param errUChars: the characters in error
* @param len: on input the capacity of errUChars, on output the number of UChars copied to it
* @param err: the outgoing error status code.
* If the errUChars array is too small, an
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
* @see ucnv_getInvalidChars
*/
U_CAPI void U_EXPORT2
ucnv_getInvalidUChars (const UConverter * converter,
char *errUChars,
int8_t * len,
UErrorCode * err);
/**
* Resets the state of a stateful conversion to the default state. This is used
* in the case of an error to restart a conversion from a known default state.
* It will also empty the internal output buffers.
* @param converter the Unicode converter
*/
U_CAPI void U_EXPORT2
ucnv_reset (UConverter * converter);
/**
* Returns the maximum number of bytes used by a character. This varies between 1 and 4.
* @param converter the Unicode converter
* @return the maximum number of bytes allowed by this particular converter
* @see ucnv_getMinCharSize
*/
U_CAPI int8_t U_EXPORT2
ucnv_getMaxCharSize (const UConverter * converter);
/**
* Returns the minimum number of bytes used by a character in this codepage.
* This is either 1 or 2 for all supported codepages.
* @param converter the Unicode converter
* @return the minimum number of bytes allowed by this particular converter
* @see ucnv_getMaxCharSize
*/
U_CAPI int8_t U_EXPORT2
ucnv_getMinCharSize (const UConverter * converter);
/**
* Returns the display name of the converter passed in, based on the Locale passed in.
* If the locale contains no display name, the internal ASCII name will be
* filled in.
*
* @param converter the Unicode converter.
* @param displayLocale is the specific Locale we want to localize for
* @param displayName user provided buffer to be filled in
* @param displayNameCapacity size of displayName Buffer
* @param err: outgoing error code.
* @return displayNameLength number of UChar needed in displayName
* @see ucnv_getName
*/
U_CAPI
int32_t U_EXPORT2 ucnv_getDisplayName (const UConverter * converter,
const char *displayLocale,
UChar * displayName,
int32_t displayNameCapacity,
UErrorCode * err);
/**
* Gets the name of the converter (zero-terminated).
* The name will be the internal name of the converter; the lifetime of the returned
* string will be that of the converter passed to this function.
* @param converter the Unicode converter
* @param err UErrorCode status
* @return the internal name of the converter
* @see ucnv_getDisplayName
*/
U_CAPI
const char * U_EXPORT2 ucnv_getName (const UConverter * converter, UErrorCode * err);
/**
* Gets a codepage number associated with the converter. This is not guaranteed
* to be the one used to create the converter. Some converters do not represent
* IBM registered codepages and return zero for the codepage number.
* The error code fill-in parameter indicates if the codepage number is available.
* @param converter the Unicode converter
* @param err the error status code.
* NOTE(review): the original comment continued here with the sentence fragment
* "the converter is <TT>NULL</TT> or if converter's data table is <TT>NULL</TT>";
* presumably these are the conditions under which an error is reported -- confirm
* against the implementation.
* @return If any error occurs, -1 will be returned; otherwise, the codepage number
* will be returned
*/
U_CAPI int32_t U_EXPORT2
ucnv_getCCSID (const UConverter * converter,
UErrorCode * err);
/**
* Gets a codepage platform associated with the converter. Currently, only <TT>IBM</TT> is supported.
* The error code fill-in parameter indicates if the codepage number is available.
* @param converter the Unicode converter
* @param err the error status code.
* NOTE(review): the original comment continued here with the sentence fragment
* "the converter is <TT>NULL</TT> or if converter's data table is <TT>NULL</TT>";
* presumably these are the conditions under which an error is reported -- confirm
* against the implementation.
* @return The codepage platform
*/
U_CAPI UConverterPlatform U_EXPORT2
ucnv_getPlatform (const UConverter * converter,
UErrorCode * err);
/**
* Gets the type of conversion associated with the converter,
* e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, EBCDIC_STATEFUL, LATIN_1
* @param converter a valid, opened converter
* @return the type of the converter
*/
U_CAPI UConverterType U_EXPORT2
ucnv_getType (const UConverter * converter);
/**
* Gets the "starter" bytes for converters of type MBCS.
* Fills in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the converter passed in
* is not MBCS.
* Fills in an array of booleans, with the value of the byte as offset into the array.
* On return, if TRUE is found at offset 0x20, it means that the byte 0x20 is a starter byte
* in this converter.
* @param converter a valid, opened converter of type MBCS
* @param starters an array of size 256 to be filled in
* @param err the error status code; receives <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the
* converter is not MBCS
* @see ucnv_getType
*/
U_CAPI void U_EXPORT2 ucnv_getStarters(const UConverter* converter,
bool_t starters[256],
UErrorCode* err);
/**
* Gets the current callback function used by the converter when an illegal or invalid
* sequence is found.
*
* @param converter the unicode converter
* @return a pointer to the callback function
* @see ucnv_setToUCallBack
*/
U_CAPI UConverterToUCallback U_EXPORT2
ucnv_getToUCallBack (const UConverter * converter);
/**
* Gets the current callback function used by the converter when an illegal or invalid
* sequence is found.
*
* @param converter the unicode converter
* @return a pointer to the callback function
* @see ucnv_setFromUCallBack
*/
U_CAPI UConverterFromUCallback U_EXPORT2
ucnv_getFromUCallBack (const UConverter * converter);
/**
* Sets the callback function used by the converter when an illegal or invalid
* sequence is found, and returns the callback that was previously assigned.
*
* @param converter the unicode converter
* @param action the callback function we want to set.
* @param err The error code status
* @return the previously assigned callback function pointer
* @see ucnv_getToUCallBack
*/
U_CAPI UConverterToUCallback U_EXPORT2
ucnv_setToUCallBack (UConverter * converter,
UConverterToUCallback action,
UErrorCode * err);
/**
* Sets the callback function used by the converter when an illegal or invalid
* sequence is found, and returns the callback that was previously assigned.
*
* @param converter the unicode converter
* @param action the callback function we want to set.
* @param err The error code status
* @return the previously assigned callback function pointer
* @see ucnv_getFromUCallBack
*/
U_CAPI UConverterFromUCallback U_EXPORT2
ucnv_setFromUCallBack (UConverter * converter,
UConverterFromUCallback action,
UErrorCode * err);
/**
* Transcodes an array of unicode characters to an array of codepage characters.
* The source pointer is an I/O parameter: it starts out pointing where the function is
* to begin transcoding, and ends up pointing after the first sequence of the bytes
* that it encounters that are semantically invalid.
* If ucnv_setToUCallBack is called with an action other than <TT>STOP</TT>
* before a call is made to this API, <TT>consumed</TT> and <TT>source</TT> should point to the same place
* (unless <TT>target</TT> ends with an incomplete sequence of bytes and <TT>flush</TT> is <TT>FALSE</TT>).
* The <TT>target</TT> buffer needs to be at least the size of the maximum # of bytes per character
* allowed by the target codepage.
* NOTE(review): this text and the last @see below name ucnv_setToUCallBack, but this
* function converts from Unicode; presumably ucnv_setFromUCallBack is meant -- confirm.
* <TT>consumed</TT> is also not a parameter of this function.
* @param converter the Unicode converter
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
* codepage characters to. Output : points to after the last codepage character copied
* to <TT>target</TT>.
* @param targetLimit the pointer to the end of the <TT>target</TT> array
* @param source the source Unicode character array (I/O parameter, see above)
* @param sourceLimit the pointer to the end of the source array
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
* of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
* e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
* For output data carried across calls -1 will be placed for offsets.
* @param flush <TT>TRUE</TT> if the buffer is the last buffer of the conversion iteration
* and the conversion will finish with this call, FALSE otherwise.
* @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be returned if the
* converter is <TT>NULL</TT>.
* @see ucnv_fromUChars
* @see ucnv_convert
* @see ucnv_getMinCharSize
* @see ucnv_setToUCallBack
*/
U_CAPI
void U_EXPORT2 ucnv_fromUnicode (UConverter * converter,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/**
* Converts an array of codepage characters into an array of unicode characters.
* The source pointer is an I/O parameter: it starts out pointing at the place
* to begin translating, and ends up pointing after the first sequence of the bytes
* that it encounters that are semantically invalid.
* If ucnv_setFromUCallBack is called with an action other than STOP
* before a call is made to this API, consumed and source should point to the same place
* (unless target ends with an incomplete sequence of bytes and flush is FALSE).
* NOTE(review): this text and the last @see below name ucnv_setFromUCallBack, but this
* function converts to Unicode; presumably ucnv_setToUCallBack is meant -- confirm.
* "consumed" is also not a parameter of this function.
* @param converter the Unicode converter
* @param target : I/O parameter. Input : Points to the beginning of the buffer to copy
* Unicode characters to. Output : points to after the last UChar copied to target.
* @param targetLimit the pointer to the end of the target array
* @param source the source codepage character array (I/O parameter, see above)
* @param sourceLimit the pointer to the end of the source array
* @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
* of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
* e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
* For output data carried across calls -1 will be placed for offsets.
* @param flush TRUE if the buffer is the last buffer and the conversion will finish
* in this call, FALSE otherwise.
* @param err the error code status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be returned if the
* converter is <TT>NULL</TT>, or if <TT>targetLimit</TT> and <TT>sourceLimit</TT> are misaligned.
* @see ucnv_toUChars
* @see ucnv_getNextUChar
* @see ucnv_convert
* @see ucnv_setFromUCallBack
*/
U_CAPI
void U_EXPORT2 ucnv_toUnicode (UConverter * converter,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/**
* Transcodes the source Unicode string to the target string in a codepage encoding
* with the specified Unicode converter. For example, if a Unicode to/from JIS
* converter is specified, the source string in Unicode will be transcoded to JIS
* encoding. The result will be stored in JIS encoding.
* If any problems are encountered during conversion, it will SUBSTITUTE with the default (initial)
* substitute characters.
* This function is a more convenient but less efficient version of \Ref{ucnv_fromUnicode}.
* @param converter the Unicode converter
* @param source the <TT>source</TT> Unicode string (zero-terminated)
* @param target the <TT>target</TT> string in codepage encoding (<STRONG>not zero-terminated</STRONG> because some
* codepages do not use '\0' as a string terminator)
* @param targetCapacity Input: the number of bytes available in the <TT>target</TT> buffer
* @param err the error status code.
* <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned if
* the # of bytes provided are not enough for transcoding.
* <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> is returned if the converter is <TT>NULL</TT> or the source or target string is empty.
* <TT>U_BUFFER_OVERFLOW_ERROR</TT> when <TT>targetSize</TT> turns out to be bigger than <TT>targetCapacity</TT>
* (NOTE(review): there is no <TT>targetSize</TT> parameter; presumably this is the
* number of bytes needed, i.e. the return value -- confirm).
* @return number of bytes needed in target, regardless of <TT>targetCapacity</TT>
* @see ucnv_fromUnicode
* @see ucnv_convert
*/
U_CAPI
int32_t U_EXPORT2 ucnv_fromUChars (const UConverter * converter,
char *target,
int32_t targetCapacity,
const UChar * source,
UErrorCode * err);
/**
* Transcodes the source string in codepage encoding to the target string in
* Unicode encoding. For example, if a Unicode to/from JIS
* converter is specified, the source string in JIS encoding will be transcoded
* to Unicode and placed into a provided target buffer.
* If any problems are encountered during conversion, it will SUBSTITUTE with the Unicode REPLACEMENT char.
* We recommend that the size of the target buffer be at least as long as the maximum # of bytes per char
* in this character set.
* A zero-terminator will be placed at the end of the target buffer.
* This function is a more convenient but less efficient version of \Ref{ucnv_toUnicode}.
* @param converter the Unicode converter
* @param source the source string in codepage encoding
* @param target the target string in Unicode encoding
* @param targetCapacity capacity of the target buffer
* @param sourceSize : Number of bytes in <TT>source</TT> to be transcoded
* @param err the error status code
* <TT>U_MEMORY_ALLOCATION_ERROR</TT> will be returned if
* the internal process buffer cannot be allocated for transcoding.
* <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> is returned if the converter is <TT>NULL</TT> or
* if the source or target string is empty.
* <TT>U_BUFFER_OVERFLOW_ERROR</TT> when the input buffer is prematurely exhausted and targetSize non-<TT>NULL</TT>
* (NOTE(review): there is no targetSize parameter in this signature; this
* condition looks stale -- confirm against the implementation).
* @return the number of UChar needed in target (including the zero terminator)
* @see ucnv_getNextUChar
* @see ucnv_toUnicode
* @see ucnv_convert
*/
U_CAPI
int32_t U_EXPORT2 ucnv_toUChars (const UConverter * converter,
UChar * target,
int32_t targetCapacity,
const char *source,
int32_t sourceSize,
UErrorCode * err);
/********************************
* Converts a codepage buffer one character at a time.
* This function was written to be efficient when transcoding small amounts of data at a time.
* In that case it will be more efficient than \Ref{ucnv_toUnicode}.
* When converting large buffers use \Ref{ucnv_toUnicode}.
*@param converter an open UConverter
*@param source the address of a pointer to the codepage buffer; will be updated to point after
*the bytes consumed in the conversion call.
*@param sourceLimit points to the end of the input buffer
*@param err fills in error status (see ucnv_toUnicode)
*@return a UChar resulting from the partial conversion of source
*@see ucnv_toUnicode
*@see ucnv_toUChars
*@see ucnv_convert
*/
U_CAPI
UChar U_EXPORT2 ucnv_getNextUChar (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err);
/**************************
* Converts a sequence of bytes from one codepage to another.
* This is <STRONG>NOT AN EFFICIENT</STRONG> way to transcode.
* Use \Ref{ucnv_toUnicode} and \Ref{ucnv_fromUnicode} for efficiency.
* @param toConverterName The name of the converter that will be used to encode the output buffer
* @param fromConverterName The name of the converter that will be used to decode the input buffer
* @param target Pointer to the output buffer to write to
* @param targetCapacity on input contains the capacity of target
* @param source Pointer to the input buffer
* @param sourceLength on input contains the capacity of source
* @param err fills in an error status
* @return the number of bytes needed in target
* @see ucnv_fromUnicode
* @see ucnv_toUnicode
* @see ucnv_fromUChars
* @see ucnv_toUChars
* @see ucnv_getNextUChar
*/
U_CAPI
int32_t U_EXPORT2 ucnv_convert (const char *toConverterName,
const char *fromConverterName,
char *target,
int32_t targetCapacity,
const char *source,
int32_t sourceLength,
UErrorCode * err);
/**
* SYSTEM API
* Iterates through every cached converter and frees all the unused ones.
*
* @return the number of cached converters successfully deleted
*/
U_CAPI int32_t U_EXPORT2 ucnv_flushCache (void);
/**
* Provides a string containing the internal name (based on the alias file) of the
* converter at a given index.
* @param index the index of the converter among those available on the system
* (<TT>[0..ucnv_countAvailable()]</TT>)
* @return a pointer to a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
* @see ucnv_countAvailable
*/
U_CAPI
const char * U_EXPORT2 ucnv_getAvailableName (int32_t index);
/**
* Returns the number of available converters.
*
* @return the number of available converters
* @see ucnv_getAvailableName
*/
U_CAPI int32_t U_EXPORT2 ucnv_countAvailable (void);
/**
* Returns the current default converter name.
*
* @return the current default converter name;
* if a default converter name cannot be determined,
* then <code>NULL</code> is returned
* @see ucnv_setDefaultName
*/
U_CAPI const char * U_EXPORT2 ucnv_getDefaultName (void);
/**
* Sets the current default converter name.
* The lifetime of the return ptr is that of the library
* (NOTE(review): this function returns void; the sentence above may belong to
* ucnv_getDefaultName or describe the required lifetime of <code>name</code> -- confirm).
* @param name the converter name you want as default (has to appear in alias file)
* @see ucnv_getDefaultName
*
*/
U_CAPI void U_EXPORT2 ucnv_setDefaultName (const char *name);
#endif
/*_UCNV*/

View file

@ -0,0 +1,215 @@
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
*
* ucnv_bld.h:
* Contains all internal and external data structure definitions
* Created & Maintained by Bertrand A. Damiba
*
*
*
* ATTENTION:
* ---------
* Although the data structures in this file are open and stack allocatable
* we reserve the right to hide them in further releases.
*/
#ifndef UCNV_BLD_H
#define UCNV_BLD_H
#include "unicode/utypes.h"
/*Capacity, in bytes, of the substitution character buffers (subChar) and of
*UConverter.invalidCharBuffer declared below
*/
#define UCNV_MAX_SUBCHAR_LEN 4
/*Capacity of UConverter.UCharErrorBuffer and UConverter.charErrorBuffer */
#define UCNV_ERROR_BUFFER_LENGTH 20
/*Forward declarations of the compact array types used by the conversion tables;
*skipped when the corresponding include guard is already defined
*/
#ifndef UCMP16_H
typedef struct _CompactShortArray CompactShortArray;
#endif
#ifndef UCMP8_H
typedef struct _CompactByteArray CompactByteArray;
#endif
/*Same value as UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES in UConverterType below */
#define UCNV_IMPLEMENTED_CONVERSION_TYPES 9
/*Sentinel Value used to check the integrity of the binary data files */
#define UCNV_FILE_CHECK_MARKER 0xBEDA
/*maximum length of the converter names */
#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
/*maximum length of a full file name: 600 plus the maximum converter name length */
#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
/*NOTE(review): the original comment here read "Pointer to the aforementioned file",
*which does not describe this macro; presumably this is the maximum length of a line
*of text read from a file -- confirm against its users
*/
#define UCNV_MAX_LINE_TEXT (UCNV_MAX_CONVERTER_NAME_LENGTH*400)
#define UCNV_SI 0x0F /*Shift in for EBCDIC_STATEFUL and iso2022 states */
#define UCNV_SO 0x0E /*Shift out for EBCDIC_STATEFUL and iso2022 states */
/*Enumerates the conversion types of a converter (see ucnv_getType).
*UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES deliberately shares the value 9 with
*UCNV_JIS: the types numbered 0..8 are the ones that have conversion routines.
*/
typedef enum {
UCNV_UNSUPPORTED_CONVERTER = -1,
UCNV_SBCS = 0,
UCNV_DBCS = 1,
UCNV_MBCS = 2,
UCNV_LATIN_1 = 3,
UCNV_UTF8 = 4,
UCNV_UTF16_BigEndian = 5,
UCNV_UTF16_LittleEndian = 6,
UCNV_EBCDIC_STATEFUL = 7,
UCNV_ISO_2022 = 8,
/* Number of converter types for which we have conversion routines. */
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES = 9,
/* presumably declared but without conversion routines yet -- TODO confirm */
UCNV_JIS = 9,
UCNV_EUC = 10,
UCNV_GB = 11
} UConverterType;
/*Enumerates the codepage platforms of a converter (see ucnv_getPlatform);
*IBM is the only platform defined besides "unknown"
*/
typedef enum {
UCNV_UNKNOWN = -1,
UCNV_IBM = 0
} UConverterPlatform;
/*Table Node Definitions */
/*Conversion table pair for a converter of type SBCS */
typedef struct
{
UChar *toUnicode; /* [256]; */
CompactByteArray *fromUnicode; /* lookup used for the Unicode -> codepage direction */
}
UConverterSBCSTable;
/*Conversion table pair for a converter of type DBCS; both directions are
*stored as compact short arrays
*/
typedef struct
{
CompactShortArray *toUnicode;
CompactShortArray *fromUnicode;
}
UConverterDBCSTable;
/*Conversion tables for a converter of type MBCS: a table of starter-byte flags
*(see ucnv_getStarters) plus one compact short array per direction
*/
typedef struct
{
bool_t *starters; /* [256]; */
CompactShortArray *toUnicode;
CompactShortArray *fromUnicode;
}
UConverterMBCSTable;
/*Union of the per-type conversion tables; presumably the member to use is
*selected by the converter's conversionType -- TODO confirm
*/
typedef union
{
UConverterSBCSTable sbcs;
UConverterDBCSTable dbcs;
UConverterMBCSTable mbcs;
}
UConverterTable;
/*Defines the struct of a UConverterSharedData, the immutable, shared part of
*UConverter
*/
typedef struct
{
uint32_t structSize; /* Size of this structure */
void *dataMemory; /* NOTE(review): presumably the loaded data this object was built from -- confirm */
uint32_t referenceCounter; /*used to count number of clients */
char name[UCNV_MAX_CONVERTER_NAME_LENGTH]; /*internal name of the converter */
UConverterPlatform platform; /*platform of the converter (only IBM now) */
int32_t codepage; /*codepage # (now IBM-$codepage) */
UConverterType conversionType; /*conversion type */
int8_t minBytesPerChar; /*Minimum # bytes per char in this codepage */
int8_t maxBytesPerChar; /*Maximum # bytes per char in this codepage */
struct
{ /*initial values of some members of the mutable part of object */
uint32_t toUnicodeStatus;
int8_t subCharLen; /*number of bytes used in subChar */
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /*default substitution character sequence */
}
defaultConverterValues;
UConverterTable *table; /*Pointer to conversion data */
}
UConverterSharedData;
/*Defines a UConverter, the lightweight mutable part the user sees */
U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv
itself is compiled under C++, the linkage of the funcptrs will
work.
*/
struct UConverter
{
int32_t toUnicodeStatus; /*Used to internalize stream status information */
int32_t fromUnicodeStatus; /*presumably the same, for the from-Unicode direction -- TODO confirm */
int8_t invalidCharLength; /*presumably the number of bytes held in invalidCharBuffer -- TODO confirm */
int8_t invalidUCharLength; /*presumably the number of UChars held in invalidUCharBuffer -- TODO confirm */
int8_t pad;
int32_t mode;
int8_t subCharLen; /*length of the codepage specific character sequence */
unsigned char subChar[UCNV_MAX_SUBCHAR_LEN]; /*codepage specific character sequence */
UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store unicode data meant for
*output stream by the Error function pointers
*/
unsigned char charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /*used to store codepage data meant for
* output stream by the Error function pointers
*/
int8_t UCharErrorBufferLength; /*used to indicate the number of valid UChars
*in UCharErrorBuffer
*/
int8_t charErrorBufferLength; /*used to indicate the number of valid bytes
*in charErrorBuffer
*/
UChar invalidUCharBuffer[3];
char invalidCharBuffer[UCNV_MAX_SUBCHAR_LEN];
/*Error function pointer called when conversion issues
*occur during a T_UConverter_fromUnicode call.
*Its parameter list has the same types as ucnv_fromUnicode.
*/
void (*fromUCharErrorBehaviour) (struct UConverter *,
char **,
const char *,
const UChar **,
const UChar *,
int32_t* offsets,
bool_t,
UErrorCode *);
/*Error function pointer called when conversion issues
*occur during a T_UConverter_toUnicode call.
*Its parameter list has the same types as ucnv_toUnicode.
*/
void (*fromCharErrorBehaviour) (struct UConverter *,
UChar **,
const UChar *,
const char **,
const char *,
int32_t* offsets,
bool_t,
UErrorCode *);
UConverterSharedData *sharedData; /*Pointer to the shared immutable part of the
*converter object
*/
void *extraInfo; /*currently only used to point to a struct containing UConverter used by iso 2022
Could be used by clients writing their own call back function to
pass context to them
*/
};
U_CDECL_END /* end of UConverter */
/*Lets C code refer to the struct as plain UConverter */
typedef struct UConverter UConverter;
/*Extra state for an ISO 2022 converter; UConverter.extraInfo is documented as
*pointing to "a struct containing UConverter used by iso 2022", presumably this one
*/
typedef struct
{
UConverter *currentConverter;
unsigned char escSeq2022[10]; /*presumably the escape sequence bytes collected so far -- TODO confirm */
int8_t escSeq2022Length; /*presumably the number of bytes used in escSeq2022 -- TODO confirm */
}
UConverterDataISO2022;
/*File name extension of converter files; presumably of the binary converter
*data files -- TODO confirm
*/
#define CONVERTER_FILE_EXTENSION ".cnv"
/*Computes a case insensitive hash key.
*name: the key to hash; presumably a zero-terminated char string such as a
*converter name -- TODO confirm against the implementation
*returns the hash code
*/
U_CAPI int32_t U_EXPORT2 uhash_hashIString(const void* name);
#endif /* _UCNV_BLD */

View file

@ -0,0 +1,151 @@
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
*
* ucnv_err.h:
* defines error behaviour functions called by T_UConverter_{from,to}Unicode
*
* These Functions, although public, should NEVER be called directly, they should be used as parameters to
* the T_UConverter_setMissing{Char,Unicode}Action API, to set the behaviour of a converter
* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
*
* usage example:
*
* ...
* UErrorCode err = U_ZERO_ERROR;
* UConverter* myConverter = T_UConverter_create("ibm-949", &err);
*
* if (U_SUCCESS(err))
* {
* T_UConverter_setMissingUnicodeAction(myConverter, (MissingUnicodeAction)UCNV_FROM_U_CALLBACK_STOP, &err);
* T_UConverter_setMissingCharAction(myConverter, (MissingCharAction)UCNV_TO_U_CALLBACK_SUBSTITUTE, &err);
* }
* ...
*
* The code above tells "myConverter" to stop when it encounters an ILLEGAL/TRUNCATED/INVALID sequence while it is used to
* convert from Unicode -> Codepage,
* and to substitute with a codepage specific substitution sequence when converting from Codepage -> Unicode.
*/
#ifndef UCNV_ERR_H
#define UCNV_ERR_H
#include "unicode/ucnv.h"
#include "unicode/utypes.h"
/* Functor (Unicode -> codepage direction): STOPS at the ILLEGAL_SEQUENCE.
 * The parameters after _this have the same types and names as those of ucnv_fromUnicode.
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Functor (codepage -> Unicode direction): STOPS at the ILLEGAL_SEQUENCE.
 * The parameters after _this have the same types and names as those of ucnv_toUnicode.
 */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Functor (Unicode -> codepage direction): SKIPs the ILLEGAL_SEQUENCE.
 * The parameters after _this have the same types and names as those of ucnv_fromUnicode.
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Functor (Unicode -> codepage direction): substitutes the ILLEGAL SEQUENCE with the
 * current substitution string associated with _this.
 * In the event the target buffer is too small, it will store the extra info in the UConverter, and err
 * will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
 * store the left over data in target, before transcoding the "source Stream"
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Functor (Unicode -> codepage direction): substitutes the ILLEGAL SEQUENCE with a sequence
 * of escaped codepoints corresponding to the ILLEGAL
 * SEQUENCE (format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). In the event the Converter doesn't support the
 * characters {u,%}[A-F][0-9], it will substitute the illegal sequence with the substitution characters
 * (it will behave like UCNV_FROM_U_CALLBACK_SUBSTITUTE).
 * In the event the target buffer is too small, it will store the extra info in the UConverter, and err
 * will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
 * store the left over data in target, before transcoding the "source Stream"
 * NOTE(review): the format is given as %UXXXX but the example uses a lower-case 'u';
 * confirm which case the implementation emits.
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Functor (codepage -> Unicode direction): SKIPs the ILLEGAL_SEQUENCE.
 * The parameters after _this have the same types and names as those of ucnv_toUnicode.
 */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Functor (codepage -> Unicode direction): substitutes the ILLEGAL SEQUENCE with the
 * current substitution string associated with _this.
 * In the event the target buffer is too small, it will store the extra info in the UConverter, and err
 * will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
 * store the left over data in target, before transcoding the "source Stream"
 * NOTE(review): this text matches the FROM_U variant word for word; since this functor
 * has the to-Unicode signature, presumably T_UConverter_toUnicode is meant -- confirm.
 */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
/* Functor (codepage -> Unicode direction): substitutes the ILLEGAL SEQUENCE with a sequence
 * of escaped codepoints corresponding to the
 * ILLEGAL SEQUENCE (format %XNN, e.g. "%XFF%X0A%XC8%X03").
 * In the event the target buffer is too small, it will store the extra info in the UConverter, and err
 * will be set to U_INDEX_OUTOFBOUNDS_ERROR. The next time T_UConverter_fromUnicode is called, it will
 * store the left over data in target, before transcoding the "source Stream"
 * NOTE(review): since this functor has the to-Unicode signature, presumably
 * T_UConverter_toUnicode is meant above -- confirm.
 */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
bool_t flush,
UErrorCode * err);
#endif/*UCNV_ERR_H*/

View file

@ -0,0 +1,215 @@
/*
*******************************************************************************
* *
* COPYRIGHT: *
* (C) Copyright International Business Machines Corporation, 1999 *
* Licensed Material - Program-Property of IBM - All Rights Reserved. *
* US Government Users Restricted Rights - Use, duplication, or disclosure *
* restricted by GSA ADP Schedule Contract with IBM Corp. *
* *
*******************************************************************************
* file name: udata.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999oct25
* created by: Markus W. Scherer
*/
#ifndef __UDATA_H__
#define __UDATA_H__
#include "unicode/utypes.h"
/**
* Information about data memory.
* This structure may grow in the future, indicated by the
* <code>size</code> field.
*
* <p>The platform data property fields help determine if a data
* file can be efficiently used on a given machine.
* The particular fields are of importance only if the data
* is affected by the properties - if there is integer data
* with word sizes > 1 byte, char* text, or UChar* text.</p>
*
* <p>The implementation for the <code>udata_open[Choice]()</code>
* functions may reject data based on the value in <code>isBigEndian</code>.
* No other field is used by the <code>udata</code> API implementation.</p>
*
* <p>The <code>dataFormat</code> may be used to identify
* the kind of data, e.g. a converter table.</p>
*
* <p>The <code>formatVersion</code> field should be used to
* make sure that the format can be interpreted.
* It may be a good idea to check only for the one or two highest
* of the version elements to allow the data memory to
* get more or somewhat rearranged contents, for as long
* as the using code can still interpret the older contents.</p>
*
* <p>The <code>dataVersion</code> field is intended to be a
* common place to store the source version of the data;
* for data from the Unicode character database, this could
* reflect the Unicode version.</p>
*/
typedef struct {
/** @memo sizeof(UDataInfo) */
uint16_t size;
/** @memo unused, set to 0 */
uint16_t reservedWord;
/* platform data properties */
/** @memo 0 for little-endian machine, 1 for big-endian */
uint8_t isBigEndian;
/** @memo see U_CHARSET_FAMILY values in utypes.h */
uint8_t charsetFamily;
/** @memo sizeof(UChar), one of { 1, 2, 4 } */
uint8_t sizeofUChar;
/** @memo unused, set to 0 */
uint8_t reservedByte;
/** @memo data format identifier */
uint8_t dataFormat[4];
/** @memo versions: [0] major [1] minor [2] milli [3] micro */
uint8_t formatVersion[4];
/** @memo source version of the data, e.g. the Unicode version for Unicode character database data */
uint8_t dataVersion[4];
} UDataInfo;
/* API for reading data -----------------------------------------------------*/
/**
* Forward declaration of the data memory type.
* The struct is not defined in this header; objects are obtained from
* <code>udata_open()</code> / <code>udata_openChoice()</code> and used through pointers.
*/
typedef struct UDataMemory UDataMemory;
/**
* Callback function type for udata_openChoice().
* @param context The <code>context</code> parameter passed into <code>udata_openChoice()</code>.
* @param type The type of the data as passed into <code>udata_openChoice()</code>.
* It may be <code>NULL</code>.
* @param name The name of the data as passed into <code>udata_openChoice()</code>.
* @param pInfo A pointer to the <code>UDataInfo</code> structure
* of data that has been loaded and will be returned
* by <code>udata_openChoice()</code> if this function
* returns <code>TRUE</code>.
* @return TRUE if the current data memory is acceptable
*/
typedef bool_t
UDataMemoryIsAcceptable(void *context,
const char *type, const char *name,
UDataInfo *pInfo);
/**
* Convenience function.
* This function works the same as <code>udata_openChoice</code>
* except that any data that matches the type and name
* is assumed to be acceptable.
* See <code>udata_openChoice</code> for the meaning of <code>path</code>,
* <code>type</code>, <code>name</code> and <code>pErrorCode</code>.
*/
U_CAPI UDataMemory * U_EXPORT2
udata_open(const char *path, const char *type, const char *name,
UErrorCode *pErrorCode);
/**
* Data loading function.
* This function is used to find and efficiently load data for
* ICU and applications using ICU.
* It provides an abstract interface that allows specifying a data
* type and name to find and load the data.
*
* <p>The implementation depends on platform properties and user preferences
* and may involve loading shared libraries (DLLs), mapping
* files into memory, or fopen()/fread() files.
* It may also involve using static memory or database queries etc.
* Several or all data items may be combined into one entity
* (DLL, memory-mappable file).</p>
*
* <p>The data is always preceded by a header that includes
* a <code>UDataInfo</code> structure.
* The caller's <code>isAcceptable()</code> function is called to make
* sure that the data is useful. It may be called several times if it
* rejects the data and there is more than one location with data
* matching the type and name.</p>
*
* <p>If <code>path==NULL</code>, then ICU data is loaded.
* Otherwise, it is separated into a basename and a basename-less path string.
* If the path string is empty, then <code>u_getDataDirectory()</code>
* is used in its place.
* When data is loaded from files or DLLs (shared libraries) and
* may be stored in common files, then the data finding is roughly as follows:
* <ul>
* <li>common file at path/basename has entry name_type?</li>
* <li>common file at basename has entry name_type?</li>
* <li>separate file at path/basename_name_type?</li>
* <li>separate file at basename_name_type?</li>
* <li>separate file at path/name_type?</li>
* <li>separate file at name_type?</li>
* </ul>
* If the basename is empty, then only the last two options are attempted.
* Otherwise, it serves as a name for a common data file or as a basename
* (collection name) prefix for individual files.</p>
*
* @param path Specifies an absolute path and/or a basename for the
* finding of the data in the file system.
* <code>NULL</code> for ICU data.
* @param type A string that specifies the type of data to be loaded.
* For example, resource bundles are loaded with type "res",
* conversion tables with type "cnv".
* This may be <code>NULL</code> or empty.
* @param name A string that specifies the name of the data.
* @param isAcceptable This function is called to verify that loaded data
* is useful for the client code. If it returns FALSE
* for all data items, then <code>udata_openChoice()</code>
* will return with an error.
* @param context Arbitrary parameter to be passed into isAcceptable.
* @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
* @return A pointer (handle) to a data memory object, or <code>NULL</code>
* if an error occurs. Call <code>udata_getMemory()</code>
* to get a pointer to the actual data.
*/
U_CAPI UDataMemory * U_EXPORT2
udata_openChoice(const char *path, const char *type, const char *name,
UDataMemoryIsAcceptable *isAcceptable, void *context,
UErrorCode *pErrorCode);
/**
* Close the data memory.
* This function must be called to allow the system to
* release resources associated with this data memory.
*
* @param pData pointer to the data memory object to close
*/
U_CAPI void U_EXPORT2
udata_close(UDataMemory *pData);
/**
* Get the pointer to the actual data inside the data memory.
* The data is read-only.
*
* @param pData pointer to the data memory object
* @return a pointer to the actual (read-only) data
*/
U_CAPI const void * U_EXPORT2
udata_getMemory(UDataMemory *pData);
/**
* Get the information from the data memory header.
* This gives access to the header containing
* platform data properties etc., which is not part of
* the data itself and therefore cannot be accessed
* via the pointer that <code>udata_getMemory()</code> returns.
*
* @param pData pointer to the data memory object
* @param pInfo pointer to a UDataInfo object;
* its <code>size</code> field must be set correctly,
* typically to <code>sizeof(UDataInfo)</code>.
*
* <code>*pInfo</code> will be filled with the UDataInfo structure
* in the data memory object. If this structure is smaller than
* <code>pInfo->size</code>, then the <code>size</code> will be
* adjusted and only part of the structure will be filled.
*/
U_CAPI void U_EXPORT2
udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
#endif

View file

@ -0,0 +1,469 @@
/*
**********************************************************************
* Copyright (C) 1997-1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File ULOC.H
*
* Modification History:
*
* Date Name Description
* 04/01/97 aliu Creation.
* 08/22/98 stephen JDK 1.2 sync.
* 12/08/98 rtg New C API for Locale
* 03/30/99 damiba overhaul
* 03/31/99 helena Javadoc for uloc functions.
* 04/15/99 Madhu Updated Javadoc
********************************************************************************
*/
#ifndef ULOC_H
#define ULOC_H
#include "unicode/utypes.h"
/**
*
* A <code>Locale</code> represents a specific geographical, political,
* or cultural region. An operation that requires a <code>Locale</code> to perform
* its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
* to tailor information for the user. For example, displaying a number
* is a locale-sensitive operation--the number should be formatted
* according to the customs/conventions of the user's native country,
* region, or culture. In the C APIs, a locale is simply a const char string.
*
* <P>
* You create a <code>Locale</code> with one of the three options listed below.
* The components are separated by '_' in the locale string.
* <blockquote>
* <pre>
* . newLanguage
* .
* . newLanguage + newCountry
* .
* . newLanguage + newCountry + newVariant
* </pre>
* </blockquote>
* The first option is a valid <STRONG>ISO
* Language Code.</STRONG> These codes are the lower-case two-letter
* codes as defined by ISO-639.
* You can find a full list of these codes at a number of sites, such as:
* <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
* <code>http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</code></a>
*
* <P>
* The second option includes an additional <STRONG>ISO Country
* Code.</STRONG> These codes are the upper-case two-letter codes
* as defined by ISO-3166.
* You can find a full list of these codes at a number of sites, such as:
* <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
* <code>http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</code></a>
*
* <P>
* The third option requires one more piece of information--the
* <STRONG>Variant.</STRONG>
* The Variant codes are vendor and browser-specific.
* For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
* Where there are two variants, separate them with an underscore, and
* put the most important one first. For
* example, a Traditional Spanish collation might be referenced, with
* "es", "ES", "Traditional_WIN".
*
* <P>
* Because a <code>Locale</code> is just an identifier for a region,
* no validity check is performed when you specify a <code>Locale</code>.
* If you want to see whether particular resources are available for the
* <code>Locale</code> you asked for, you must query those resources. For
* example, ask the <code>UNumberFormat</code> for the locales it supports
* using its <code>getAvailable</code> method.
* <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
* locale, you get back the best available match, not necessarily
* precisely what you asked for. For more information, look at
* <a href="ures.html"><code>UResourceBundle</code></a>.
*
* <P>
* The <code>Locale</code> provides a number of convenient constants
* that you can use to specify the commonly used
* locales. For example, the following refers to a locale
* for the United States:
* <blockquote>
* <pre>
* . ULOC_US
* </pre>
* </blockquote>
*
* <P>
* Once you've specified a locale you can query it for information about
* itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and
* <code>uloc_getLanguage</code> to get the ISO Language Code. You can
* use <code>uloc_getDisplayCountry</code> to get the
* name of the country suitable for displaying to the user. Similarly,
* you can use <code>uloc_getDisplayLanguage</code> to get the name of
* the language suitable for displaying to the user. Interestingly,
* the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive
* and have two versions: one that uses the default locale and one
* that takes a locale as an argument and displays the name or country in
* a language appropriate to that locale.
*
* <P>
* The ICU provides a number of services that perform locale-sensitive
* operations. For example, the <code>unum_xxx</code> functions format
* numbers, currency, or percentages in a locale-sensitive manner.
* </P>
* <blockquote>
* <pre>
* . UErrorCode success = U_ZERO_ERROR;
* . UNumberFormat *nf;
* . const char* myLocale = "fr_FR";
* .
* . nf = unum_open( UNUM_DEFAULT, NULL, &success );
* . unum_close(nf);
* . nf = unum_open( UNUM_CURRENCY, NULL, &success );
* . unum_close(nf);
* . nf = unum_open( UNUM_PERCENT, NULL, &success );
* . unum_close(nf);
* </pre>
* </blockquote>
* Each of these methods has two variants; one with an explicit locale
* and one without; the latter using the default locale.
* <blockquote>
* <pre>
* .
* . nf = unum_open( UNUM_DEFAULT, myLocale, &success );
* . unum_close(nf);
* . nf = unum_open( UNUM_CURRENCY, myLocale, &success );
* . unum_close(nf);
* . nf = unum_open( UNUM_PERCENT, myLocale, &success );
* . unum_close(nf);
* </pre>
* </blockquote>
* A <code>Locale</code> is the mechanism for identifying the kind of services
* (<code>UNumberFormat</code>) that you would like to get. The locale is
* <STRONG>just</STRONG> a mechanism for identifying these services.
*
* <P>
* Each international service that performs locale-sensitive operations
* allows you
* to get all the available objects of that type. You can sift
* through these objects by language, country, or variant,
* and use the display names to present a menu to the user.
* For example, you can create a menu of all the collation objects
* suitable for a given language. Such classes implement these
* three class methods:
* <blockquote>
* <pre>
* . const char* uloc_getAvailable(int32_t index);
* . int32_t uloc_countAvailable();
* . int32_t
* . uloc_getDisplayName(const char* localeID,
* . const char* inLocaleID,
* . UChar* result,
* . int32_t maxResultSize,
* . UErrorCode* err);
* .
* </pre>
* </blockquote>
*/
/**
* Useful constants for language.
*
* Each macro expands to a locale ID string literal. Most consist of a
* language code only; ULOC_SIMPLIFIED_CHINESE and ULOC_TRADITIONAL_CHINESE
* also carry a country part ("zh_CN" and "zh_TW").
*/
#define ULOC_ENGLISH "en"
#define ULOC_FRENCH "fr"
#define ULOC_GERMAN "de"
#define ULOC_ITALIAN "it"
#define ULOC_JAPANESE "ja"
#define ULOC_KOREAN "ko"
#define ULOC_CHINESE "zh"
#define ULOC_SIMPLIFIED_CHINESE "zh_CN"
#define ULOC_TRADITIONAL_CHINESE "zh_TW"
/**
* Useful constants for country.
*
* Each macro expands to a locale ID string literal of the form
* language_COUNTRY. ULOC_CHINA and ULOC_PRC are two names for the same
* string ("zh_CN"). ULOC_CANADA is the English-language Canadian locale
* ("en_CA"); use ULOC_CANADA_FRENCH for "fr_CA".
*/
#define ULOC_FRANCE "fr_FR"
#define ULOC_GERMANY "de_DE"
#define ULOC_ITALY "it_IT"
#define ULOC_JAPAN "ja_JP"
#define ULOC_KOREA "ko_KR"
#define ULOC_CHINA "zh_CN"
#define ULOC_PRC "zh_CN"
#define ULOC_TAIWAN "zh_TW"
#define ULOC_UK "en_GB"
#define ULOC_US "en_US"
#define ULOC_CANADA "en_CA"
#define ULOC_CANADA_FRENCH "fr_CA"
/**
* Gets the system's default locale.
*
* @return the locale ID string of the system default locale
*/
U_CAPI const char* U_EXPORT2
uloc_getDefault(void);
/**
* Sets the system's default locale.
*
* @param localeID the locale ID of the new system default locale
* @param status receives the error information if setting the default
* locale fails
*/
U_CAPI void U_EXPORT2
uloc_setDefault(const char* localeID,
UErrorCode* status);
/**
* Gets the language code for the specified locale.
*
* @param localeID the locale to get the ISO language code from
* @param language the buffer that receives the language code for localeID
* @param languageCapacity the size of the language buffer in which the
* language code is stored
* @param err error information if retrieving the language code failed
* @return the actual buffer size needed for the language code. If it's greater
* than languageCapacity, the returned language code will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char* localeID,
char* language,
int32_t languageCapacity,
UErrorCode* err);
/**
* Gets the country code for the specified locale.
*
* @param localeID the locale to get the country code from
* @param country the buffer that receives the country code for localeID
* @param countryCapacity the size of the country buffer in which the
* country code is stored
* @param err error information if retrieving the country code failed
* @return the actual buffer size needed for the country code. If it's greater
* than countryCapacity, the returned country code will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getCountry(const char* localeID,
char* country,
int32_t countryCapacity,
UErrorCode* err);
/**
* Gets the variant code for the specified locale.
*
* @param localeID the locale to get the variant code from
* @param variant the buffer that receives the variant code for localeID
* @param variantCapacity the size of the variant buffer in which the
* variant code is stored
* @param err error information if retrieving the variant code failed
* @return the actual buffer size needed for the variant code. If it's greater
* than variantCapacity, the returned variant code will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getVariant(const char* localeID,
char* variant,
int32_t variantCapacity,
UErrorCode* err);
/**
* Gets the full name for the specified locale.
*
* @param localeID the locale to get the full name from
* @param name the buffer that receives the full name for localeID
* @param nameCapacity the size of the name buffer in which the
* full name is stored
* @param err error information if retrieving the full name failed
* @return the actual buffer size needed for the full name. If it's greater
* than nameCapacity, the returned full name will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getName(const char* localeID,
char* name,
int32_t nameCapacity,
UErrorCode* err);
/**
* Gets the ISO language code for the specified locale.
*
* @param localeID the locale to get the ISO language code from
* @return the ISO language code for localeID
*/
U_CAPI const char* U_EXPORT2
uloc_getISO3Language(const char* localeID);
/**
* Gets the ISO country code for the specified locale.
*
* @param localeID the locale to get the ISO country code from
* @return the ISO country code for localeID
*/
U_CAPI const char* U_EXPORT2
uloc_getISO3Country(const char* localeID);
/**
* Gets the Win32 LCID value for the specified locale.
*
* @param localeID the locale to get the Win32 LCID value for
* @return the Win32 LCID for localeID
*/
U_CAPI uint32_t U_EXPORT2
uloc_getLCID(const char* localeID);
/**
* Gets the language name suitable for display for the specified locale.
*
* @param locale the locale to get the displayable language name for
* @param inLocale the locale whose language is used for the displayed name
* @param language the buffer that receives the displayable language name
* for locale
* @param languageCapacity the size of the language buffer in which the
* displayable language name is stored
* @param status error information if retrieving the displayable language name failed
* @return the actual buffer size needed for the displayable language name. If it's greater
* than languageCapacity, the returned language name will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayLanguage(const char* locale,
const char* inLocale,
UChar* language,
int32_t languageCapacity,
UErrorCode* status);
/**
* Gets the country name suitable for display for the specified locale.
*
* @param locale the locale to get the displayable country name for
* @param inLocale the locale whose language is used for the displayed name
* @param country the buffer that receives the displayable country name
* for locale
* @param countryCapacity the size of the country buffer in which the
* displayable country name is stored
* @param status error information if retrieving the displayable country name failed
* @return the actual buffer size needed for the displayable country name. If it's greater
* than countryCapacity, the returned displayable country name will be truncated.
*
* NOTE(review): the trailing "NULL may be used to specify the default" remark
* sits after the status parameter, but presumably applies to inLocale, as it
* does on uloc_getDisplayName -- confirm.
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayCountry(const char* locale,
const char* inLocale,
UChar* country,
int32_t countryCapacity,
UErrorCode* status); /* NULL may be used to specify the default */
/**
* Gets the variant name suitable for display for the specified locale.
*
* @param locale the locale to get the displayable variant name for
* @param inLocale the locale whose language is used for the displayed name
* @param variant the buffer that receives the displayable variant name
* for locale
* @param variantCapacity the size of the variant buffer in which the
* displayable variant name is stored
* @param status error information if retrieving the displayable variant name failed
* @return the actual buffer size needed for the displayable variant name. If it's greater
* than variantCapacity, the returned displayable variant name will be truncated.
*
* NOTE(review): the trailing "NULL may be used to specify the default" remark
* sits after the status parameter, but presumably applies to inLocale, as it
* does on uloc_getDisplayName -- confirm.
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayVariant(const char* locale,
const char* inLocale,
UChar* variant,
int32_t variantCapacity,
UErrorCode* status); /* NULL may be used to specify the default */
/**
* Gets the full name suitable for display for the specified locale.
*
* @param localeID the locale to get the displayable name for
* @param inLocaleID the locale whose language is used for the displayed name;
* NULL may be used to specify the default
* @param result the buffer that receives the displayable name for localeID
* @param maxResultSize the size of the result buffer in which the
* displayable full name is stored
* @param err error information if retrieving the displayable name failed
* @return the actual buffer size needed for the displayable name. If it's greater
* than maxResultSize, the returned displayable name will be truncated.
*/
U_CAPI int32_t U_EXPORT2
uloc_getDisplayName(const char* localeID,
const char* inLocaleID, /* NULL may be used to specify the default */
UChar* result,
int32_t maxResultSize,
UErrorCode* err);
/**
*
* Gets the specified locale from a list of all available locales.
* The return value is a pointer to an item of
* a locale name array. Both this array and the pointers
* it contains are owned by ICU and should not be deleted or written through
* by the caller.
* NOTE(review): the original text said the locale name "is terminated by a
* null pointer"; presumably a NUL-terminated string is meant -- confirm.
* @param index the index of the wanted locale name in the list of available locales
* @return the locale name at the given index in the list of all available locales
*/
U_CAPI const char* U_EXPORT2
uloc_getAvailable(int32_t index);
/**
* Gets the size of the list of all available locales.
*
* @return the number of entries in the locale list
*/
U_CAPI int32_t U_EXPORT2 uloc_countAvailable(void);
/**
*
* Gets a list of all available language codes defined in ISO 639. This is a pointer
* to an array of pointers to arrays of char. All of these pointers are owned
* by ICU -- do not delete them, and do not write through them. The array is
* terminated with a null pointer.
* @return a null-pointer-terminated list of all available language codes
*/
U_CAPI const char* const* U_EXPORT2
uloc_getISOLanguages(void);
/**
*
* Gets a list of all available 2-letter country codes defined in ISO 3166. This is a
* pointer to an array of pointers to arrays of char. All of these pointers are
* owned by ICU -- do not delete them, and do not write through them. The array is
* terminated with a null pointer.
* @return a null-pointer-terminated list of all available country codes
*/
U_CAPI const char* const* U_EXPORT2
uloc_getISOCountries(void);
/**
* Deprecated 1999dec14 - Gets the directory containing the locale data files.
* This name is now only a macro alias for u_getDataDirectory; new code
* should call u_getDataDirectory directly.
*
* @return the locale data file directory
*/
#define uloc_getDataDirectory u_getDataDirectory
/**
* Deprecated 1999dec14 - Sets the directory containing the locale data files.
* This name is now only a macro alias for u_setDataDirectory; new code
* should call u_setDataDirectory directly.
*
* NOTE(review): the original comment listed "the new directory to fetch
* locale data from" as a return value; presumably it is the argument passed
* to u_setDataDirectory -- confirm against that declaration.
*/
#define uloc_setDataDirectory u_setDataDirectory
/*
* Internal function.
*
* NOTE(review): judging by the signature, this presumably stores the parent
* locale ID of localeID in the parent buffer (of size parentCapacity) and
* returns the needed length, like the other buffer-filling functions in this
* header -- confirm against the implementation.
* NOTE(review): unlike every other prototype in this header, this one is not
* marked U_CAPI -- confirm whether that is intentional.
*/
int32_t U_EXPORT2
uloc_getParent(const char* localeID,
char* parent,
int32_t parentCapacity,
UErrorCode* err);
/*eof*/
#endif /* ULOC_H */

Some files were not shown because too many files have changed in this diff Show more