mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-05 13:35:32 +00:00
ICU-8319 merge MessageFormat 2011q1 work into trunk, from icu/branches/markus/msg48 -r 29400:29882
X-SVN-Rev: 29886
This commit is contained in:
parent
fb5332c296
commit
d743bb693e
38 changed files with 5467 additions and 3229 deletions
|
@ -3,8 +3,7 @@
|
|||
|
||||
<html lang="en-US" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
|
||||
<head>
|
||||
|
||||
<title>ReadMe for ICU 4.7.1 (4.8M1)</title>
|
||||
<title>ReadMe for ICU 4.8</title>
|
||||
<meta name="COPYRIGHT" content=
|
||||
"Copyright (c) 1997-2011 IBM Corporation and others. All Rights Reserved." />
|
||||
<meta name="KEYWORDS" content=
|
||||
|
@ -214,6 +213,11 @@
|
|||
this release, see the <a href="http://site.icu-project.org/download">ICU
|
||||
download page</a>.</p>
|
||||
|
||||
<h3>MessageFormat Changes</h3>
|
||||
<p>MessageFormat and related classes (choice/plural/select) have been reimplemented,
|
||||
with several improvements and some incompatible changes.
|
||||
See the <a href="http://site.icu-project.org/download/48">ICU 4.8 download</a> page for details.</p>
|
||||
|
||||
<h2><a name="Download" href="#Download" id="Download">How To Download the
|
||||
Source Code</a></h2>
|
||||
|
||||
|
|
|
@ -84,7 +84,7 @@ ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \
|
|||
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
|
||||
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
|
||||
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
|
||||
ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o loclikely.o locresdata.o \
|
||||
messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o loclikely.o locresdata.o \
|
||||
bytestream.o stringpiece.o \
|
||||
stringtriebuilder.o bytestriebuilder.o \
|
||||
bytestrie.o bytestrieiterator.o \
|
||||
|
@ -93,7 +93,7 @@ appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
|
|||
utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
|
||||
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
|
||||
chariter.o schriter.o uchriter.o uiter.o \
|
||||
uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
|
||||
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
|
||||
uscript.o usc_impl.o unames.o \
|
||||
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
|
||||
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \
|
||||
|
|
|
@ -374,6 +374,7 @@
|
|||
<ClCompile Include="unorm_it.c" />
|
||||
<ClCompile Include="unormcmp.cpp" />
|
||||
<ClCompile Include="bmpset.cpp" />
|
||||
<ClCompile Include="patternprops.cpp" />
|
||||
<ClCompile Include="propname.cpp" />
|
||||
<ClCompile Include="ruleiter.cpp" />
|
||||
<ClCompile Include="ucase.c" />
|
||||
|
@ -408,6 +409,7 @@
|
|||
<ClCompile Include="charstr.cpp" />
|
||||
<ClCompile Include="cstring.c" />
|
||||
<ClCompile Include="cwchar.c" />
|
||||
<ClCompile Include="messagepattern.cpp" />
|
||||
<ClCompile Include="schriter.cpp" />
|
||||
<ClCompile Include="stringpiece.cpp" />
|
||||
<ClCompile Include="stringtriebuilder.cpp" />
|
||||
|
@ -1193,6 +1195,8 @@
|
|||
<ClInclude Include="unorm_it.h" />
|
||||
<ClInclude Include="unormimp.h" />
|
||||
<ClInclude Include="bmpset.h" />
|
||||
<ClInclude Include="messageimpl.h" />
|
||||
<ClInclude Include="patternprops.h" />
|
||||
<ClInclude Include="propname.h" />
|
||||
<ClInclude Include="ruleiter.h" />
|
||||
<CustomBuild Include="unicode\symtable.h">
|
||||
|
@ -1432,6 +1436,20 @@
|
|||
<ClInclude Include="charstr.h" />
|
||||
<ClInclude Include="cstring.h" />
|
||||
<ClInclude Include="cwchar.h" />
|
||||
<CustomBuild Include="unicode\messagepattern.h">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="unicode\rep.h">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
|
||||
</Command>
|
||||
|
|
63
icu4c/source/common/messageimpl.h
Normal file
63
icu4c/source/common/messageimpl.h
Normal file
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: messageimpl.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2011apr04
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __MESSAGEIMPL_H__
|
||||
#define __MESSAGEIMPL_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "unicode/messagepattern.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Helper functions for use of MessagePattern.
|
||||
* In Java, these are package-private methods in MessagePattern itself.
|
||||
* In C++, they are declared here and implemented in messagepattern.cpp.
|
||||
*/
|
||||
class U_COMMON_API MessageImpl {
|
||||
public:
|
||||
/**
|
||||
* @return TRUE if getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED
|
||||
*/
|
||||
static UBool jdkAposMode(const MessagePattern &msgPattern) {
|
||||
return msgPattern.getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Appends the s[start, limit[ substring to sb, but with only half of the apostrophes
|
||||
* according to JDK pattern behavior.
|
||||
*/
|
||||
static void appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
|
||||
UnicodeString &sb);
|
||||
|
||||
/**
|
||||
* Appends the sub-message to the result string.
|
||||
* Omits SKIP_SYNTAX and appends whole arguments using appendReducedApostrophes().
|
||||
*/
|
||||
static UnicodeString &appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
|
||||
int32_t msgStart,
|
||||
UnicodeString &result);
|
||||
|
||||
private:
|
||||
MessageImpl(); // no constructor: all static methods
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // !UCONFIG_NO_FORMATTING
|
||||
|
||||
#endif // __MESSAGEIMPL_H__
|
1208
icu4c/source/common/messagepattern.cpp
Normal file
1208
icu4c/source/common/messagepattern.cpp
Normal file
File diff suppressed because it is too large
Load diff
218
icu4c/source/common/patternprops.cpp
Normal file
218
icu4c/source/common/patternprops.cpp
Normal file
|
@ -0,0 +1,218 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: patternprops.cpp
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2011mar13
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "patternprops.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/*
|
||||
* One byte per Latin-1 character.
|
||||
* Bit 0 is set if either Pattern property is true,
|
||||
* bit 1 if Pattern_Syntax is true,
|
||||
* bit 2 if Pattern_White_Space is true.
|
||||
* That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5.
|
||||
*/
|
||||
// Indexed directly by code point U+0000..U+00FF; each data row below covers 16 code points.
// Readers test bit 0 (either property), bit 1 (Pattern_Syntax) or bit 2 (Pattern_White_Space).
static const uint8_t latin1[256]={
|
||||
// WS: 9..D
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 10..1F: neither property
|
||||
// WS: 20 Syntax: 21..2F
|
||||
5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
// Syntax: 3A..40
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3,
|
||||
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
// Syntax: 5B..5E
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
|
||||
// Syntax: 60
|
||||
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
// Syntax: 7B..7E
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
|
||||
// WS: 85
|
||||
0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 90..9F: neither property
|
||||
// Syntax: A1..A7, A9, AB, AC, AE
|
||||
0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0,
|
||||
// Syntax: B0, B1, B6, BB, BF
|
||||
3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // C0..CF: neither property
|
||||
// Syntax: D7
|
||||
0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // E0..EF: neither property
|
||||
// Syntax: F7
|
||||
0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
/*
|
||||
* One byte per 32 characters from U+2000..U+303F indexing into
|
||||
* a small table of 32-bit data words.
|
||||
* The first two data words are all-zeros and all-ones.
|
||||
*/
|
||||
// Entry i covers the 32 code points starting at U+2000+32*i; lookups use (c-0x2000)>>5.
// 130 entries = (0x3040-0x2000)/32. Each row below holds 8 entries, that is, 256 code points.
// Values 0 and 1 select the all-false and all-true data words; 2..9 select mixed words.
static const uint8_t index2000[130]={
|
||||
2, 3, 4, 0, 0, 0, 0, 0, // 20xx
|
||||
0, 0, 0, 0, 5, 1, 1, 1, // 21xx
|
||||
1, 1, 1, 1, 1, 1, 1, 1, // 22xx
|
||||
1, 1, 1, 1, 1, 1, 1, 1, // 23xx
|
||||
1, 1, 1, 0, 0, 0, 0, 0, // 24xx
|
||||
1, 1, 1, 1, 1, 1, 1, 1, // 25xx
|
||||
1, 1, 1, 1, 1, 1, 1, 1, // 26xx
|
||||
1, 1, 1, 6, 7, 1, 1, 1, // 27xx
|
||||
1, 1, 1, 1, 1, 1, 1, 1, // 28xx
|
||||
1, 1, 1, 1, 1, 1, 1, 1, // 29xx
|
||||
1, 1, 1, 1, 1, 1, 1, 1, // 2Axx
|
||||
1, 1, 1, 1, 1, 1, 1, 1, // 2Bxx
|
||||
0, 0, 0, 0, 0, 0, 0, 0, // 2Cxx
|
||||
0, 0, 0, 0, 0, 0, 0, 0, // 2Dxx
|
||||
1, 1, 1, 1, 0, 0, 0, 0, // 2Exx
|
||||
0, 0, 0, 0, 0, 0, 0, 0, // 2Fxx
|
||||
8, 9 // 3000..303F
|
||||
};
|
||||
|
||||
/*
|
||||
* One 32-bit integer per 32 characters. Ranges of all-false and all-true
|
||||
* are mapped to the first two values, other ranges map to appropriate bit patterns.
|
||||
*/
|
||||
// Selected via index2000[]. Bit (c&0x1f) of a word is the Pattern_Syntax flag for c,
// so the least significant bit belongs to the lowest code point of the 32-character range.
static const uint32_t syntax2000[]={
|
||||
0,  // 0: all 32 code points false
|
||||
0xffffffff,  // 1: all 32 code points true
|
||||
0xffff0000, // 2: 2010..201F
|
||||
0x7fff00ff, // 3: 2020..2027, 2030..203E
|
||||
0x7feffffe, // 4: 2041..2053, 2055..205E
|
||||
0xffff0000, // 5: 2190..219F
|
||||
0x003fffff, // 6: 2760..2775
|
||||
0xfff00000, // 7: 2794..279F
|
||||
0xffffff0e, // 8: 3001..3003, 3008..301F
|
||||
0x00010001 // 9: 3020, 3030
|
||||
};
|
||||
|
||||
/*
|
||||
* Same as syntax2000, but with additional bits set for the
|
||||
* Pattern_White_Space characters 200E 200F 2028 2029.
|
||||
*/
|
||||
// Selected via index2000[], with the same word positions as syntax2000[].
// Only words 2 and 3 differ from syntax2000[]: they add the bits for 200E, 200F and 2028, 2029.
static const uint32_t syntaxOrWhiteSpace2000[]={
|
||||
0,  // 0: all 32 code points false
|
||||
0xffffffff,  // 1: all 32 code points true
|
||||
0xffffc000, // 2: 200E..201F
|
||||
0x7fff03ff, // 3: 2020..2029, 2030..203E
|
||||
0x7feffffe, // 4: 2041..2053, 2055..205E
|
||||
0xffff0000, // 5: 2190..219F
|
||||
0x003fffff, // 6: 2760..2775
|
||||
0xfff00000, // 7: 2794..279F
|
||||
0xffffff0e, // 8: 3001..3003, 3008..301F
|
||||
0x00010001 // 9: 3020, 3030
|
||||
};
|
||||
|
||||
/*
 * Returns TRUE if c has the Pattern_Syntax property.
 * Uses the latin1[] table for U+0000..U+00FF, the index2000[]/syntax2000[]
 * bit sets for U+2010..U+3030, and explicit tests for the four code points
 * FD3E, FD3F, FE45, FE46. Everything else, including negative input, is FALSE.
 */
UBool
PatternProps::isSyntax(UChar32 c) {
    if(c<0) {
        return FALSE;
    }
    if(c<=0xff) {
        // Bit 1 of the Latin-1 byte is the Pattern_Syntax flag.
        return (UBool)((latin1[c]>>1)&1);
    }
    if(c<0x2010) {
        return FALSE;
    }
    if(c<=0x3030) {
        // One 32-bit word per 32 code points; the low 5 bits of c select the bit.
        const uint8_t wordIndex=index2000[(c-0x2000)>>5];
        const uint32_t word=syntax2000[wordIndex];
        return (UBool)((word>>(c&0x1f))&1);
    }
    return (UBool)(c==0xfd3e || c==0xfd3f || c==0xfe45 || c==0xfe46);
}
|
||||
|
||||
/*
 * Returns TRUE if c has the Pattern_Syntax or the Pattern_White_Space property.
 * Same structure as isSyntax(), but reads bit 0 of latin1[] and the
 * syntaxOrWhiteSpace2000[] words, and starts the U+2000 block lookup at U+200E.
 */
UBool
PatternProps::isSyntaxOrWhiteSpace(UChar32 c) {
    if(c<0) {
        return FALSE;
    }
    if(c<=0xff) {
        // Bit 0 of the Latin-1 byte is set if either property is true.
        return (UBool)(latin1[c]&1);
    }
    if(c<0x200e) {
        return FALSE;
    }
    if(c<=0x3030) {
        // One 32-bit word per 32 code points; the low 5 bits of c select the bit.
        const uint8_t wordIndex=index2000[(c-0x2000)>>5];
        const uint32_t word=syntaxOrWhiteSpace2000[wordIndex];
        return (UBool)((word>>(c&0x1f))&1);
    }
    return (UBool)(c==0xfd3e || c==0xfd3f || c==0xfe45 || c==0xfe46);
}
|
||||
|
||||
/*
 * Returns TRUE if c has the Pattern_White_Space property.
 * Latin-1 is answered from bit 2 of latin1[]; beyond that only
 * 200E, 200F, 2028 and 2029 qualify.
 */
UBool
PatternProps::isWhiteSpace(UChar32 c) {
    if(c<0) {
        return FALSE;
    }
    if(c<=0xff) {
        return (UBool)((latin1[c]>>2)&1);
    }
    return (UBool)(c==0x200e || c==0x200f || c==0x2028 || c==0x2029);
}
|
||||
|
||||
/*
 * Advances past leading Pattern_White_Space.
 * Examines at most length code units starting at s and returns a pointer to
 * the first one that is not white space, or to s+length if all of them are.
 * Returns s unchanged when length<=0.
 */
const UChar *
PatternProps::skipWhiteSpace(const UChar *s, int32_t length) {
    const UChar *p=s;
    for(int32_t remaining=length; remaining>0; --remaining) {
        if(!isWhiteSpace(*p)) {
            break;
        }
        ++p;
    }
    return p;
}
|
||||
|
||||
/*
 * Trims leading and trailing Pattern_White_Space from s[0..length[.
 * Returns a pointer to the first remaining code unit and stores the trimmed
 * length back into length. If length<=0, or neither end is white space,
 * s is returned and length is left unchanged.
 */
const UChar *
PatternProps::trimWhiteSpace(const UChar *s, int32_t &length) {
    if(length<=0) {
        return s;
    }
    // Fast path: nothing to trim at either end.
    if(!isWhiteSpace(s[0]) && !isWhiteSpace(s[length-1])) {
        return s;
    }
    const UChar *first=s;
    const UChar *end=s+length;
    while(first<end && isWhiteSpace(*first)) {
        ++first;
    }
    if(first<end) {
        // *first is not white space, so the backward scan stops there at the latest
        // and needs no bounds check.
        while(isWhiteSpace(*(end-1))) {
            --end;
        }
    }
    length=(int32_t)(end-first);
    return first;
}
|
||||
|
||||
/*
 * Returns TRUE if s[0..length[ is a non-empty "pattern identifier", that is,
 * it contains no Pattern_Syntax and no Pattern_White_Space code units.
 * Returns FALSE when length<=0.
 */
UBool
PatternProps::isIdentifier(const UChar *s, int32_t length) {
    if(length<=0) {
        return FALSE;
    }
    for(int32_t i=0; i<length; ++i) {
        if(isSyntaxOrWhiteSpace(s[i])) {
            return FALSE;
        }
    }
    return TRUE;
}
|
||||
|
||||
/*
 * Advances past a "pattern identifier".
 * Examines at most length code units starting at s and returns a pointer to
 * the first Pattern_Syntax or Pattern_White_Space code unit, or to s+length
 * if there is none. Returns s unchanged when length<=0.
 */
const UChar *
PatternProps::skipIdentifier(const UChar *s, int32_t length) {
    int32_t i=0;
    while(i<length && !isSyntaxOrWhiteSpace(s[i])) {
        ++i;
    }
    return s+i;
}
|
||||
|
||||
U_NAMESPACE_END
|
89
icu4c/source/common/patternprops.h
Normal file
89
icu4c/source/common/patternprops.h
Normal file
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: patternprops.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2011mar13
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __PATTERNPROPS_H__
|
||||
#define __PATTERNPROPS_H__
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
/**
|
||||
* Implements the immutable Unicode properties Pattern_Syntax and Pattern_White_Space.
|
||||
* Hardcodes these properties, does not load data, does not depend on other ICU classes.
|
||||
* <p>
|
||||
* Note: Both properties include ASCII as well as non-ASCII, non-Latin-1 code points,
|
||||
* and both properties only include BMP code points (no supplementary ones).
|
||||
* Pattern_Syntax includes some unassigned code points.
|
||||
* <p>
|
||||
* [:Pattern_White_Space:] =
|
||||
* [\u0009-\u000D\ \u0085\u200E\u200F\u2028\u2029]
|
||||
* <p>
|
||||
* [:Pattern_Syntax:] =
|
||||
* [!-/\:-@\[-\^`\{-~\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE
|
||||
* \u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7
|
||||
* \u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E
|
||||
* \u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F
|
||||
* \u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]
|
||||
* @author mscherer
|
||||
*/
|
||||
class U_COMMON_API PatternProps {
|
||||
public:
|
||||
/**
|
||||
* @return TRUE if c is a Pattern_Syntax code point.
|
||||
*/
|
||||
static UBool isSyntax(UChar32 c);
|
||||
|
||||
/**
|
||||
* @return TRUE if c is a Pattern_Syntax or Pattern_White_Space code point.
|
||||
*/
|
||||
static UBool isSyntaxOrWhiteSpace(UChar32 c);
|
||||
|
||||
/**
|
||||
* @return TRUE if c is a Pattern_White_Space character.
|
||||
*/
|
||||
static UBool isWhiteSpace(UChar32 c);
|
||||
|
||||
/**
|
||||
* Skips over Pattern_White_Space starting at s.
|
||||
* @return The smallest pointer at or after s with a non-white space character.
|
||||
*/
|
||||
static const UChar *skipWhiteSpace(const UChar *s, int32_t length);
|
||||
|
||||
/**
|
||||
* @return s except with leading and trailing Pattern_White_Space removed and length adjusted.
|
||||
*/
|
||||
static const UChar *trimWhiteSpace(const UChar *s, int32_t &length);
|
||||
|
||||
/**
|
||||
* Tests whether the string contains a "pattern identifier", that is,
|
||||
* whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters.
|
||||
* @return TRUE if there are no Pattern_White_Space or Pattern_Syntax characters in s.
|
||||
*/
|
||||
static UBool isIdentifier(const UChar *s, int32_t length);
|
||||
|
||||
/**
|
||||
* Skips over a "pattern identifier" starting at index s.
|
||||
* @return The smallest pointer at or after s with
|
||||
* a Pattern_White_Space or Pattern_Syntax character.
|
||||
*/
|
||||
static const UChar *skipIdentifier(const UChar *s, int32_t length);
|
||||
|
||||
private:
|
||||
PatternProps(); // no constructor: all static methods
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // __PATTERNPROPS_H__
|
918
icu4c/source/common/unicode/messagepattern.h
Normal file
918
icu4c/source/common/unicode/messagepattern.h
Normal file
|
@ -0,0 +1,918 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
* file name: messagepattern.h
|
||||
* encoding: US-ASCII
|
||||
* tab size: 8 (not used)
|
||||
* indentation:4
|
||||
*
|
||||
* created on: 2011mar14
|
||||
* created by: Markus W. Scherer
|
||||
*/
|
||||
|
||||
#ifndef __MESSAGEPATTERN_H__
|
||||
#define __MESSAGEPATTERN_H__
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
|
||||
*/
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
/**
|
||||
* Mode for when an apostrophe starts quoted literal text for MessageFormat output.
|
||||
* The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
|
||||
* (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
|
||||
* <p>
|
||||
* A pair of adjacent apostrophes always results in a single apostrophe in the output,
|
||||
* even when the pair is between two single, text-quoting apostrophes.
|
||||
* <p>
|
||||
* The following table shows examples of desired MessageFormat.format() output
|
||||
* with the pattern strings that yield that output.
|
||||
* <p>
|
||||
* <table>
|
||||
* <tr>
|
||||
* <th>Desired output</th>
|
||||
* <th>DOUBLE_OPTIONAL</th>
|
||||
* <th>DOUBLE_REQUIRED</th>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>I see {many}</td>
|
||||
* <td>I see '{many}'</td>
|
||||
* <td>(same)</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>I said {'Wow!'}</td>
|
||||
* <td>I said '{''Wow!''}'</td>
|
||||
* <td>(same)</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>I don't know</td>
|
||||
* <td>I don't know OR<br> I don''t know</td>
|
||||
* <td>I don''t know</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
* @draft ICU 4.8
|
||||
* @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
|
||||
*/
|
||||
enum UMessagePatternApostropheMode {
|
||||
/**
|
||||
* A literal apostrophe is represented by
|
||||
* either a single or a double apostrophe pattern character.
|
||||
* Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
|
||||
* if it immediately precedes a curly brace {},
|
||||
* or a pipe symbol | if inside a choice format,
|
||||
* or a pound symbol # if inside a plural format.
|
||||
* <p>
|
||||
* This is the default behavior starting with ICU 4.8.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_APOS_DOUBLE_OPTIONAL,
|
||||
/**
|
||||
* A literal apostrophe must be represented by
|
||||
* a double apostrophe pattern character.
|
||||
* A single apostrophe always starts quoted literal text.
|
||||
* <p>
|
||||
* This is the behavior of ICU 4.6 and earlier, and of the JDK.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_APOS_DOUBLE_REQUIRED
|
||||
};
|
||||
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
|
||||
|
||||
/**
|
||||
* MessagePattern::Part type constants.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
enum UMessagePatternPartType {
|
||||
/**
|
||||
* Start of a message pattern (main or nested).
|
||||
* The length is 0 for the top-level message
|
||||
* and for a choice argument sub-message, otherwise 1 for the '{'.
|
||||
* The value indicates the nesting level, starting with 0 for the main message.
|
||||
* <p>
|
||||
* There is always a later MSG_LIMIT part.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_MSG_START,
|
||||
/**
|
||||
* End of a message pattern (main or nested).
|
||||
* The length is 0 for the top-level message and
|
||||
* the last sub-message of a choice argument,
|
||||
* otherwise 1 for the '}' or (in a choice argument style) the '|'.
|
||||
* The value indicates the nesting level, starting with 0 for the main message.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_MSG_LIMIT,
|
||||
/**
|
||||
* Indicates a substring of the pattern string which is to be skipped when formatting.
|
||||
* For example, an apostrophe that begins or ends quoted text
|
||||
* would be indicated with such a part.
|
||||
* The value is undefined and currently always 0.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_SKIP_SYNTAX,
|
||||
/**
|
||||
* Indicates that a syntax character needs to be inserted for auto-quoting.
|
||||
* The length is 0.
|
||||
* The value is the character code of the insertion character. (U+0027=APOSTROPHE)
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_INSERT_CHAR,
|
||||
/**
|
||||
* Indicates a syntactic (non-escaped) # symbol in a plural variant.
|
||||
* When formatting, replace this part's substring with the
|
||||
* (value-offset) for the plural argument value.
|
||||
* The value is undefined and currently always 0.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_REPLACE_NUMBER,
|
||||
/**
|
||||
* Start of an argument.
|
||||
* The length is 1 for the '{'.
|
||||
* The value is the ordinal value of the ArgType. Use getArgType().
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_ARG_START,
|
||||
/**
|
||||
* End of an argument.
|
||||
* The length is 1 for the '}'.
|
||||
* The value is the ordinal value of the ArgType. Use getArgType().
|
||||
* <p>
|
||||
* The matching ARG_START part is followed by either an ARG_NUMBER or ARG_NAME,
|
||||
* followed by optional argument sub-parts (see UMessagePatternArgType constants)
|
||||
* and finally this ARG_LIMIT part.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_ARG_LIMIT,
|
||||
/**
|
||||
* The argument number, provided by the value.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_ARG_NUMBER,
|
||||
/**
|
||||
* The argument name.
|
||||
* The value is undefined and currently always 0.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_ARG_NAME,
|
||||
/**
|
||||
* The argument type.
|
||||
* The value is undefined and currently always 0.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_ARG_TYPE,
|
||||
/**
|
||||
* The argument style text.
|
||||
* The value is undefined and currently always 0.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_ARG_STYLE,
|
||||
/**
|
||||
* A selector substring in a "complex" argument style.
|
||||
* The value is undefined and currently always 0.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_ARG_SELECTOR,
|
||||
/**
|
||||
* An integer value, for example the offset or an explicit selector value
|
||||
* in a PluralFormat style.
|
||||
* The part value is the integer value.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_ARG_INT,
|
||||
/**
|
||||
* A numeric value, for example the offset or an explicit selector value
|
||||
* in a PluralFormat style.
|
||||
* The part value is an index into an internal array of numeric values;
|
||||
* use getNumericValue().
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_PART_TYPE_ARG_DOUBLE
|
||||
};
|
||||
typedef enum UMessagePatternPartType UMessagePatternPartType;
|
||||
|
||||
/**
|
||||
* Argument type constants.
|
||||
* Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
|
||||
*
|
||||
* Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
|
||||
* with a nesting level one greater than the surrounding message.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
enum UMessagePatternArgType {
|
||||
/**
|
||||
* The argument has no specified type.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_ARG_TYPE_NONE,
|
||||
/**
|
||||
* The argument has a "simple" type which is provided by the ARG_TYPE part.
|
||||
* An ARG_STYLE part might follow that.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_ARG_TYPE_SIMPLE,
|
||||
/**
|
||||
* The argument is a ChoiceFormat with one or more
|
||||
* ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_ARG_TYPE_CHOICE,
|
||||
/**
|
||||
* The argument is a PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
|
||||
* (e.g., offset:1)
|
||||
* and one or more (ARG_SELECTOR [explicit-value] message) tuples.
|
||||
* If the selector has an explicit value (e.g., =2), then
|
||||
* that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
|
||||
* Otherwise the message immediately follows the ARG_SELECTOR.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_ARG_TYPE_PLURAL,
|
||||
/**
|
||||
* The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_ARG_TYPE_SELECT
|
||||
};
|
||||
typedef enum UMessagePatternArgType UMessagePatternArgType;
|
||||
|
||||
enum {
|
||||
/**
|
||||
* Return value from MessagePattern.validateArgumentName() for when
|
||||
* the string is a valid "pattern identifier" but not a number.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
|
||||
|
||||
/**
|
||||
* Return value from MessagePattern.validateArgumentName() for when
|
||||
* the string is invalid.
|
||||
* It might not be a valid "pattern identifier",
|
||||
* or it might have only ASCII digits but there is a leading zero or the number is too large.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMSGPAT_ARG_NAME_NOT_VALID=-2
|
||||
};
|
||||
|
||||
/**
|
||||
* Special value that is returned by getNumericValue(Part) when no
|
||||
* numeric value is defined for a part.
|
||||
* @see MessagePattern.getNumericValue()
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class MessagePatternDoubleList;
|
||||
class MessagePatternPartsList;
|
||||
|
||||
/**
|
||||
* Parses and represents ICU MessageFormat patterns.
|
||||
* Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
|
||||
* Used in the implementations of those classes as well as in tools
|
||||
* for message validation, translation and format conversion.
|
||||
* <p>
|
||||
* The parser handles all syntax relevant for identifying message arguments.
|
||||
* This includes "complex" arguments whose style strings contain
|
||||
* nested MessageFormat pattern substrings.
|
||||
* For "simple" arguments (with no nested MessageFormat pattern substrings),
|
||||
* the argument style is not parsed any further.
|
||||
* <p>
|
||||
* The parser handles named and numbered message arguments and allows both in one message.
|
||||
* <p>
|
||||
* Once a pattern has been parsed successfully, iterate through the parsed data
|
||||
* with countParts(), getPart() and related methods.
|
||||
* <p>
|
||||
* The data logically represents a parse tree, but is stored and accessed
|
||||
* as a list of "parts" for fast and simple parsing and to minimize object allocations.
|
||||
* Arguments and nested messages are best handled via recursion.
|
||||
* For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
|
||||
* the index of the corresponding _LIMIT "part".
|
||||
* <p>
|
||||
* List of "parts":
|
||||
* <pre>
|
||||
* message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
|
||||
* argument = noneArg | simpleArg | complexArg
|
||||
* complexArg = choiceArg | pluralArg | selectArg
|
||||
*
|
||||
* noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
|
||||
* simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
|
||||
* choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
|
||||
* pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
|
||||
* selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
|
||||
*
|
||||
* choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
|
||||
* pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
|
||||
* selectStyle = (ARG_SELECTOR message)+
|
||||
* </pre>
|
||||
* <ul>
|
||||
* <li>Literal output text is not represented directly by "parts" but accessed
|
||||
* between parts of a message, from one part's getLimit() to the next part's getIndex().
|
||||
* <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
|
||||
* <li>In the choiceStyle, the ARG_SELECTOR has the '&lt;', the '#' or
|
||||
* the less-than-or-equal-to sign (U+2264).
|
||||
* <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
|
||||
* The optional numeric Part between each (ARG_SELECTOR, message) pair
|
||||
* is the value of an explicit-number selector like "=2",
|
||||
* otherwise the selector is a non-numeric identifier.
|
||||
* <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
|
||||
* </ul>
* <p>
|
||||
* This class is not intended for public subclassing.
|
||||
*
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
class U_COMMON_API MessagePattern : public UObject {
|
||||
public:
|
||||
/**
|
||||
* Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
MessagePattern(UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Constructs an empty MessagePattern.
|
||||
* @param mode Explicit UMessagePatternApostropheMode.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Constructs a MessagePattern with default UMessagePatternApostropheMode and
|
||||
* parses the MessageFormat pattern string.
|
||||
* @param pattern a MessageFormat pattern string
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* Can be NULL.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* TODO: turn @throws into UErrorCode specifics?
|
||||
* @throws IllegalArgumentException for syntax errors in the pattern string
|
||||
* @throws IndexOutOfBoundsException if certain limits are exceeded
|
||||
* (e.g., argument number too high, argument name too long, etc.)
|
||||
* @throws NumberFormatException if a number could not be parsed
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @param other Object to copy.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
MessagePattern(const MessagePattern &other);
|
||||
|
||||
/**
|
||||
* Assignment operator.
|
||||
* @param other Object to copy.
|
||||
* @return *this=other
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
MessagePattern &operator=(const MessagePattern &other);
|
||||
|
||||
/**
|
||||
* Destructor.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
virtual ~MessagePattern();
|
||||
|
||||
/**
|
||||
* Parses a MessageFormat pattern string.
|
||||
* @param pattern a MessageFormat pattern string
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* Can be NULL.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
* @throws IllegalArgumentException for syntax errors in the pattern string
|
||||
* @throws IndexOutOfBoundsException if certain limits are exceeded
|
||||
* (e.g., argument number too high, argument name too long, etc.)
|
||||
* @throws NumberFormatException if a number could not be parsed
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
MessagePattern &parse(const UnicodeString &pattern,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Parses a ChoiceFormat pattern string.
|
||||
* @param pattern a ChoiceFormat pattern string
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* Can be NULL.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
* @throws IllegalArgumentException for syntax errors in the pattern string
|
||||
* @throws IndexOutOfBoundsException if certain limits are exceeded
|
||||
* (e.g., argument number too high, argument name too long, etc.)
|
||||
* @throws NumberFormatException if a number could not be parsed
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Parses a PluralFormat pattern string.
|
||||
* @param pattern a PluralFormat pattern string
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* Can be NULL.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
* @throws IllegalArgumentException for syntax errors in the pattern string
|
||||
* @throws IndexOutOfBoundsException if certain limits are exceeded
|
||||
* (e.g., argument number too high, argument name too long, etc.)
|
||||
* @throws NumberFormatException if a number could not be parsed
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
MessagePattern &parsePluralStyle(const UnicodeString &pattern,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Parses a SelectFormat pattern string.
|
||||
* @param pattern a SelectFormat pattern string
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* Can be NULL.
|
||||
* @param errorCode Standard ICU error code. Its input value must
|
||||
* pass the U_SUCCESS() test, or else the function returns
|
||||
* immediately. Check for U_FAILURE() on output or use with
|
||||
* function chaining. (See User Guide for details.)
|
||||
* @return *this
|
||||
* @throws IllegalArgumentException for syntax errors in the pattern string
|
||||
* @throws IndexOutOfBoundsException if certain limits are exceeded
|
||||
* (e.g., argument number too high, argument name too long, etc.)
|
||||
* @throws NumberFormatException if a number could not be parsed
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
MessagePattern &parseSelectStyle(const UnicodeString &pattern,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Clears this MessagePattern.
|
||||
* countParts() will return 0.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
void clear();
|
||||
|
||||
/**
|
||||
* Clears this MessagePattern and sets the UMessagePatternApostropheMode.
|
||||
* countParts() will return 0.
|
||||
* @param mode The new UMessagePatternApostropheMode.
|
||||
* @draft ICU 4.8
|
||||
* @provisional This API might change or be removed in a future release.
|
||||
*/
|
||||
void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
|
||||
clear();
|
||||
aposMode=mode;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param other another object to compare with.
|
||||
* @return TRUE if this object is equivalent to the other one.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UBool operator==(const MessagePattern &other) const;
|
||||
|
||||
/**
|
||||
* @param other another object to compare with.
|
||||
* @return FALSE if this object is equivalent to the other one.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
inline UBool operator!=(const MessagePattern &other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A hash code for this object.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
int32_t hashCode() const;
|
||||
|
||||
/**
|
||||
* @return this instance's UMessagePatternApostropheMode.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMessagePatternApostropheMode getApostropheMode() const {
|
||||
return aposMode;
|
||||
}
|
||||
|
||||
// Java has package-private jdkAposMode() here.
|
||||
// In C++, this is declared in the MessageImpl class.
|
||||
|
||||
/**
|
||||
* @return the parsed pattern string (a reference to this object's stored string, never null; presumably empty if none was parsed).
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
const UnicodeString &getPatternString() const {
|
||||
return msg;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the parsed pattern have named arguments like {first_name}?
|
||||
* @return TRUE if the parsed pattern has at least one named argument.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UBool hasNamedArguments() const {
|
||||
return hasArgNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the parsed pattern have numbered arguments like {2}?
|
||||
* @return TRUE if the parsed pattern has at least one numbered argument.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UBool hasNumberedArguments() const {
|
||||
return hasArgNumbers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates and parses an argument name or argument number string.
|
||||
* An argument name must be a "pattern identifier", that is, it must contain
|
||||
* no Unicode Pattern_Syntax or Pattern_White_Space characters.
|
||||
* If it only contains ASCII digits, then it must be a small integer with no leading zero.
|
||||
* @param name Input string.
|
||||
* @return >=0 if the name is a valid number,
|
||||
* ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
|
||||
* ARG_NAME_NOT_VALID (-2) if it is neither.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
static int32_t validateArgumentName(const UnicodeString &name);
|
||||
|
||||
/**
|
||||
* Returns a version of the parsed pattern string where each ASCII apostrophe
|
||||
* is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
|
||||
* <p>
|
||||
* For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
|
||||
* into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
|
||||
* @return the deep-auto-quoted version of the parsed pattern string.
|
||||
* @see MessageFormat.autoQuoteApostrophe()
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UnicodeString autoQuoteApostropheDeep() const;
|
||||
|
||||
class Part;
|
||||
|
||||
/**
|
||||
* Returns the number of "parts" created by parsing the pattern string.
|
||||
* Returns 0 if no pattern has been parsed or clear() was called.
|
||||
* @return the number of pattern parts.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
int32_t countParts() const {
|
||||
return partsLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the i-th pattern "part".
|
||||
* @param i The index of the Part data. (0..countParts()-1)
|
||||
* @return the i-th pattern "part".
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
const Part &getPart(int32_t i) const {
|
||||
return parts[i];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the UMessagePatternPartType of the i-th pattern "part".
|
||||
* Convenience method for getPart(i).getType().
|
||||
* @param i The index of the Part data. (0..countParts()-1)
|
||||
* @return The UMessagePatternPartType of the i-th Part.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMessagePatternPartType getPartType(int32_t i) const {
|
||||
return getPart(i).type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the pattern index of the specified pattern "part".
|
||||
* Convenience method for getPart(partIndex).getIndex().
|
||||
* @param partIndex The index of the Part data. (0..countParts()-1)
|
||||
* @return The pattern index of this Part.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
int32_t getPatternIndex(int32_t partIndex) const {
|
||||
return getPart(partIndex).index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the substring of the pattern string indicated by the Part.
|
||||
* Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
|
||||
* @param part a part of this MessagePattern.
|
||||
* @return the substring associated with part.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UnicodeString getSubstring(const Part &part) const {
|
||||
return msg.tempSubString(part.index, part.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares the part's substring with the input string s.
|
||||
* @param part a part of this MessagePattern.
|
||||
* @param s a string.
|
||||
* @return TRUE if getSubstring(part).equals(s).
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
|
||||
return 0==msg.compare(part.index, part.length, s);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
|
||||
* @param part a part of this MessagePattern.
|
||||
* @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
double getNumericValue(const Part &part) const;
|
||||
|
||||
/**
|
||||
* Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
|
||||
* @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
|
||||
* @return the "offset:" value.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
double getPluralOffset(int32_t pluralStart) const;
|
||||
|
||||
/**
|
||||
* Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
|
||||
* @param start The index of some Part data (0..countParts()-1);
|
||||
* this Part should be of Type ARG_START or MSG_START.
|
||||
* @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
|
||||
* or start itself if getPartType(start)!=ARG|MSG_START.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
int32_t getLimitPartIndex(int32_t start) const {
|
||||
int32_t limit=getPart(start).limitPartIndex;
|
||||
if(limit<start) {
|
||||
return start;
|
||||
}
|
||||
return limit;
|
||||
}
|
||||
|
||||
/**
|
||||
* A message pattern "part", representing a pattern parsing event.
|
||||
* There is a part for the start and end of a message or argument,
|
||||
* for quoting and escaping of and with ASCII apostrophes,
|
||||
* and for syntax elements of "complex" arguments.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
class Part : public UMemory {
|
||||
public:
|
||||
/**
|
||||
* Default constructor, do not use.
|
||||
* @internal
|
||||
*/
|
||||
Part() {}
|
||||
|
||||
/**
|
||||
* Returns the type of this part.
|
||||
* @return the part type.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMessagePatternPartType getType() const {
|
||||
return type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the pattern string index associated with this Part.
|
||||
* @return this part's pattern string index.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
int32_t getIndex() const {
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the length of the pattern substring associated with this Part.
|
||||
* This is 0 for some parts.
|
||||
* @return this part's pattern substring length.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
int32_t getLength() const {
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the pattern string limit (exclusive-end) index associated with this Part.
|
||||
* Convenience method for getIndex()+getLength().
|
||||
* @return this part's pattern string limit index, same as getIndex()+getLength().
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
int32_t getLimit() const {
|
||||
return index+length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a value associated with this part.
|
||||
* See the documentation of each part type for details.
|
||||
* @return the part value.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
int32_t getValue() const {
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
|
||||
* otherwise UMSGPAT_ARG_TYPE_NONE.
|
||||
* @return the argument type for this part.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMessagePatternArgType getArgType() const {
    // Named partType rather than "type" so the local does not shadow the
    // data member of the same name.
    const UMessagePatternPartType partType=getType();
    // Only ARG_START and ARG_LIMIT parts carry an argument type in "value".
    if(partType==UMSGPAT_PART_TYPE_ARG_START || partType==UMSGPAT_PART_TYPE_ARG_LIMIT) {
        return (UMessagePatternArgType)value;
    }
    return UMSGPAT_ARG_TYPE_NONE;
}
|
||||
|
||||
/**
|
||||
* Indicates whether the Part type has a numeric value.
|
||||
* If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
|
||||
* @param type The Part type to be tested.
|
||||
* @return TRUE if the Part type has a numeric value.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
static UBool hasNumericValue(UMessagePatternPartType type) {
|
||||
return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param other another object to compare with.
|
||||
* @return TRUE if this object is equivalent to the other one.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UBool operator==(const Part &other) const;
|
||||
|
||||
/**
|
||||
* @param other another object to compare with.
|
||||
* @return FALSE if this object is equivalent to the other one.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
inline UBool operator!=(const Part &other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A hash code for this object.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
int32_t hashCode() const {
|
||||
return ((type*37+index)*37+length)*37+value;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class MessagePattern;
|
||||
|
||||
static const int32_t MAX_LENGTH=0xffff;
|
||||
static const int32_t MAX_VALUE=0x7fff;
|
||||
|
||||
// Some fields are not final because they are modified during pattern parsing.
|
||||
// After pattern parsing, the parts are effectively immutable.
|
||||
UMessagePatternPartType type;
|
||||
int32_t index;
|
||||
uint16_t length;
|
||||
int16_t value;
|
||||
int32_t limitPartIndex;
|
||||
};
|
||||
|
||||
private:
|
||||
void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
void postParse();
|
||||
|
||||
int32_t parseMessage(int32_t index, int32_t msgStartLength,
|
||||
int32_t nestingLevel, UMessagePatternArgType parentType,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Validates and parses an argument name or argument number string.
|
||||
* This internal method assumes that the input substring is a "pattern identifier".
|
||||
* @return >=0 if the name is a valid number,
|
||||
* ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
|
||||
* ARG_NAME_NOT_VALID (-2) if it is neither.
|
||||
* @see #validateArgumentName(String)
|
||||
*/
|
||||
static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
|
||||
|
||||
int32_t parseArgNumber(int32_t start, int32_t limit) {
|
||||
return parseArgNumber(msg, start, limit);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a number from the specified message substring.
|
||||
* @param start start index into the message string
|
||||
* @param limit limit index into the message string, must be start<limit
|
||||
* @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
|
||||
*/
|
||||
void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
|
||||
UParseError *parseError, UErrorCode &errorCode);
|
||||
|
||||
// Java has package-private appendReducedApostrophes() here.
|
||||
// In C++, this is declared in the MessageImpl class.
|
||||
|
||||
int32_t skipWhiteSpace(int32_t index);
|
||||
|
||||
int32_t skipIdentifier(int32_t index);
|
||||
|
||||
/**
|
||||
* Skips a sequence of characters that could occur in a double value.
|
||||
* Does not fully parse or validate the value.
|
||||
*/
|
||||
int32_t skipDouble(int32_t index);
|
||||
|
||||
static UBool isArgTypeChar(UChar32 c);
|
||||
|
||||
UBool isChoice(int32_t index);
|
||||
|
||||
UBool isPlural(int32_t index);
|
||||
|
||||
UBool isSelect(int32_t index);
|
||||
|
||||
/**
|
||||
* @return TRUE if we are inside a MessageFormat (sub-)pattern,
|
||||
* as opposed to inside a top-level choice/plural/select pattern.
|
||||
*/
|
||||
UBool inMessageFormatPattern(int32_t nestingLevel);
|
||||
|
||||
/**
|
||||
* @return TRUE if we are in a MessageFormat sub-pattern
|
||||
* of a top-level ChoiceFormat pattern.
|
||||
*/
|
||||
UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
|
||||
|
||||
void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
|
||||
int32_t value, UErrorCode &errorCode);
|
||||
|
||||
void addLimitPart(int32_t start,
|
||||
UMessagePatternPartType type, int32_t index, int32_t length,
|
||||
int32_t value, UErrorCode &errorCode);
|
||||
|
||||
void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
|
||||
|
||||
void setParseError(UParseError *parseError, int32_t index);
|
||||
|
||||
// No ICU "poor man's RTTI" for this class nor its subclasses.
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
UBool init(UErrorCode &errorCode);
|
||||
UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
|
||||
|
||||
UMessagePatternApostropheMode aposMode;
|
||||
UnicodeString msg;
|
||||
// ArrayList<Part> parts=new ArrayList<Part>();
|
||||
MessagePatternPartsList *partsList;
|
||||
Part *parts;
|
||||
int32_t partsLength;
|
||||
// ArrayList<Double> numericValues;
|
||||
MessagePatternDoubleList *numericValuesList;
|
||||
double *numericValues;
|
||||
int32_t numericValuesLength;
|
||||
UBool hasArgNames;
|
||||
UBool hasArgNumbers;
|
||||
UBool needsAutoQuoting;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif // !UCONFIG_NO_FORMATTING
|
||||
|
||||
#endif // __MESSAGEPATTERN_H__
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2002-2009, International Business Machines
|
||||
* Copyright (C) 2002-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: uconfig.h
|
||||
|
@ -176,6 +176,17 @@
|
|||
# define UCONFIG_NO_IDNA 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
|
||||
* Determines the default UMessagePatternApostropheMode.
|
||||
* See the documentation for that enum.
|
||||
*
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
|
||||
# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
|
||||
#endif
|
||||
|
||||
/* i18n library switches ---------------------------------------------------- */
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
***************************************************************************
|
||||
* Copyright (C) 1999-2010, International Business Machines Corporation
|
||||
* Copyright (C) 1999-2011, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
***************************************************************************
|
||||
* Date Name Description
|
||||
|
@ -575,8 +575,8 @@ public:
|
|||
|
||||
/**
|
||||
* Modifies this set to represent the set specified by the given
|
||||
* pattern, optionally ignoring white space. See the class
|
||||
* description for the syntax of the pattern language.
|
||||
* pattern, ignoring Unicode Pattern_White_Space characters.
|
||||
* See the class description for the syntax of the pattern language.
|
||||
* A frozen set will not be modified.
|
||||
* @param pattern a string specifying what characters are in the set
|
||||
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
|
||||
|
@ -590,8 +590,8 @@ public:
|
|||
|
||||
/**
|
||||
* Modifies this set to represent the set specified by the given
|
||||
* pattern, optionally ignoring white space. See the class
|
||||
* description for the syntax of the pattern language.
|
||||
* pattern, optionally ignoring Unicode Pattern_White_Space characters.
|
||||
* See the class description for the syntax of the pattern language.
|
||||
* A frozen set will not be modified.
|
||||
* @param pattern a string specifying what characters are in the set
|
||||
* @param options bitmask for options to apply to the pattern.
|
||||
|
@ -1540,8 +1540,8 @@ private:
|
|||
* \\p{foo} \\P{foo} - white space not allowed within "\\p" or "\\P"
|
||||
* \\N{name} - white space not allowed within "\\N"
|
||||
*
|
||||
* Other than the above restrictions, white space is ignored. Case
|
||||
* is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading
|
||||
* Other than the above restrictions, Unicode Pattern_White_Space characters are ignored.
|
||||
* Case is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading
|
||||
* and trailing space is deleted, and internal runs of whitespace
|
||||
* are collapsed to a single space.
|
||||
*
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1999-2009, International Business Machines
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* Date Name Description
|
||||
|
@ -15,6 +15,7 @@
|
|||
#include "ruleiter.h"
|
||||
#include "cmemory.h"
|
||||
#include "cstring.h"
|
||||
#include "patternprops.h"
|
||||
#include "uhash.h"
|
||||
#include "util.h"
|
||||
#include "uvector.h"
|
||||
|
@ -1926,7 +1927,7 @@ escapeUnprintable) {
|
|||
break;
|
||||
default:
|
||||
// Escape whitespace
|
||||
if (uprv_isRuleWhiteSpace(c)) {
|
||||
if (PatternProps::isWhiteSpace(c)) {
|
||||
buf.append(BACKSLASH);
|
||||
}
|
||||
break;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2010, International Business Machines
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -399,20 +399,6 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
|
|||
// Public API
|
||||
//----------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Modifies this set to represent the set specified by the given
|
||||
* pattern, optionally ignoring white space. See the class
|
||||
* description for the syntax of the pattern language.
|
||||
* @param pattern a string specifying what characters are in the set
|
||||
* @param ignoreSpaces if <code>true</code>, all spaces in the
|
||||
* pattern are ignored. Spaces are those characters for which
|
||||
* <code>uprv_isRuleWhiteSpace()</code> is <code>true</code>.
|
||||
* Characters preceded by '\\' are escaped, losing any special
|
||||
* meaning they otherwise have. Spaces may be included by
|
||||
* escaping them.
|
||||
* @exception <code>IllegalArgumentException</code> if the pattern
|
||||
* contains a syntax error.
|
||||
*/
|
||||
UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
|
||||
UErrorCode& status) {
|
||||
return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 1997-2009, International Business Machines Corporation and *
|
||||
* Copyright (C) 1997-2011, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
*******************************************************************************
|
||||
*
|
||||
|
@ -32,7 +32,9 @@
|
|||
#include "unicode/locid.h"
|
||||
#include "cpputils.h"
|
||||
#include "cstring.h"
|
||||
#include "messageimpl.h"
|
||||
#include "putilimp.h"
|
||||
#include "uassert.h"
|
||||
#include <stdio.h>
|
||||
#include <float.h>
|
||||
|
||||
|
@ -54,6 +56,9 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
|
|||
#define VERTICAL_BAR ((UChar)0x007C) /*|*/
|
||||
#define MINUS ((UChar)0x002D) /*-*/
|
||||
|
||||
static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/
|
||||
static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/
|
||||
|
||||
#ifdef INFINITY
|
||||
#undef INFINITY
|
||||
#endif
|
||||
|
@ -69,10 +74,8 @@ static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
|
|||
|
||||
ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
|
||||
UErrorCode& status)
|
||||
: fChoiceLimits(0),
|
||||
fClosures(0),
|
||||
fChoiceFormats(0),
|
||||
fCount(0)
|
||||
: constructorErrorCode(status),
|
||||
msgPattern(status)
|
||||
{
|
||||
applyPattern(newPattern, status);
|
||||
}
|
||||
|
@ -84,12 +87,10 @@ ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
|
|||
ChoiceFormat::ChoiceFormat(const double* limits,
|
||||
const UnicodeString* formats,
|
||||
int32_t cnt )
|
||||
: fChoiceLimits(0),
|
||||
fClosures(0),
|
||||
fChoiceFormats(0),
|
||||
fCount(0)
|
||||
: constructorErrorCode(U_ZERO_ERROR),
|
||||
msgPattern(constructorErrorCode)
|
||||
{
|
||||
setChoices(limits, formats, cnt );
|
||||
setChoices(limits, NULL, formats, cnt, constructorErrorCode);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -98,12 +99,10 @@ ChoiceFormat::ChoiceFormat(const double* limits,
|
|||
const UBool* closures,
|
||||
const UnicodeString* formats,
|
||||
int32_t cnt )
|
||||
: fChoiceLimits(0),
|
||||
fClosures(0),
|
||||
fChoiceFormats(0),
|
||||
fCount(0)
|
||||
: constructorErrorCode(U_ZERO_ERROR),
|
||||
msgPattern(constructorErrorCode)
|
||||
{
|
||||
setChoices(limits, closures, formats, cnt );
|
||||
setChoices(limits, closures, formats, cnt, constructorErrorCode);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -111,11 +110,9 @@ ChoiceFormat::ChoiceFormat(const double* limits,
|
|||
|
||||
ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
|
||||
: NumberFormat(that),
|
||||
fChoiceLimits(0),
|
||||
fClosures(0),
|
||||
fChoiceFormats(0)
|
||||
constructorErrorCode(that.constructorErrorCode),
|
||||
msgPattern(that.msgPattern)
|
||||
{
|
||||
*this = that;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -126,10 +123,8 @@ ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
|
|||
ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
|
||||
UParseError& parseError,
|
||||
UErrorCode& status)
|
||||
: fChoiceLimits(0),
|
||||
fClosures(0),
|
||||
fChoiceFormats(0),
|
||||
fCount(0)
|
||||
: constructorErrorCode(status),
|
||||
msgPattern(status)
|
||||
{
|
||||
applyPattern(newPattern,parseError, status);
|
||||
}
|
||||
|
@ -141,16 +136,7 @@ ChoiceFormat::operator==(const Format& that) const
|
|||
if (this == &that) return TRUE;
|
||||
if (!NumberFormat::operator==(that)) return FALSE;
|
||||
ChoiceFormat& thatAlias = (ChoiceFormat&)that;
|
||||
if (fCount != thatAlias.fCount) return FALSE;
|
||||
// Checks the limits, the corresponding format string and LE or LT flags.
|
||||
// LE means less than and equal to, LT means less than.
|
||||
for (int32_t i = 0; i < fCount; i++) {
|
||||
if ((fChoiceLimits[i] != thatAlias.fChoiceLimits[i]) ||
|
||||
(fClosures[i] != thatAlias.fClosures[i]) ||
|
||||
(fChoiceFormats[i] != thatAlias.fChoiceFormats[i]))
|
||||
return FALSE;
|
||||
}
|
||||
return TRUE;
|
||||
return msgPattern == thatAlias.msgPattern;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -161,37 +147,8 @@ ChoiceFormat::operator=(const ChoiceFormat& that)
|
|||
{
|
||||
if (this != &that) {
|
||||
NumberFormat::operator=(that);
|
||||
fCount = that.fCount;
|
||||
uprv_free(fChoiceLimits);
|
||||
fChoiceLimits = NULL;
|
||||
uprv_free(fClosures);
|
||||
fClosures = NULL;
|
||||
delete [] fChoiceFormats;
|
||||
fChoiceFormats = NULL;
|
||||
|
||||
fChoiceLimits = (double*) uprv_malloc( sizeof(double) * fCount);
|
||||
fClosures = (UBool*) uprv_malloc( sizeof(UBool) * fCount);
|
||||
fChoiceFormats = new UnicodeString[fCount];
|
||||
|
||||
// check for memory allocation error
|
||||
if (!fChoiceLimits || !fClosures || !fChoiceFormats) {
|
||||
if (fChoiceLimits) {
|
||||
uprv_free(fChoiceLimits);
|
||||
fChoiceLimits = NULL;
|
||||
}
|
||||
if (fClosures) {
|
||||
uprv_free(fClosures);
|
||||
fClosures = NULL;
|
||||
}
|
||||
if (fChoiceFormats) {
|
||||
delete[] fChoiceFormats;
|
||||
fChoiceFormats = NULL;
|
||||
}
|
||||
} else {
|
||||
uprv_arrayCopy(that.fChoiceLimits, fChoiceLimits, fCount);
|
||||
uprv_arrayCopy(that.fClosures, fClosures, fCount);
|
||||
uprv_arrayCopy(that.fChoiceFormats, fChoiceFormats, fCount);
|
||||
}
|
||||
constructorErrorCode = that.constructorErrorCode;
|
||||
msgPattern = that.msgPattern;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
@ -200,32 +157,12 @@ ChoiceFormat::operator=(const ChoiceFormat& that)
|
|||
|
||||
ChoiceFormat::~ChoiceFormat()
|
||||
{
|
||||
uprv_free(fChoiceLimits);
|
||||
fChoiceLimits = NULL;
|
||||
uprv_free(fClosures);
|
||||
fClosures = NULL;
|
||||
delete [] fChoiceFormats;
|
||||
fChoiceFormats = NULL;
|
||||
fCount = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a string to a double value
|
||||
*/
|
||||
double
|
||||
ChoiceFormat::stod(const UnicodeString& string)
|
||||
{
|
||||
char source[256];
|
||||
char* end;
|
||||
|
||||
string.extract(0, string.length(), source, (int32_t)sizeof(source), US_INV); /* invariant codepage */
|
||||
return uprv_strtod(source,&end);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
/**
|
||||
* Convert a double value to a string without the overhead of ICU.
|
||||
* Convert a double value to a string without the overhead of NumberFormat.
|
||||
*/
|
||||
UnicodeString&
|
||||
ChoiceFormat::dtos(double value,
|
||||
|
@ -286,8 +223,8 @@ void
|
|||
ChoiceFormat::applyPattern(const UnicodeString& pattern,
|
||||
UErrorCode& status)
|
||||
{
|
||||
UParseError parseError;
|
||||
applyPattern(pattern, parseError, status);
|
||||
msgPattern.parseChoiceStyle(pattern, NULL, status);
|
||||
constructorErrorCode = status;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -298,217 +235,16 @@ ChoiceFormat::applyPattern(const UnicodeString& pattern,
|
|||
UParseError& parseError,
|
||||
UErrorCode& status)
|
||||
{
|
||||
if (U_FAILURE(status))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Clear error struct
|
||||
parseError.offset = -1;
|
||||
parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
|
||||
|
||||
// Perform 2 passes. The first computes the number of limits in
|
||||
// this pattern (fCount), which is 1 more than the number of
|
||||
// literal VERTICAL_BAR characters.
|
||||
int32_t count = 1;
|
||||
int32_t i;
|
||||
for (i=0; i<pattern.length(); ++i) {
|
||||
UChar c = pattern[i];
|
||||
if (c == SINGLE_QUOTE) {
|
||||
// Skip over the entire quote, including embedded
|
||||
// contiguous pairs of SINGLE_QUOTE.
|
||||
for (;;) {
|
||||
do {
|
||||
++i;
|
||||
} while (i<pattern.length() &&
|
||||
pattern[i] != SINGLE_QUOTE);
|
||||
if ((i+1)<pattern.length() &&
|
||||
pattern[i+1] == SINGLE_QUOTE) {
|
||||
// SINGLE_QUOTE pair; skip over it
|
||||
++i;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (c == VERTICAL_BAR) {
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate the required storage.
|
||||
double *newLimits = (double*) uprv_malloc( sizeof(double) * count);
|
||||
/* test for NULL */
|
||||
if (newLimits == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
UBool *newClosures = (UBool*) uprv_malloc( sizeof(UBool) * count);
|
||||
/* test for NULL */
|
||||
if (newClosures == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
uprv_free(newLimits);
|
||||
return;
|
||||
}
|
||||
UnicodeString *newFormats = new UnicodeString[count];
|
||||
/* test for NULL */
|
||||
if (newFormats == 0) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
uprv_free(newLimits);
|
||||
uprv_free(newClosures);
|
||||
return;
|
||||
}
|
||||
|
||||
// Perform the second pass
|
||||
int32_t k = 0; // index into newXxx[] arrays
|
||||
UnicodeString buf; // scratch buffer
|
||||
UBool inQuote = FALSE;
|
||||
UBool inNumber = TRUE; // TRUE before < or #, FALSE after
|
||||
|
||||
for (i=0; i<pattern.length(); ++i) {
|
||||
UChar c = pattern[i];
|
||||
if (c == SINGLE_QUOTE) {
|
||||
// Check for SINGLE_QUOTE pair indicating a literal quote
|
||||
if ((i+1) < pattern.length() &&
|
||||
pattern[i+1] == SINGLE_QUOTE) {
|
||||
buf += SINGLE_QUOTE;
|
||||
++i;
|
||||
} else {
|
||||
inQuote = !inQuote;
|
||||
}
|
||||
} else if (inQuote) {
|
||||
buf += c;
|
||||
} else if (c == LESS_THAN || c == LESS_EQUAL || c == LESS_EQUAL2) {
|
||||
if (!inNumber || buf.length() == 0) {
|
||||
goto error;
|
||||
}
|
||||
inNumber = FALSE;
|
||||
|
||||
double limit;
|
||||
buf.trim();
|
||||
if (!buf.compare(gPositiveInfinity, POSITIVE_INF_STRLEN)) {
|
||||
limit = uprv_getInfinity();
|
||||
} else if (!buf.compare(gNegativeInfinity, NEGATIVE_INF_STRLEN)) {
|
||||
limit = -uprv_getInfinity();
|
||||
} else {
|
||||
limit = stod(buf);
|
||||
}
|
||||
|
||||
if (k == count) {
|
||||
// This shouldn't happen. If it does, it means that
|
||||
// the count determined in the first pass did not
|
||||
// match the number of elements found in the second
|
||||
// pass.
|
||||
goto error;
|
||||
}
|
||||
newLimits[k] = limit;
|
||||
newClosures[k] = (c == LESS_THAN);
|
||||
|
||||
if (k > 0 && limit <= newLimits[k-1]) {
|
||||
// Each limit must be strictly > than the previous
|
||||
// limit. One exception: Two subsequent limits may be
|
||||
// == if the first closure is FALSE and the second
|
||||
// closure is TRUE. This places the limit value in
|
||||
// the second interval.
|
||||
if (!(limit == newLimits[k-1] &&
|
||||
!newClosures[k-1] &&
|
||||
newClosures[k])) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
buf.truncate(0);
|
||||
} else if (c == VERTICAL_BAR) {
|
||||
if (inNumber) {
|
||||
goto error;
|
||||
}
|
||||
inNumber = TRUE;
|
||||
|
||||
newFormats[k] = buf;
|
||||
++k;
|
||||
buf.truncate(0);
|
||||
} else {
|
||||
buf += c;
|
||||
}
|
||||
}
|
||||
|
||||
if (k != (count-1) || inNumber || inQuote) {
|
||||
goto error;
|
||||
}
|
||||
newFormats[k] = buf;
|
||||
|
||||
// Don't modify this object until the parse succeeds
|
||||
uprv_free(fChoiceLimits);
|
||||
uprv_free(fClosures);
|
||||
delete[] fChoiceFormats;
|
||||
fCount = count;
|
||||
fChoiceLimits = newLimits;
|
||||
fClosures = newClosures;
|
||||
fChoiceFormats = newFormats;
|
||||
return;
|
||||
|
||||
error:
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
syntaxError(pattern,i,parseError);
|
||||
uprv_free(newLimits);
|
||||
uprv_free(newClosures);
|
||||
delete[] newFormats;
|
||||
return;
|
||||
|
||||
msgPattern.parseChoiceStyle(pattern, &parseError, status);
|
||||
constructorErrorCode = status;
|
||||
}
|
||||
// -------------------------------------
|
||||
// Reconstruct the original input pattern.
|
||||
// Returns the input pattern string.
|
||||
|
||||
UnicodeString&
|
||||
ChoiceFormat::toPattern(UnicodeString& result) const
|
||||
{
|
||||
result.remove();
|
||||
for (int32_t i = 0; i < fCount; ++i) {
|
||||
if (i != 0) {
|
||||
result += VERTICAL_BAR;
|
||||
}
|
||||
UnicodeString buf;
|
||||
if (uprv_isPositiveInfinity(fChoiceLimits[i])) {
|
||||
result += INFINITY;
|
||||
} else if (uprv_isNegativeInfinity(fChoiceLimits[i])) {
|
||||
result += MINUS;
|
||||
result += INFINITY;
|
||||
} else {
|
||||
result += dtos(fChoiceLimits[i], buf);
|
||||
}
|
||||
if (fClosures[i]) {
|
||||
result += LESS_THAN;
|
||||
} else {
|
||||
result += LESS_EQUAL;
|
||||
}
|
||||
// Append fChoiceFormats[i], using quotes if there are special
|
||||
// characters. Single quotes themselves must be escaped in
|
||||
// either case.
|
||||
const UnicodeString& text = fChoiceFormats[i];
|
||||
UBool needQuote = text.indexOf(LESS_THAN) >= 0
|
||||
|| text.indexOf(LESS_EQUAL) >= 0
|
||||
|| text.indexOf(LESS_EQUAL2) >= 0
|
||||
|| text.indexOf(VERTICAL_BAR) >= 0;
|
||||
if (needQuote) {
|
||||
result += SINGLE_QUOTE;
|
||||
}
|
||||
if (text.indexOf(SINGLE_QUOTE) < 0) {
|
||||
result += text;
|
||||
}
|
||||
else {
|
||||
for (int32_t j = 0; j < text.length(); ++j) {
|
||||
UChar c = text[j];
|
||||
result += c;
|
||||
if (c == SINGLE_QUOTE) {
|
||||
result += c;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (needQuote) {
|
||||
result += SINGLE_QUOTE;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
return result = msgPattern.getPatternString();
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -518,7 +254,8 @@ ChoiceFormat::setChoices( const double* limits,
|
|||
const UnicodeString* formats,
|
||||
int32_t cnt )
|
||||
{
|
||||
setChoices(limits, 0, formats, cnt);
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
setChoices(limits, NULL, formats, cnt, errorCode);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -529,54 +266,76 @@ ChoiceFormat::setChoices( const double* limits,
|
|||
const UnicodeString* formats,
|
||||
int32_t cnt )
|
||||
{
|
||||
if(limits == 0 || formats == 0)
|
||||
return;
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
setChoices(limits, closures, formats, cnt, errorCode);
|
||||
}
|
||||
|
||||
if (fChoiceLimits) {
|
||||
uprv_free(fChoiceLimits);
|
||||
}
|
||||
if (fClosures) {
|
||||
uprv_free(fClosures);
|
||||
}
|
||||
if (fChoiceFormats) {
|
||||
delete [] fChoiceFormats;
|
||||
}
|
||||
|
||||
// Note that the old arrays are deleted and this owns
|
||||
// the created array.
|
||||
fCount = cnt;
|
||||
fChoiceLimits = (double*) uprv_malloc( sizeof(double) * fCount);
|
||||
fClosures = (UBool*) uprv_malloc( sizeof(UBool) * fCount);
|
||||
fChoiceFormats = new UnicodeString[fCount];
|
||||
|
||||
//check for memory allocation error
|
||||
if (!fChoiceLimits || !fClosures || !fChoiceFormats) {
|
||||
if (fChoiceLimits) {
|
||||
uprv_free(fChoiceLimits);
|
||||
fChoiceLimits = NULL;
|
||||
}
|
||||
if (fClosures) {
|
||||
uprv_free(fClosures);
|
||||
fClosures = NULL;
|
||||
}
|
||||
if (fChoiceFormats) {
|
||||
delete[] fChoiceFormats;
|
||||
fChoiceFormats = NULL;
|
||||
}
|
||||
void
|
||||
ChoiceFormat::setChoices(const double* limits,
|
||||
const UBool* closures,
|
||||
const UnicodeString* formats,
|
||||
int32_t count,
|
||||
UErrorCode &errorCode) {
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return;
|
||||
}
|
||||
|
||||
uprv_arrayCopy(limits, fChoiceLimits, fCount);
|
||||
uprv_arrayCopy(formats, fChoiceFormats, fCount);
|
||||
|
||||
if (closures != 0) {
|
||||
uprv_arrayCopy(closures, fClosures, fCount);
|
||||
} else {
|
||||
int32_t i;
|
||||
for (i=0; i<fCount; ++i) {
|
||||
fClosures[i] = FALSE;
|
||||
if (limits == NULL || formats == NULL) {
|
||||
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return;
|
||||
}
|
||||
// Reconstruct the original input pattern.
|
||||
// Modified version of the pre-ICU 4.8 toPattern() implementation.
|
||||
UnicodeString result;
|
||||
for (int32_t i = 0; i < count; ++i) {
|
||||
if (i != 0) {
|
||||
result += VERTICAL_BAR;
|
||||
}
|
||||
UnicodeString buf;
|
||||
if (uprv_isPositiveInfinity(limits[i])) {
|
||||
result += INFINITY;
|
||||
} else if (uprv_isNegativeInfinity(limits[i])) {
|
||||
result += MINUS;
|
||||
result += INFINITY;
|
||||
} else {
|
||||
result += dtos(limits[i], buf);
|
||||
}
|
||||
if (closures != NULL && closures[i]) {
|
||||
result += LESS_THAN;
|
||||
} else {
|
||||
result += LESS_EQUAL;
|
||||
}
|
||||
// Append formats[i], using quotes if there are special
|
||||
// characters. Single quotes themselves must be escaped in
|
||||
// either case.
|
||||
const UnicodeString& text = formats[i];
|
||||
int32_t textLength = text.length();
|
||||
int32_t nestingLevel = 0;
|
||||
for (int32_t j = 0; j < textLength; ++j) {
|
||||
UChar c = text[j];
|
||||
if (c == SINGLE_QUOTE && nestingLevel == 0) {
|
||||
// Double each top-level apostrophe.
|
||||
result.append(c);
|
||||
} else if (c == VERTICAL_BAR && nestingLevel == 0) {
|
||||
// Surround each pipe symbol with apostrophes for quoting.
|
||||
// If the next character is an apostrophe, then that will be doubled,
|
||||
// and although the parser will see the apostrophe pairs beginning
|
||||
// and ending one character earlier than our doubling, the result
|
||||
// is as desired.
|
||||
// | -> '|'
|
||||
// |' -> '|'''
|
||||
// |'' -> '|''''' etc.
|
||||
result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
|
||||
continue; // Skip the append(c) at the end of the loop body.
|
||||
} else if (c == LEFT_CURLY_BRACE) {
|
||||
++nestingLevel;
|
||||
} else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
|
||||
--nestingLevel;
|
||||
}
|
||||
result.append(c);
|
||||
}
|
||||
}
|
||||
// Apply the reconstructed pattern.
|
||||
applyPattern(result, errorCode);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -585,8 +344,8 @@ ChoiceFormat::setChoices( const double* limits,
|
|||
const double*
|
||||
ChoiceFormat::getLimits(int32_t& cnt) const
|
||||
{
|
||||
cnt = fCount;
|
||||
return fChoiceLimits;
|
||||
cnt = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -595,8 +354,8 @@ ChoiceFormat::getLimits(int32_t& cnt) const
|
|||
const UBool*
|
||||
ChoiceFormat::getClosures(int32_t& cnt) const
|
||||
{
|
||||
cnt = fCount;
|
||||
return fClosures;
|
||||
cnt = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -605,8 +364,8 @@ ChoiceFormat::getClosures(int32_t& cnt) const
|
|||
const UnicodeString*
|
||||
ChoiceFormat::getFormats(int32_t& cnt) const
|
||||
{
|
||||
cnt = fCount;
|
||||
return fChoiceFormats;
|
||||
cnt = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -623,9 +382,8 @@ ChoiceFormat::format(int64_t number,
|
|||
}
|
||||
|
||||
// -------------------------------------
|
||||
// Formats a long number, it's actually formatted as
|
||||
// a double. The returned format string may differ
|
||||
// from the input number because of this.
|
||||
// Formats an int32_t number, it's actually formatted as
|
||||
// a double.
|
||||
|
||||
UnicodeString&
|
||||
ChoiceFormat::format(int32_t number,
|
||||
|
@ -643,26 +401,63 @@ ChoiceFormat::format(double number,
|
|||
UnicodeString& appendTo,
|
||||
FieldPosition& /*pos*/) const
|
||||
{
|
||||
// find the number
|
||||
int32_t i;
|
||||
for (i = 0; i < fCount; ++i) {
|
||||
if (fClosures[i]) {
|
||||
if (!(number > fChoiceLimits[i])) {
|
||||
// same as number <= fChoiceLimits, except catches NaN
|
||||
break;
|
||||
}
|
||||
} else if (!(number >= fChoiceLimits[i])) {
|
||||
// same as number < fChoiceLimits, except catches NaN
|
||||
if (msgPattern.countParts() == 0) {
|
||||
// No pattern was applied, or it failed.
|
||||
return appendTo;
|
||||
}
|
||||
// Get the appropriate sub-message.
|
||||
int32_t msgStart = findSubMessage(msgPattern, 0, number);
|
||||
if (!MessageImpl::jdkAposMode(msgPattern)) {
|
||||
int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
|
||||
int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
|
||||
appendTo.append(msgPattern.getPatternString(),
|
||||
patternStart,
|
||||
msgPattern.getPatternIndex(msgLimit) - patternStart);
|
||||
return appendTo;
|
||||
}
|
||||
// JDK compatibility mode: Remove SKIP_SYNTAX.
|
||||
return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
|
||||
}
|
||||
|
||||
int32_t
|
||||
ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
|
||||
int32_t count = pattern.countParts();
|
||||
int32_t msgStart;
|
||||
// Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
|
||||
// until ARG_LIMIT or end of choice-only pattern.
|
||||
// Ignore the first number and selector and start the loop on the first message.
|
||||
partIndex += 2;
|
||||
for (;;) {
|
||||
// Skip but remember the current sub-message.
|
||||
msgStart = partIndex;
|
||||
partIndex = pattern.getLimitPartIndex(partIndex);
|
||||
if (++partIndex >= count) {
|
||||
// Reached the end of the choice-only pattern.
|
||||
// Return with the last sub-message.
|
||||
break;
|
||||
}
|
||||
const MessagePattern::Part &part = pattern.getPart(partIndex++);
|
||||
UMessagePatternPartType type = part.getType();
|
||||
if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
|
||||
// Reached the end of the ChoiceFormat style.
|
||||
// Return with the last sub-message.
|
||||
break;
|
||||
}
|
||||
// part is an ARG_INT or ARG_DOUBLE
|
||||
U_ASSERT(MessagePattern::Part::hasNumericValue(type));
|
||||
double boundary = pattern.getNumericValue(part);
|
||||
// Fetch the ARG_SELECTOR character.
|
||||
int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
|
||||
UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
|
||||
if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
|
||||
// The number is in the interval between the previous boundary and the current one.
|
||||
// Return with the sub-message between them.
|
||||
// The !(a>b) and !(a>=b) comparisons are equivalent to
|
||||
// (a<=b) and (a<b) except they "catch" NaN.
|
||||
break;
|
||||
}
|
||||
}
|
||||
--i;
|
||||
if (i < 0) {
|
||||
i = 0;
|
||||
}
|
||||
// return either a formatted number, or a string
|
||||
appendTo += fChoiceFormats[i];
|
||||
return appendTo;
|
||||
return msgStart;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
@ -680,13 +475,15 @@ ChoiceFormat::format(const Formattable* objs,
|
|||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return appendTo;
|
||||
}
|
||||
if (msgPattern.countParts() == 0) {
|
||||
status = U_INVALID_STATE_ERROR;
|
||||
return appendTo;
|
||||
}
|
||||
|
||||
UnicodeString buffer;
|
||||
for (int32_t i = 0; i < cnt; i++) {
|
||||
double objDouble = objs[i].getDouble(status);
|
||||
if (U_SUCCESS(status)) {
|
||||
buffer.remove();
|
||||
appendTo += format(objDouble, buffer, pos);
|
||||
format(objDouble, appendTo, pos);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -710,31 +507,68 @@ ChoiceFormat::format(const Formattable& obj,
|
|||
void
|
||||
ChoiceFormat::parse(const UnicodeString& text,
|
||||
Formattable& result,
|
||||
ParsePosition& status) const
|
||||
ParsePosition& pos) const
|
||||
{
|
||||
result.setDouble(parseArgument(msgPattern, 0, text, pos));
|
||||
}
|
||||
|
||||
double
|
||||
ChoiceFormat::parseArgument(
|
||||
const MessagePattern &pattern, int32_t partIndex,
|
||||
const UnicodeString &source, ParsePosition &pos) {
|
||||
// find the best number (defined as the one with the longest parse)
|
||||
int32_t start = status.getIndex();
|
||||
int32_t start = pos.getIndex();
|
||||
int32_t furthest = start;
|
||||
double bestNumber = uprv_getNaN();
|
||||
double tempNumber = 0.0;
|
||||
for (int i = 0; i < fCount; ++i) {
|
||||
int32_t len = fChoiceFormats[i].length();
|
||||
if (text.compare(start, len, fChoiceFormats[i]) == 0) {
|
||||
status.setIndex(start + len);
|
||||
tempNumber = fChoiceLimits[i];
|
||||
if (status.getIndex() > furthest) {
|
||||
furthest = status.getIndex();
|
||||
int32_t count = pattern.countParts();
|
||||
while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
|
||||
tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
|
||||
partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR
|
||||
int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
|
||||
int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
|
||||
if (len >= 0) {
|
||||
int32_t newIndex = start + len;
|
||||
if (newIndex > furthest) {
|
||||
furthest = newIndex;
|
||||
bestNumber = tempNumber;
|
||||
if (furthest == text.length())
|
||||
if (furthest == source.length()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
partIndex = msgLimit + 1;
|
||||
}
|
||||
status.setIndex(furthest);
|
||||
if (status.getIndex() == start) {
|
||||
status.setErrorIndex(furthest);
|
||||
if (furthest == start) {
|
||||
pos.setErrorIndex(start);
|
||||
} else {
|
||||
pos.setIndex(furthest);
|
||||
}
|
||||
return bestNumber;
|
||||
}
|
||||
|
||||
int32_t
|
||||
ChoiceFormat::matchStringUntilLimitPart(
|
||||
const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
|
||||
const UnicodeString &source, int32_t sourceOffset) {
|
||||
int32_t matchingSourceLength = 0;
|
||||
const UnicodeString &msgString = pattern.getPatternString();
|
||||
int32_t prevIndex = pattern.getPart(partIndex).getLimit();
|
||||
for (;;) {
|
||||
const MessagePattern::Part &part = pattern.getPart(++partIndex);
|
||||
if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
|
||||
int32_t index = part.getIndex();
|
||||
int32_t length = index - prevIndex;
|
||||
if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
|
||||
return -1; // mismatch
|
||||
}
|
||||
matchingSourceLength += length;
|
||||
if (partIndex == limitPartIndex) {
|
||||
return matchingSourceLength;
|
||||
}
|
||||
prevIndex = part.getLimit(); // SKIP_SYNTAX
|
||||
}
|
||||
}
|
||||
result.setDouble(bestNumber);
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2009, International Business Machines Corporation and
|
||||
* Copyright (C) 2009-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
|
@ -12,238 +12,161 @@
|
|||
*******************************************************************************
|
||||
*/
|
||||
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/messagepattern.h"
|
||||
#include "unicode/plurfmt.h"
|
||||
#include "unicode/plurrule.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include "messageimpl.h"
|
||||
#include "plurrule_impl.h"
|
||||
#include "uassert.h"
|
||||
#include "uhash.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
U_CDECL_BEGIN
|
||||
static void U_CALLCONV
|
||||
deleteHashStrings(void *obj) {
|
||||
delete (UnicodeString *)obj;
|
||||
}
|
||||
U_CDECL_END
|
||||
static const UChar OTHER_STRING[] = {
|
||||
0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
|
||||
};
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
|
||||
|
||||
#define MAX_KEYWORD_SIZE 30
|
||||
|
||||
PluralFormat::PluralFormat(UErrorCode& status) {
|
||||
init(NULL, Locale::getDefault(), status);
|
||||
PluralFormat::PluralFormat(UErrorCode& status)
|
||||
: locale(Locale::getDefault()),
|
||||
msgPattern(status),
|
||||
numberFormat(NULL),
|
||||
offset(0) {
|
||||
init(NULL, status);
|
||||
}
|
||||
|
||||
PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status) {
|
||||
init(NULL, loc, status);
|
||||
PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
|
||||
: locale(loc),
|
||||
msgPattern(status),
|
||||
numberFormat(NULL),
|
||||
offset(0) {
|
||||
init(NULL, status);
|
||||
}
|
||||
|
||||
PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status) {
|
||||
init(&rules, Locale::getDefault(), status);
|
||||
PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
|
||||
: locale(Locale::getDefault()),
|
||||
msgPattern(status),
|
||||
numberFormat(NULL),
|
||||
offset(0) {
|
||||
init(&rules, status);
|
||||
}
|
||||
|
||||
PluralFormat::PluralFormat(const Locale& loc, const PluralRules& rules, UErrorCode& status) {
|
||||
init(&rules, loc, status);
|
||||
PluralFormat::PluralFormat(const Locale& loc,
|
||||
const PluralRules& rules,
|
||||
UErrorCode& status)
|
||||
: locale(loc),
|
||||
msgPattern(status),
|
||||
numberFormat(NULL),
|
||||
offset(0) {
|
||||
init(&rules, status);
|
||||
}
|
||||
|
||||
PluralFormat::PluralFormat(const UnicodeString& pat, UErrorCode& status) {
|
||||
init(NULL, Locale::getDefault(), status);
|
||||
PluralFormat::PluralFormat(const UnicodeString& pat,
|
||||
UErrorCode& status)
|
||||
: locale(Locale::getDefault()),
|
||||
msgPattern(status),
|
||||
numberFormat(NULL),
|
||||
offset(0) {
|
||||
init(NULL, status);
|
||||
applyPattern(pat, status);
|
||||
}
|
||||
|
||||
PluralFormat::PluralFormat(const Locale& loc, const UnicodeString& pat, UErrorCode& status) {
|
||||
init(NULL, loc, status);
|
||||
PluralFormat::PluralFormat(const Locale& loc,
|
||||
const UnicodeString& pat,
|
||||
UErrorCode& status)
|
||||
: locale(loc),
|
||||
msgPattern(status),
|
||||
numberFormat(NULL),
|
||||
offset(0) {
|
||||
init(NULL, status);
|
||||
applyPattern(pat, status);
|
||||
}
|
||||
|
||||
PluralFormat::PluralFormat(const PluralRules& rules, const UnicodeString& pat, UErrorCode& status) {
|
||||
init(&rules, Locale::getDefault(), status);
|
||||
PluralFormat::PluralFormat(const PluralRules& rules,
|
||||
const UnicodeString& pat,
|
||||
UErrorCode& status)
|
||||
: locale(Locale::getDefault()),
|
||||
msgPattern(status),
|
||||
numberFormat(NULL),
|
||||
offset(0) {
|
||||
init(&rules, status);
|
||||
applyPattern(pat, status);
|
||||
}
|
||||
|
||||
PluralFormat::PluralFormat(const Locale& loc, const PluralRules& rules, const UnicodeString& pat, UErrorCode& status) {
|
||||
init(&rules, loc, status);
|
||||
PluralFormat::PluralFormat(const Locale& loc,
|
||||
const PluralRules& rules,
|
||||
const UnicodeString& pat,
|
||||
UErrorCode& status)
|
||||
: locale(loc),
|
||||
msgPattern(status),
|
||||
numberFormat(NULL),
|
||||
offset(0) {
|
||||
init(&rules, status);
|
||||
applyPattern(pat, status);
|
||||
}
|
||||
|
||||
PluralFormat::PluralFormat(const PluralFormat& other) : Format(other) {
|
||||
PluralFormat::PluralFormat(const PluralFormat& other)
|
||||
: Format(other),
|
||||
locale(other.locale),
|
||||
msgPattern(other.msgPattern),
|
||||
numberFormat(NULL),
|
||||
offset(other.offset) {
|
||||
copyObjects(other);
|
||||
}
|
||||
|
||||
void
|
||||
PluralFormat::copyObjects(const PluralFormat& other) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
locale = other.locale;
|
||||
pluralRules = other.pluralRules->clone();
|
||||
pattern = other.pattern;
|
||||
copyHashtable(other.fParsedValuesHash, status);
|
||||
if (U_FAILURE(status)) {
|
||||
delete pluralRules;
|
||||
pluralRules = NULL;
|
||||
return;
|
||||
if (other.numberFormat == NULL) {
|
||||
numberFormat = NumberFormat::createInstance(locale, status);
|
||||
} else {
|
||||
numberFormat = (NumberFormat*)other.numberFormat->clone();
|
||||
}
|
||||
numberFormat=NumberFormat::createInstance(locale, status);
|
||||
if (U_FAILURE(status)) {
|
||||
delete pluralRules;
|
||||
pluralRules = NULL;
|
||||
delete fParsedValuesHash;
|
||||
fParsedValuesHash = NULL;
|
||||
return;
|
||||
if (other.pluralRulesWrapper.pluralRules == NULL) {
|
||||
pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
|
||||
} else {
|
||||
pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
|
||||
}
|
||||
replacedNumberFormat=other.replacedNumberFormat;
|
||||
}
|
||||
|
||||
|
||||
PluralFormat::~PluralFormat() {
|
||||
delete pluralRules;
|
||||
delete fParsedValuesHash;
|
||||
delete numberFormat;
|
||||
}
|
||||
|
||||
void
|
||||
PluralFormat::init(const PluralRules* rules, const Locale& curLocale, UErrorCode& status) {
|
||||
PluralFormat::init(const PluralRules* rules, UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
locale = curLocale;
|
||||
if ( rules==NULL) {
|
||||
pluralRules = PluralRules::forLocale(locale, status);
|
||||
if (U_FAILURE(status)) {
|
||||
|
||||
if (rules==NULL) {
|
||||
pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
|
||||
} else {
|
||||
pluralRulesWrapper.pluralRules = rules->clone();
|
||||
if (pluralRulesWrapper.pluralRules == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return;
|
||||
}
|
||||
}
|
||||
else {
|
||||
pluralRules = rules->clone();
|
||||
}
|
||||
fParsedValuesHash=NULL;
|
||||
pattern.remove();
|
||||
numberFormat= NumberFormat::createInstance(curLocale, status);
|
||||
if (U_FAILURE(status)) {
|
||||
delete pluralRules;
|
||||
pluralRules = NULL;
|
||||
return;
|
||||
}
|
||||
replacedNumberFormat=NULL;
|
||||
|
||||
numberFormat= NumberFormat::createInstance(locale, status);
|
||||
}
|
||||
|
||||
void
|
||||
PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
|
||||
msgPattern.parsePluralStyle(newPattern, NULL, status);
|
||||
if (U_FAILURE(status)) {
|
||||
msgPattern.clear();
|
||||
offset = 0;
|
||||
return;
|
||||
}
|
||||
this->pattern = newPattern;
|
||||
UnicodeString token;
|
||||
int32_t braceCount=0;
|
||||
fmtToken type;
|
||||
UBool spaceIncluded=FALSE;
|
||||
|
||||
if (fParsedValuesHash==NULL) {
|
||||
fParsedValuesHash = new Hashtable(TRUE, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
fParsedValuesHash->setValueDeleter(deleteHashStrings);
|
||||
}
|
||||
|
||||
UBool getKeyword=TRUE;
|
||||
UnicodeString hashKeyword;
|
||||
UnicodeString *hashPattern;
|
||||
|
||||
for (int32_t i=0; i<pattern.length(); ++i) {
|
||||
UChar ch=pattern.charAt(i);
|
||||
|
||||
if ( !inRange(ch, type) ) {
|
||||
if (getKeyword) {
|
||||
status = U_ILLEGAL_CHARACTER;
|
||||
return;
|
||||
}
|
||||
else {
|
||||
token += ch;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
switch (type) {
|
||||
case tSpace:
|
||||
if (token.length()==0) {
|
||||
continue;
|
||||
}
|
||||
if (getKeyword) {
|
||||
// space after keyword
|
||||
spaceIncluded = TRUE;
|
||||
}
|
||||
else {
|
||||
token += ch;
|
||||
}
|
||||
break;
|
||||
case tLeftBrace:
|
||||
if ( getKeyword ) {
|
||||
if (fParsedValuesHash->get(token)!= NULL) {
|
||||
status = U_DUPLICATE_KEYWORD;
|
||||
return;
|
||||
}
|
||||
if (token.length()==0) {
|
||||
status = U_PATTERN_SYNTAX_ERROR;
|
||||
return;
|
||||
}
|
||||
if (!pluralRules->isKeyword(token)) {
|
||||
status = U_UNDEFINED_KEYWORD;
|
||||
return;
|
||||
}
|
||||
hashKeyword = token;
|
||||
getKeyword = FALSE;
|
||||
token.remove();
|
||||
}
|
||||
else {
|
||||
if (braceCount==0) {
|
||||
status = U_UNEXPECTED_TOKEN;
|
||||
return;
|
||||
}
|
||||
else {
|
||||
token += ch;
|
||||
}
|
||||
}
|
||||
braceCount++;
|
||||
spaceIncluded = FALSE;
|
||||
break;
|
||||
case tRightBrace:
|
||||
if ( getKeyword ) {
|
||||
status = U_UNEXPECTED_TOKEN;
|
||||
return;
|
||||
}
|
||||
else {
|
||||
hashPattern = new UnicodeString(token);
|
||||
fParsedValuesHash->put(hashKeyword, hashPattern, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
braceCount--;
|
||||
if ( braceCount==0 ) {
|
||||
getKeyword=TRUE;
|
||||
hashKeyword.remove();
|
||||
hashPattern=NULL;
|
||||
token.remove();
|
||||
}
|
||||
else {
|
||||
token += ch;
|
||||
}
|
||||
}
|
||||
spaceIncluded = FALSE;
|
||||
break;
|
||||
case tLetter:
|
||||
case tNumberSign:
|
||||
if (spaceIncluded) {
|
||||
status = U_PATTERN_SYNTAX_ERROR;
|
||||
return;
|
||||
}
|
||||
default:
|
||||
token+=ch;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( checkSufficientDefinition() ) {
|
||||
return;
|
||||
}
|
||||
else {
|
||||
status = U_DEFAULT_KEYWORD_MISSING;
|
||||
return;
|
||||
}
|
||||
offset = msgPattern.getPluralOffset(0);
|
||||
}
|
||||
|
||||
UnicodeString&
|
||||
|
@ -253,20 +176,10 @@ PluralFormat::format(const Formattable& obj,
|
|||
UErrorCode& status) const
|
||||
{
|
||||
if (U_FAILURE(status)) return appendTo;
|
||||
int32_t number;
|
||||
|
||||
switch (obj.getType())
|
||||
{
|
||||
case Formattable::kDouble:
|
||||
return format((int32_t)obj.getDouble(), appendTo, pos, status);
|
||||
break;
|
||||
case Formattable::kLong:
|
||||
number = (int32_t)obj.getLong();
|
||||
return format(number, appendTo, pos, status);
|
||||
break;
|
||||
case Formattable::kInt64:
|
||||
return format((int32_t)obj.getInt64(), appendTo, pos, status);
|
||||
default:
|
||||
|
||||
if (obj.isNumeric()) {
|
||||
return format(obj.getDouble(), appendTo, pos, status);
|
||||
} else {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return appendTo;
|
||||
}
|
||||
|
@ -274,30 +187,22 @@ PluralFormat::format(const Formattable& obj,
|
|||
|
||||
/**
 * Formats an integer with this plural format and returns the text as a new string.
 *
 * @param number  the value to format
 * @param status  in/out error code; when it already holds a failure on entry
 *                nothing is formatted and an empty string is returned
 * @return the formatted text, or an empty string on a pre-existing failure
 */
UnicodeString
PluralFormat::format(int32_t number, UErrorCode& status) const {
    UnicodeString formatted;
    if (U_SUCCESS(status)) {
        // The field position is required by the appending overload but is not reported back.
        FieldPosition ignoredField(0);
        format(number, formatted, ignoredField, status);
    }
    return formatted;
}
|
||||
|
||||
/**
 * Formats a double with this plural format and returns the text as a new string.
 *
 * @param number  the value to format
 * @param status  in/out error code; when it already holds a failure on entry
 *                nothing is formatted and an empty string is returned
 * @return the formatted text, or an empty string on a pre-existing failure
 */
UnicodeString
PluralFormat::format(double number, UErrorCode& status) const {
    UnicodeString formatted;
    if (U_SUCCESS(status)) {
        // The field position is required by the appending overload but is not reported back.
        FieldPosition ignoredField(0);
        format(number, formatted, ignoredField, status);
    }
    return formatted;
}
|
||||
|
||||
|
||||
UnicodeString&
|
||||
PluralFormat::format(int32_t number,
|
||||
UnicodeString& appendTo,
|
||||
UnicodeString& appendTo,
|
||||
FieldPosition& pos,
|
||||
UErrorCode& status) const {
|
||||
return format((double)number, appendTo, pos, status);
|
||||
|
@ -305,101 +210,82 @@ PluralFormat::format(int32_t number,
|
|||
|
||||
/**
 * Formats a number by selecting the matching plural sub-message of the parsed
 * pattern and appending it to appendTo.
 *
 * When no pattern has been applied (msgPattern has no parts) the number is
 * simply formatted with numberFormat.
 *
 * @param number    the value used for selecting the sub-message
 * @param appendTo  receives the output; also the return value
 * @param pos       field position, only forwarded to numberFormat when no
 *                  pattern has been applied
 * @param status    in/out error code; nothing is appended when it already
 *                  holds a failure, or when sub-message selection fails
 * @return appendTo
 */
UnicodeString&
PluralFormat::format(double number,
                     UnicodeString& appendTo,
                     FieldPosition& pos,
                     UErrorCode& status) const {
    if (U_FAILURE(status)) {
        return appendTo;
    }
    if (msgPattern.countParts() == 0) {
        return numberFormat->format(number, appendTo, pos);
    }
    // Get the appropriate sub-message.
    int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, number, status);
    if (U_FAILURE(status)) {
        // The selector reported an error; do not walk the parts from an unreliable index.
        return appendTo;
    }
    // Replace syntactic # signs in the top level of this sub-message
    // (not in nested arguments) with the formatted number-offset.
    const UnicodeString& pattern = msgPattern.getPatternString();
    number -= offset;
    int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
    for (;;) {
        const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
        const UMessagePatternPartType type = part.getType();
        int32_t index = part.getIndex();
        if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
            // End of the selected sub-message: flush the remaining literal text.
            return appendTo.append(pattern, prevIndex, index - prevIndex);
        } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
                   (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
            appendTo.append(pattern, prevIndex, index - prevIndex);
            if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
                numberFormat->format(number, appendTo);
            }
            prevIndex = part.getLimit();
        } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
            // Copy a nested argument as a whole, without replacing # inside it.
            appendTo.append(pattern, prevIndex, index - prevIndex);
            prevIndex = index;
            partIndex = msgPattern.getLimitPartIndex(partIndex);
            index = msgPattern.getPart(partIndex).getLimit();
            MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
            prevIndex = index;
        }
    }
}
|
||||
|
||||
/**
 * Appends the currently applied pattern string to appendTo.
 *
 * When no pattern has been applied (msgPattern has no parts), appendTo is
 * set to bogus instead.
 *
 * @param appendTo  receives the pattern; also the return value
 * @return appendTo
 */
UnicodeString&
PluralFormat::toPattern(UnicodeString& appendTo) {
    if (0 == msgPattern.countParts()) {
        appendTo.setToBogus();
    } else {
        // msgPattern is the only holder of the pattern text; append it exactly once.
        appendTo.append(msgPattern.getPatternString());
    }
    return appendTo;
}
|
||||
|
||||
/**
 * Classifies a pattern character.
 *
 * @param ch    the character to classify
 * @param type  set to the token class for every character except upper-case
 *              ASCII letters, for which it is left untouched
 * @return TRUE for lower-case ASCII letters, braces, space and the number
 *         sign; FALSE for everything else
 */
UBool
PluralFormat::inRange(UChar ch, fmtToken& type) {
    // Input is assumed to be lower-cased already, so upper-case letters are
    // rejected without touching `type`.
    if (ch >= CAP_A && ch <= CAP_Z) {
        return FALSE;
    }
    if (ch >= LOW_A && ch <= LOW_Z) {
        type = tLetter;
        return TRUE;
    }

    if (ch == LEFTBRACE) {
        type = tLeftBrace;
    } else if (ch == SPACE) {
        type = tSpace;
    } else if (ch == RIGHTBRACE) {
        type = tRightBrace;
    } else if (ch == NUMBER_SIGN) {
        type = tNumberSign;
    } else {
        type = none;
        return FALSE;
    }
    return TRUE;
}
|
||||
|
||||
/**
 * Reports whether the parsed pattern defines at least the default rule,
 * i.e. whether the parsed-values table exists and has an entry for the
 * keyword returned by pluralRules->getKeywordOther().
 *
 * @return TRUE when that entry is present, FALSE otherwise
 */
UBool
PluralFormat::checkSufficientDefinition() {
    return fParsedValuesHash != NULL &&
           fParsedValuesHash->get(pluralRules->getKeywordOther()) != NULL;
}
|
||||
|
||||
void
|
||||
PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if (pluralRules!=NULL) {
|
||||
delete pluralRules;
|
||||
pluralRules=NULL;
|
||||
}
|
||||
if (fParsedValuesHash!= NULL) {
|
||||
delete fParsedValuesHash;
|
||||
fParsedValuesHash = NULL;
|
||||
}
|
||||
if (numberFormat!=NULL) {
|
||||
delete numberFormat;
|
||||
numberFormat = NULL;
|
||||
replacedNumberFormat=NULL;
|
||||
}
|
||||
init(NULL, loc, status);
|
||||
locale = loc;
|
||||
msgPattern.clear();
|
||||
delete numberFormat;
|
||||
offset = 0;
|
||||
numberFormat = NULL;
|
||||
pluralRulesWrapper.reset();
|
||||
init(NULL, status);
|
||||
}
|
||||
|
||||
void
|
||||
PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& /*status*/) {
|
||||
// TODO: The copy constructor and assignment op of NumberFormat class are protected.
|
||||
// create a pointer as the workaround.
|
||||
replacedNumberFormat = (NumberFormat *)format;
|
||||
PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
NumberFormat* nf = (NumberFormat*)format->clone();
|
||||
if (nf != NULL) {
|
||||
delete numberFormat;
|
||||
numberFormat = nf;
|
||||
} else {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
Format*
|
||||
|
@ -408,34 +294,14 @@ PluralFormat::clone() const
|
|||
return new PluralFormat(*this);
|
||||
}
|
||||
|
||||
|
||||
PluralFormat&
|
||||
PluralFormat::operator=(const PluralFormat& other) {
|
||||
if (this != &other) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
delete pluralRules;
|
||||
delete fParsedValuesHash;
|
||||
delete numberFormat;
|
||||
locale = other.locale;
|
||||
pluralRules = other.pluralRules->clone();
|
||||
pattern = other.pattern;
|
||||
copyHashtable(other.fParsedValuesHash, status);
|
||||
if (U_FAILURE(status)) {
|
||||
delete pluralRules;
|
||||
pluralRules = NULL;
|
||||
fParsedValuesHash = NULL;
|
||||
numberFormat = NULL;
|
||||
return *this;
|
||||
}
|
||||
numberFormat=NumberFormat::createInstance(locale, status);
|
||||
if (U_FAILURE(status)) {
|
||||
delete pluralRules;
|
||||
delete fParsedValuesHash;
|
||||
pluralRules = NULL;
|
||||
fParsedValuesHash = NULL;
|
||||
numberFormat = NULL;
|
||||
return *this;
|
||||
}
|
||||
replacedNumberFormat=other.replacedNumberFormat;
|
||||
msgPattern = other.msgPattern;
|
||||
offset = other.offset;
|
||||
copyObjects(other);
|
||||
}
|
||||
|
||||
return *this;
|
||||
|
@ -443,13 +309,21 @@ PluralFormat::operator=(const PluralFormat& other) {
|
|||
|
||||
/**
 * Compares this format with another Format.
 *
 * Two PluralFormats are equal when the Format base comparison succeeds and
 * locale, parsed pattern, number format and plural rules all match. A NULL
 * number format or NULL plural rules only equals another NULL.
 *
 * @param other  the object to compare with
 * @return TRUE when equal
 */
UBool
PluralFormat::operator==(const Format& other) const {
    if (this == &other) {
        return TRUE;
    }
    if (!Format::operator==(other)) {
        return FALSE;
    }
    // The cast relies on the Format::operator== check above having succeeded.
    const PluralFormat& o = static_cast<const PluralFormat&>(other);
    return
        locale == o.locale &&
        msgPattern == o.msgPattern &&  // implies same offset
        (numberFormat == NULL) == (o.numberFormat == NULL) &&
        (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
        (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
        (pluralRulesWrapper.pluralRules == NULL ||
            *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
}
|
||||
|
||||
UBool
|
||||
|
@ -460,72 +334,112 @@ PluralFormat::operator!=(const Format& other) const {
|
|||
void
|
||||
PluralFormat::parseObject(const UnicodeString& /*source*/,
|
||||
Formattable& /*result*/,
|
||||
ParsePosition& /*pos*/) const
|
||||
ParsePosition& pos) const
|
||||
{
|
||||
// TODO: not yet supported in icu4j and icu4c
|
||||
// Parsing not supported.
|
||||
pos.setErrorIndex(pos.getIndex());
|
||||
}
|
||||
|
||||
UnicodeString
|
||||
PluralFormat::insertFormattedNumber(double number,
|
||||
UnicodeString& message,
|
||||
UnicodeString& appendTo,
|
||||
FieldPosition& pos) const {
|
||||
UnicodeString result;
|
||||
int32_t braceStack=0;
|
||||
int32_t startIndex=0;
|
||||
|
||||
if (message.length()==0) {
|
||||
return result;
|
||||
int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
|
||||
const PluralSelector& selector, double number, UErrorCode& ec) {
|
||||
if (U_FAILURE(ec)) {
|
||||
return 0;
|
||||
}
|
||||
appendTo = numberFormat->format(number, appendTo, pos);
|
||||
for(int32_t i=0; i<message.length(); ++i) {
|
||||
switch(message.charAt(i)) {
|
||||
case LEFTBRACE:
|
||||
++braceStack;
|
||||
int32_t count=pattern.countParts();
|
||||
double offset;
|
||||
const MessagePattern::Part* part=&pattern.getPart(partIndex);
|
||||
if (MessagePattern::Part::hasNumericValue(part->getType())) {
|
||||
offset=pattern.getNumericValue(*part);
|
||||
++partIndex;
|
||||
} else {
|
||||
offset=0;
|
||||
}
|
||||
// The keyword is empty until we need to match against non-explicit, not-"other" value.
|
||||
// Then we get the keyword from the selector.
|
||||
// (In other words, we never call the selector if we match against an explicit value,
|
||||
// or if the only non-explicit keyword is "other".)
|
||||
UnicodeString keyword;
|
||||
UnicodeString other(FALSE, OTHER_STRING, 5);
|
||||
// When we find a match, we set msgStart>0 and also set this boolean to true
|
||||
// to avoid matching the keyword again (duplicates are allowed)
|
||||
// while we continue to look for an explicit-value match.
|
||||
UBool haveKeywordMatch=FALSE;
|
||||
// msgStart is 0 until we find any appropriate sub-message.
|
||||
// We remember the first "other" sub-message if we have not seen any
|
||||
// appropriate sub-message before.
|
||||
// We remember the first matching-keyword sub-message if we have not seen
|
||||
// one of those before.
|
||||
// (The parser allows [does not check for] duplicate keywords.
|
||||
// We just have to make sure to take the first one.)
|
||||
// We avoid matching the keyword twice by also setting haveKeywordMatch=true
|
||||
// at the first keyword match.
|
||||
// We keep going until we find an explicit-value match or reach the end of the plural style.
|
||||
int32_t msgStart=0;
|
||||
// Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
|
||||
// until ARG_LIMIT or end of plural-only pattern.
|
||||
do {
|
||||
part=&pattern.getPart(partIndex++);
|
||||
const UMessagePatternPartType type = part->getType();
|
||||
if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
|
||||
break;
|
||||
case RIGHTBRACE:
|
||||
--braceStack;
|
||||
break;
|
||||
case NUMBER_SIGN:
|
||||
if (braceStack==0) {
|
||||
result += UnicodeString(message, startIndex, i);
|
||||
result += appendTo;
|
||||
startIndex = i + 1;
|
||||
}
|
||||
U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
|
||||
// part is an ARG_SELECTOR followed by an optional explicit value, and then a message
|
||||
if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
|
||||
// explicit value like "=2"
|
||||
part=&pattern.getPart(partIndex++);
|
||||
if(number==pattern.getNumericValue(*part)) {
|
||||
// matches explicit value
|
||||
return partIndex;
|
||||
}
|
||||
} else if(!haveKeywordMatch) {
|
||||
// plural keyword like "few" or "other"
|
||||
// Compare "other" first and call the selector if this is not "other".
|
||||
if(pattern.partSubstringMatches(*part, other)) {
|
||||
if(msgStart==0) {
|
||||
msgStart=partIndex;
|
||||
if(0 == keyword.compare(other)) {
|
||||
// This is the first "other" sub-message,
|
||||
// and the selected keyword is also "other".
|
||||
// Do not match "other" again.
|
||||
haveKeywordMatch=TRUE;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(keyword.isEmpty()) {
|
||||
keyword=selector.select(number-offset, ec);
|
||||
if(msgStart!=0 && (0 == keyword.compare(other))) {
|
||||
// We have already seen an "other" sub-message.
|
||||
// Do not match "other" again.
|
||||
haveKeywordMatch=TRUE;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if(pattern.partSubstringMatches(*part, keyword)) {
|
||||
// keyword matches
|
||||
msgStart=partIndex;
|
||||
// Do not match this keyword again.
|
||||
haveKeywordMatch=TRUE;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( startIndex < message.length() ) {
|
||||
result += UnicodeString(message, startIndex, message.length()-startIndex);
|
||||
}
|
||||
appendTo = result;
|
||||
return result;
|
||||
partIndex=pattern.getLimitPartIndex(partIndex);
|
||||
} while(++partIndex<count);
|
||||
return msgStart;
|
||||
}
|
||||
|
||||
void
|
||||
PluralFormat::copyHashtable(Hashtable *other, UErrorCode& status) {
|
||||
if (other == NULL || U_FAILURE(status)) {
|
||||
fParsedValuesHash = NULL;
|
||||
return;
|
||||
}
|
||||
fParsedValuesHash = new Hashtable(TRUE, status);
|
||||
if(U_FAILURE(status)){
|
||||
return;
|
||||
}
|
||||
fParsedValuesHash->setValueDeleter(deleteHashStrings);
|
||||
int32_t pos = -1;
|
||||
const UHashElement* elem = NULL;
|
||||
// walk through the hash table and create a deep clone
|
||||
while((elem = other->nextElement(pos))!= NULL){
|
||||
const UHashTok otherKeyTok = elem->key;
|
||||
UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
|
||||
const UHashTok otherKeyToVal = elem->value;
|
||||
UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
|
||||
fParsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status);
|
||||
if(U_FAILURE(status)){
|
||||
return;
|
||||
}
|
||||
}
|
||||
/** Deletes the pluralRules object held by this adapter. */
PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
    delete pluralRules;
}
|
||||
|
||||
/**
 * Returns the plural keyword that pluralRules selects for a number.
 *
 * @param number  the number to classify
 * @param ec      in/out error code; set to U_INVALID_STATE_ERROR when no
 *                plural rules are available (for example after reset()).
 *                Nothing is selected when it already holds a failure.
 * @return the selected keyword, or an empty string on error
 */
UnicodeString PluralFormat::PluralSelectorAdapter::select(double number,
                                                          UErrorCode& ec) const {
    if (U_FAILURE(ec)) {
        return UnicodeString();
    }
    if (pluralRules == NULL) {
        // reset() leaves the pointer NULL until rules are loaded again.
        ec = U_INVALID_STATE_ERROR;
        return UnicodeString();
    }
    return pluralRules->select(number);
}
|
||||
|
||||
/** Deletes the held plural rules and leaves the adapter with a NULL pointer. */
void PluralFormat::PluralSelectorAdapter::reset() {
    delete pluralRules;
    pluralRules = NULL;  // keeps a later delete/reset from touching freed memory
}
|
||||
|
||||
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "unicode/uniset.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/plurrule.h"
|
||||
|
@ -21,6 +20,7 @@
|
|||
#include "cstring.h"
|
||||
#include "hash.h"
|
||||
#include "mutex.h"
|
||||
#include "patternprops.h"
|
||||
#include "plurrule_impl.h"
|
||||
#include "putilimp.h"
|
||||
#include "ucln_in.h"
|
||||
|
@ -1159,16 +1159,9 @@ RuleChain::isKeyword(const UnicodeString& keywordParam) const {
|
|||
|
||||
|
||||
/**
 * Builds the two character filters used for keyword validation:
 * idStartFilter from "[[a-z]]" and idContinueFilter from
 * "[[a-z][A-Z][_][0-9]]".
 */
RuleParser::RuleParser() {
    // The status from building the sets is local and is not reported to the caller.
    UErrorCode setStatus = U_ZERO_ERROR;
    idStartFilter = new UnicodeSet(UNICODE_STRING_SIMPLE("[[a-z]]"), setStatus);
    idContinueFilter = new UnicodeSet(UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]"), setStatus);
}
|
||||
|
||||
/** Releases both keyword character filters. */
RuleParser::~RuleParser() {
    delete idContinueFilter;
    delete idStartFilter;
}
|
||||
|
||||
void
|
||||
|
@ -1413,21 +1406,7 @@ RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCod
|
|||
|
||||
/**
 * Reports whether a token is a valid plural keyword.
 *
 * The decision is delegated to PatternProps::isIdentifier() on the token's
 * buffer and length.
 *
 * @param token  the candidate keyword
 * @return TRUE when PatternProps accepts the token as an identifier
 */
UBool
RuleParser::isValidKeyword(const UnicodeString& token) {
    return PatternProps::isIdentifier(token.getBuffer(), token.length());
}
|
||||
|
||||
PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) :
|
||||
|
|
|
@ -13,10 +13,7 @@
|
|||
#ifndef PLURRULE_IMPLE
|
||||
#define PLURRULE_IMPLE
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Defines rules for mapping positive long values onto a small set of keywords.
|
||||
*/
|
||||
// Internal definitions for the PluralRules implementation.
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
|
@ -89,8 +86,6 @@ U_NAMESPACE_BEGIN
|
|||
#define PLURAL_RANGE_HIGH 0x7fffffff;
|
||||
|
||||
|
||||
class UnicodeSet;
|
||||
|
||||
typedef enum PluralKey {
|
||||
pZero,
|
||||
pOne,
|
||||
|
@ -138,9 +133,6 @@ public:
|
|||
tokenType& type, UErrorCode &status);
|
||||
void checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status);
|
||||
private:
|
||||
UnicodeSet *idStartFilter;
|
||||
UnicodeSet *idContinueFilter;
|
||||
|
||||
void getKeyType(const UnicodeString& token, tokenType& type, UErrorCode &status);
|
||||
UBool inRange(UChar ch, tokenType& type);
|
||||
UBool isValidKeyword(const UnicodeString& token);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
* Copyright (C) 2010 , Yahoo! Inc.
|
||||
********************************************************************
|
||||
|
@ -16,76 +16,41 @@
|
|||
|
||||
#include <typeinfo> // for 'typeid' to work
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/umsg.h"
|
||||
#include "unicode/messagepattern.h"
|
||||
#include "unicode/rbnf.h"
|
||||
#include "unicode/selfmt.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/umsg.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utypes.h"
|
||||
#include "cmemory.h"
|
||||
#include "util.h"
|
||||
#include "messageimpl.h"
|
||||
#include "patternprops.h"
|
||||
#include "selfmtimpl.h"
|
||||
#include "uassert.h"
|
||||
#include "ustrfmt.h"
|
||||
#include "util.h"
|
||||
#include "uvector.h"
|
||||
|
||||
#include "unicode/selfmt.h"
|
||||
#include "selfmtimpl.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat)
|
||||
|
||||
#define MAX_KEYWORD_SIZE 30
|
||||
static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0};
|
||||
|
||||
/**
 * Creates a SelectFormat from a pattern string.
 *
 * @param pat     the select pattern to parse
 * @param status  in/out error code, passed to both the msgPattern
 *                constructor and applyPattern()
 */
SelectFormat::SelectFormat(const UnicodeString& pat,
                           UErrorCode& status) : msgPattern(status) {
    applyPattern(pat, status);
}
|
||||
|
||||
/**
 * Copy constructor: copies the Format base state and the parsed pattern.
 *
 * @param other  the object to copy
 */
SelectFormat::SelectFormat(const SelectFormat& other) : Format(other),
                                                        msgPattern(other.msgPattern) {
}
|
||||
|
||||
/** Destructor: cleanup is delegated to cleanHashTable(). */
SelectFormat::~SelectFormat() {
    cleanHashTable();
}
|
||||
|
||||
/**
 * Creates parsedValuesHash on first use.
 *
 * Does nothing when status already holds a failure or when the table has
 * already been created.
 *
 * @param status  in/out error code; set to U_MEMORY_ALLOCATION_ERROR when
 *                the allocation returns NULL
 */
void SelectFormat::initHashTable(UErrorCode &status) {
    if (U_FAILURE(status) || parsedValuesHash != NULL) {
        return;
    }

    parsedValuesHash = new Hashtable(TRUE, status);
    if (U_FAILURE(status)) {
        // The table could not be set up; discard it.
        cleanHashTable();
        return;
    }
    if (parsedValuesHash == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // hashtable->equals() needs a value comparator to be set.
    parsedValuesHash->setValueComparator(uhash_compareCaselessUnicodeString);
}
|
||||
|
||||
void SelectFormat::cleanHashTable() {
|
||||
if (parsedValuesHash != NULL) {
|
||||
delete parsedValuesHash;
|
||||
parsedValuesHash = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -94,164 +59,10 @@ SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status)
|
|||
return;
|
||||
}
|
||||
|
||||
pattern = newPattern;
|
||||
enum State{ startState, keywordState, pastKeywordState, phraseState};
|
||||
|
||||
//Initialization
|
||||
UnicodeString keyword ;
|
||||
UnicodeString phrase ;
|
||||
UnicodeString* ptrPhrase ;
|
||||
int32_t braceCount = 0;
|
||||
|
||||
if (parsedValuesHash == NULL) {
|
||||
initHashTable(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
msgPattern.parseSelectStyle(newPattern, NULL, status);
|
||||
if (U_FAILURE(status)) {
|
||||
msgPattern.clear();
|
||||
}
|
||||
parsedValuesHash->removeAll();
|
||||
parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
|
||||
|
||||
//Process the state machine
|
||||
State state = startState;
|
||||
for (int32_t i = 0; i < pattern.length(); ++i) {
|
||||
//Get the character and check its type
|
||||
UChar ch = pattern.charAt(i);
|
||||
CharacterClass type = classifyCharacter(ch);
|
||||
|
||||
//Allow any character in phrase but nowhere else
|
||||
if ( type == tOther ) {
|
||||
if ( state == phraseState ){
|
||||
phrase += ch;
|
||||
continue;
|
||||
}else {
|
||||
status = U_PATTERN_SYNTAX_ERROR;
|
||||
cleanHashTable();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
//Process the state machine
|
||||
switch (state) {
|
||||
//At the start of pattern
|
||||
case startState:
|
||||
switch (type) {
|
||||
case tSpace:
|
||||
break;
|
||||
case tStartKeyword:
|
||||
state = keywordState;
|
||||
keyword += ch;
|
||||
break;
|
||||
//If anything else is encountered, it's a syntax error
|
||||
default:
|
||||
status = U_PATTERN_SYNTAX_ERROR;
|
||||
cleanHashTable();
|
||||
return;
|
||||
}//end of switch(type)
|
||||
break;
|
||||
|
||||
//Handle the keyword state
|
||||
case keywordState:
|
||||
switch (type) {
|
||||
case tSpace:
|
||||
state = pastKeywordState;
|
||||
break;
|
||||
case tStartKeyword:
|
||||
case tContinueKeyword:
|
||||
keyword += ch;
|
||||
break;
|
||||
case tLeftBrace:
|
||||
state = phraseState;
|
||||
break;
|
||||
//If anything else is encountered, it's a syntax error
|
||||
default:
|
||||
status = U_PATTERN_SYNTAX_ERROR;
|
||||
cleanHashTable();
|
||||
return;
|
||||
}//end of switch(type)
|
||||
break;
|
||||
|
||||
//Handle the pastkeyword state
|
||||
case pastKeywordState:
|
||||
switch (type) {
|
||||
case tSpace:
|
||||
break;
|
||||
case tLeftBrace:
|
||||
state = phraseState;
|
||||
break;
|
||||
//If anything else is encountered, it's a syntax error
|
||||
default:
|
||||
status = U_PATTERN_SYNTAX_ERROR;
|
||||
cleanHashTable();
|
||||
return;
|
||||
}//end of switch(type)
|
||||
break;
|
||||
|
||||
//Handle the phrase state
|
||||
case phraseState:
|
||||
switch (type) {
|
||||
case tLeftBrace:
|
||||
braceCount++;
|
||||
phrase += ch;
|
||||
break;
|
||||
case tRightBrace:
|
||||
//Matching keyword, phrase pair found
|
||||
if (braceCount == 0){
|
||||
//Check validity of keyword
|
||||
if (parsedValuesHash->get(keyword) != NULL) {
|
||||
status = U_DUPLICATE_KEYWORD;
|
||||
cleanHashTable();
|
||||
return;
|
||||
}
|
||||
if (keyword.length() == 0) {
|
||||
status = U_PATTERN_SYNTAX_ERROR;
|
||||
cleanHashTable();
|
||||
return;
|
||||
}
|
||||
|
||||
//Store the keyword, phrase pair in hashTable
|
||||
ptrPhrase = new UnicodeString(phrase);
|
||||
parsedValuesHash->put( keyword, ptrPhrase, status);
|
||||
|
||||
//Reinitialize
|
||||
keyword.remove();
|
||||
phrase.remove();
|
||||
ptrPhrase = NULL;
|
||||
state = startState;
|
||||
}
|
||||
|
||||
if (braceCount > 0){
|
||||
braceCount-- ;
|
||||
phrase += ch;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
phrase += ch;
|
||||
}//end of switch(type)
|
||||
break;
|
||||
|
||||
//Handle the default case of switch(state)
|
||||
default:
|
||||
status = U_PATTERN_SYNTAX_ERROR;
|
||||
cleanHashTable();
|
||||
return;
|
||||
|
||||
}//end of switch(state)
|
||||
}
|
||||
|
||||
//Check if the state machine is back to startState
|
||||
if ( state != startState){
|
||||
status = U_PATTERN_SYNTAX_ERROR;
|
||||
cleanHashTable();
|
||||
return;
|
||||
}
|
||||
|
||||
//Check if "other" keyword is present
|
||||
if ( !checkSufficientDefinition() ) {
|
||||
status = U_DEFAULT_KEYWORD_MISSING;
|
||||
cleanHashTable();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
UnicodeString&
|
||||
|
@ -260,14 +71,13 @@ SelectFormat::format(const Formattable& obj,
|
|||
FieldPosition& pos,
|
||||
UErrorCode& status) const
|
||||
{
|
||||
switch (obj.getType())
|
||||
{
|
||||
case Formattable::kString:
|
||||
return format(obj.getString(), appendTo, pos, status);
|
||||
default:
|
||||
if( U_SUCCESS(status) ){
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
}
|
||||
if (U_FAILURE(status)) {
|
||||
return appendTo;
|
||||
}
|
||||
if (obj.getType() == Formattable::kString) {
|
||||
return format(obj.getString(status), appendTo, pos, status);
|
||||
} else {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return appendTo;
|
||||
}
|
||||
}
|
||||
|
@ -277,85 +87,66 @@ SelectFormat::format(const UnicodeString& keyword,
|
|||
UnicodeString& appendTo,
|
||||
FieldPosition& /*pos */,
|
||||
UErrorCode& status) const {
|
||||
|
||||
if (U_FAILURE(status)) return appendTo;
|
||||
|
||||
if (parsedValuesHash == NULL) {
|
||||
status = U_INVALID_FORMAT_ERROR;
|
||||
if (U_FAILURE(status)) {
|
||||
return appendTo;
|
||||
}
|
||||
|
||||
//Check for the validity of the keyword
|
||||
if ( !checkValidKeyword(keyword) ){
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
// Check for the validity of the keyword
|
||||
if (!PatternProps::isIdentifier(keyword.getBuffer(), keyword.length())) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR; // Invalid formatting argument.
|
||||
}
|
||||
if (msgPattern.countParts() == 0) {
|
||||
status = U_INVALID_STATE_ERROR;
|
||||
return appendTo;
|
||||
}
|
||||
|
||||
UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(keyword);
|
||||
if (selectedPattern == NULL) {
|
||||
selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER);
|
||||
int32_t msgStart = findSubMessage(msgPattern, 0, keyword, status);
|
||||
if (!MessageImpl::jdkAposMode(msgPattern)) {
|
||||
int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
|
||||
int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
|
||||
appendTo.append(msgPattern.getPatternString(),
|
||||
patternStart,
|
||||
msgPattern.getPatternIndex(msgLimit) - patternStart);
|
||||
return appendTo;
|
||||
}
|
||||
|
||||
return appendTo += *selectedPattern;
|
||||
// JDK compatibility mode: Remove SKIP_SYNTAX.
|
||||
return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
|
||||
}
|
||||
|
||||
/**
 * Appends the currently applied pattern string to appendTo.
 *
 * When no pattern has been applied (msgPattern has no parts), appendTo is
 * set to bogus instead.
 *
 * @param appendTo  receives the pattern; also the return value
 * @return appendTo
 */
UnicodeString&
SelectFormat::toPattern(UnicodeString& appendTo) {
    if (0 == msgPattern.countParts()) {
        appendTo.setToBogus();
    } else {
        appendTo.append(msgPattern.getPatternString());
    }
    return appendTo;
}
|
||||
|
||||
/**
 * Maps a pattern character to its character class.
 *
 * ASCII letters can start a keyword; digits, hyphen and low line can only
 * continue one; rule white space and the two braces have classes of their
 * own; everything else is tOther.
 *
 * @param ch  the character to classify
 * @return the character class of ch
 */
SelectFormat::CharacterClass
SelectFormat::classifyCharacter(UChar ch) const{
    const UBool isUpper = (ch >= CAP_A) && (ch <= CAP_Z);
    const UBool isLower = (ch >= LOW_A) && (ch <= LOW_Z);
    if (isUpper || isLower) {
        return tStartKeyword;
    }
    if ((ch >= U_ZERO) && (ch <= U_NINE)) {
        return tContinueKeyword;
    }
    if (uprv_isRuleWhiteSpace(ch)) {
        return tSpace;
    }
    if (ch == LEFTBRACE) {
        return tLeftBrace;
    }
    if (ch == RIGHTBRACE) {
        return tRightBrace;
    }
    if (ch == HYPHEN || ch == LOWLINE) {
        return tContinueKeyword;
    }
    return tOther;
}
|
||||
|
||||
/**
 * Reports whether the parsed pattern defines at least the default rule,
 * i.e. whether parsedValuesHash exists and has an entry for
 * SELECT_KEYWORD_OTHER.
 *
 * @return TRUE when that entry is present, FALSE otherwise
 */
UBool
SelectFormat::checkSufficientDefinition() {
    if (parsedValuesHash == NULL) {
        return FALSE;
    }
    return parsedValuesHash->get(SELECT_KEYWORD_OTHER) != NULL;
}
|
||||
|
||||
UBool
|
||||
SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{
|
||||
int32_t len = argKeyword.length();
|
||||
if (len < 1){
|
||||
return FALSE;
|
||||
int32_t SelectFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
|
||||
const UnicodeString& keyword, UErrorCode& ec) {
|
||||
if (U_FAILURE(ec)) {
|
||||
return 0;
|
||||
}
|
||||
CharacterClass type = classifyCharacter(argKeyword.charAt(0));
|
||||
if( type != tStartKeyword ){
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < argKeyword.length(); ++i) {
|
||||
type = classifyCharacter(argKeyword.charAt(i));
|
||||
if( type != tStartKeyword && type != tContinueKeyword ){
|
||||
return FALSE;
|
||||
UnicodeString other(FALSE, SELECT_KEYWORD_OTHER, 5);
|
||||
int32_t count = pattern.countParts();
|
||||
int32_t msgStart=0;
|
||||
// Iterate over (ARG_SELECTOR, message) pairs until ARG_LIMIT or end of select-only pattern.
|
||||
do {
|
||||
const MessagePattern::Part& part=pattern.getPart(partIndex++);
|
||||
const UMessagePatternPartType type=part.getType();
|
||||
if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return TRUE;
|
||||
// part is an ARG_SELECTOR followed by a message
|
||||
if(pattern.partSubstringMatches(part, keyword)) {
|
||||
// keyword matches
|
||||
return partIndex;
|
||||
} else if(msgStart==0 && pattern.partSubstringMatches(part, other)) {
|
||||
msgStart=partIndex;
|
||||
}
|
||||
partIndex=pattern.getLimitPartIndex(partIndex);
|
||||
} while(++partIndex<count);
|
||||
return msgStart;
|
||||
}
|
||||
|
||||
Format* SelectFormat::clone() const
|
||||
|
@ -366,28 +157,21 @@ Format* SelectFormat::clone() const
|
|||
SelectFormat&
|
||||
SelectFormat::operator=(const SelectFormat& other) {
|
||||
if (this != &other) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
pattern = other.pattern;
|
||||
copyHashtable(other.parsedValuesHash, status);
|
||||
msgPattern = other.msgPattern;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
UBool
|
||||
SelectFormat::operator==(const Format& other) const {
|
||||
if( this == &other){
|
||||
if (this == &other) {
|
||||
return TRUE;
|
||||
}
|
||||
if (typeid(*this) != typeid(other)) {
|
||||
return FALSE;
|
||||
}
|
||||
SelectFormat* fmt = (SelectFormat*)&other;
|
||||
Hashtable* hashOther = fmt->parsedValuesHash;
|
||||
if ( parsedValuesHash == NULL && hashOther == NULL)
|
||||
return TRUE;
|
||||
if ( parsedValuesHash == NULL || hashOther == NULL)
|
||||
if (!Format::operator==(other)) {
|
||||
return FALSE;
|
||||
return parsedValuesHash->equals(*hashOther);
|
||||
}
|
||||
const SelectFormat& o = (const SelectFormat&)other;
|
||||
return msgPattern == o.msgPattern;
|
||||
}
|
||||
|
||||
UBool
|
||||
|
@ -400,46 +184,10 @@ SelectFormat::parseObject(const UnicodeString& /*source*/,
|
|||
Formattable& /*result*/,
|
||||
ParsePosition& pos) const
|
||||
{
|
||||
// TODO: not yet supported in icu4j and icu4c
|
||||
// Parsing not supported.
|
||||
pos.setErrorIndex(pos.getIndex());
|
||||
}
|
||||
|
||||
void
|
||||
SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
if (other == NULL) {
|
||||
cleanHashTable();
|
||||
return;
|
||||
}
|
||||
if (parsedValuesHash == NULL) {
|
||||
initHashTable(status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
parsedValuesHash->removeAll();
|
||||
parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
|
||||
|
||||
int32_t pos = -1;
|
||||
const UHashElement* elem = NULL;
|
||||
|
||||
// walk through the hash table and create a deep clone
|
||||
while ((elem = other->nextElement(pos)) != NULL){
|
||||
const UHashTok otherKeyTok = elem->key;
|
||||
UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
|
||||
const UHashTok otherKeyToVal = elem->value;
|
||||
UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
|
||||
parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status);
|
||||
if (U_FAILURE(status)){
|
||||
cleanHashTable();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
* Copyright (C) 2010 , Yahoo! Inc.
|
||||
********************************************************************
|
||||
* File SELECTFMT_IMPL.H
|
||||
* File selectfmtimpl.h
|
||||
*
|
||||
* Date Name Description
|
||||
* 11/11/09 kirtig Finished first cut of implementation.
|
||||
|
@ -14,11 +14,6 @@
|
|||
#ifndef SELFMTIMPL
|
||||
#define SELFMTIMPL
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C++ API: Defines rules for mapping positive long values onto a small set of keywords.
|
||||
*/
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "unicode/format.h"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 1999-2006, International Business Machines
|
||||
* Copyright (C) 1999-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
|
@ -32,6 +32,27 @@
|
|||
#include "uassert.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
/**
|
||||
* This class isolates our access to private internal methods of
|
||||
* MessageFormat. It is never instantiated; it exists only for C++
|
||||
* access management.
|
||||
*/
|
||||
class MessageFormatAdapter {
|
||||
public:
|
||||
static const Formattable::Type* getArgTypeList(const MessageFormat& m,
|
||||
int32_t& count);
|
||||
static UBool hasArgTypeConflicts(const MessageFormat& m) {
|
||||
return m.hasArgTypeConflicts;
|
||||
}
|
||||
};
|
||||
const Formattable::Type*
|
||||
MessageFormatAdapter::getArgTypeList(const MessageFormat& m,
|
||||
int32_t& count) {
|
||||
return m.getArgTypeList(count);
|
||||
}
|
||||
U_NAMESPACE_END
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
U_CAPI int32_t
|
||||
|
@ -217,25 +238,23 @@ umsg_open( const UChar *pattern,
|
|||
}
|
||||
|
||||
UParseError tErr;
|
||||
|
||||
if(parseError==NULL)
|
||||
{
|
||||
parseError = &tErr;
|
||||
}
|
||||
|
||||
UMessageFormat* retVal = 0;
|
||||
|
||||
int32_t len = (patternLength == -1 ? u_strlen(pattern) : patternLength);
|
||||
|
||||
UnicodeString patString((patternLength == -1 ? TRUE:FALSE), pattern,len);
|
||||
UnicodeString patString(patternLength == -1, pattern, len);
|
||||
|
||||
retVal = (UMessageFormat*) new MessageFormat(patString,Locale(locale),*parseError,*status);
|
||||
|
||||
if(retVal == 0) {
|
||||
MessageFormat* retVal = new MessageFormat(patString,Locale(locale),*parseError,*status);
|
||||
if(retVal == NULL) {
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return 0;
|
||||
return NULL;
|
||||
}
|
||||
return retVal;
|
||||
if (U_SUCCESS(*status) && MessageFormatAdapter::hasArgTypeConflicts(*retVal)) {
|
||||
*status = U_ARGUMENT_TYPE_MISMATCH;
|
||||
}
|
||||
return (UMessageFormat*)retVal;
|
||||
}
|
||||
|
||||
U_CAPI void U_EXPORT2
|
||||
|
@ -366,24 +385,6 @@ umsg_format( const UMessageFormat *fmt,
|
|||
return actLen;
|
||||
}
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
/**
|
||||
* This class isolates our access to private internal methods of
|
||||
* MessageFormat. It is never instantiated; it exists only for C++
|
||||
* access management.
|
||||
*/
|
||||
class MessageFormatAdapter {
|
||||
public:
|
||||
static const Formattable::Type* getArgTypeList(const MessageFormat& m,
|
||||
int32_t& count);
|
||||
};
|
||||
const Formattable::Type*
|
||||
MessageFormatAdapter::getArgTypeList(const MessageFormat& m,
|
||||
int32_t& count) {
|
||||
return m.getArgTypeList(count);
|
||||
}
|
||||
U_NAMESPACE_END
|
||||
|
||||
U_CAPI int32_t U_EXPORT2
|
||||
umsg_vformat( const UMessageFormat *fmt,
|
||||
UChar *result,
|
||||
|
@ -456,11 +457,13 @@ umsg_vformat( const UMessageFormat *fmt,
|
|||
break;
|
||||
|
||||
case Formattable::kObject:
|
||||
default:
|
||||
// This will never happen because MessageFormat doesn't
|
||||
// support kObject. When MessageFormat is changed to
|
||||
// understand MeasureFormats, modify this code to do the
|
||||
// right thing. [alan]
|
||||
U_ASSERT(FALSE);
|
||||
*status=U_ILLEGAL_ARGUMENT_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
********************************************************************************
|
||||
* Copyright (C) 1997-2010, International Business Machines
|
||||
* Copyright (C) 1997-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
********************************************************************************
|
||||
*
|
||||
|
@ -31,147 +31,91 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/numfmt.h"
|
||||
#include "unicode/fieldpos.h"
|
||||
#include "unicode/format.h"
|
||||
#include "unicode/messagepattern.h"
|
||||
#include "unicode/numfmt.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class MessageFormat;
|
||||
|
||||
/**
|
||||
* ChoiceFormat converts between ranges of numeric values
|
||||
* and string names for those ranges. A <code>ChoiceFormat</code> splits
|
||||
* the real number line <code>-Inf</code> to <code>+Inf</code> into two
|
||||
* ChoiceFormat converts between ranges of numeric values and strings for those ranges.
|
||||
* The strings must conform to the MessageFormat pattern syntax.
|
||||
*
|
||||
* <p><em><code>ChoiceFormat</code> is probably not what you need.
|
||||
* Please use <code>MessageFormat</code>
|
||||
* with <code>plural</code> arguments for proper plural selection,
|
||||
* and <code>select</code> arguments for simple selection among a fixed set of choices!</em></p>
|
||||
*
|
||||
* <p>A <code>ChoiceFormat</code> splits
|
||||
* the real number line \htmlonly<code>-∞</code> to
|
||||
* <code>+∞</code>\endhtmlonly into two
|
||||
* or more contiguous ranges. Each range is mapped to a
|
||||
* string. <code>ChoiceFormat</code> is generally used in a
|
||||
* <code>MessageFormat</code> for displaying grammatically correct
|
||||
* plurals such as "There are 2 files."</p>
|
||||
* string.</p>
|
||||
*
|
||||
* <p><code>ChoiceFormat</code> was originally intended
|
||||
* for displaying grammatically correct
|
||||
* plurals such as "There is one file." vs. "There are 2 files."
|
||||
* <em>However,</em> plural rules for many languages
|
||||
* are too complex for the capabilities of ChoiceFormat,
|
||||
* and its requirement of specifying the precise rules for each message
|
||||
* is unmanageable for translators.</p>
|
||||
*
|
||||
* <p>There are two methods of defining a <code>ChoiceFormat</code>; both
|
||||
* are equivalent. The first is by using a string pattern. This is the
|
||||
* preferred method in most cases. The second method is through direct
|
||||
* specification of the arrays that make up the
|
||||
* specification of the arrays that logically make up the
|
||||
* <code>ChoiceFormat</code>.</p>
|
||||
*
|
||||
* <p><strong>Patterns</strong></p>
|
||||
* <p>Note: Typically, choice formatting is done (if done at all) via <code>MessageFormat</code>
|
||||
* with a <code>choice</code> argument type,
|
||||
* rather than using a stand-alone <code>ChoiceFormat</code>.</p>
|
||||
*
|
||||
* <p>In most cases, the preferred way to define a
|
||||
* <code>ChoiceFormat</code> is with a pattern. Here is an example of a
|
||||
* <code>ChoiceFormat</code> pattern:</p>
|
||||
* <h5>Patterns and Their Interpretation</h5>
|
||||
*
|
||||
* \htmlonly<pre> 0≤are no files|1≤is one file|1<are many files</pre>\endhtmlonly
|
||||
* <p>The pattern string defines the range boundaries and the strings for each number range.
|
||||
* Syntax:
|
||||
* <pre>
|
||||
* choiceStyle = number separator message ('|' number separator message)*
|
||||
* number = normal_number | ['-'] \htmlonly∞\endhtmlonly (U+221E, infinity)
|
||||
* normal_number = double value (unlocalized ASCII string)
|
||||
* separator = less_than | less_than_or_equal
|
||||
* less_than = '<'
|
||||
* less_than_or_equal = '#' | \htmlonly≤\endhtmlonly (U+2264)
|
||||
* message: see {@link MessageFormat}
|
||||
* </pre>
|
||||
* Pattern_White_Space between syntax elements is ignored, except
|
||||
* around each range's sub-message.</p>
|
||||
*
|
||||
* <p>or equivalently,</p>
|
||||
* <p>Each numeric sub-range extends from the current range's number
|
||||
* to the next range's number.
|
||||
* The number itself is included in its range if a <code>less_than_or_equal</code> sign is used,
|
||||
* and excluded from its range (and instead included in the previous range)
|
||||
* if a <code>less_than</code> sign is used.</p>
|
||||
*
|
||||
* \htmlonly<pre> 0#are no files|1#is one file|1<are many files</pre>\endhtmlonly
|
||||
* <p>When a <code>ChoiceFormat</code> is constructed from
|
||||
* arrays of numbers, closure flags and strings,
|
||||
* they are interpreted just like
|
||||
* the sequence of <code>(number separator string)</code> in an equivalent pattern string.
|
||||
* <code>closure[i]==TRUE</code> corresponds to a <code>less_than</code> separator sign.
|
||||
* The equivalent pattern string will be constructed automatically.</p>
|
||||
*
|
||||
* <p>The pattern consists of a number or <em>range specifiers</em>
|
||||
* separated by vertical bars '|' (U+007C). There is no
|
||||
* vertical bar after the last range. Each range specifier is of the
|
||||
* form:</p>
|
||||
* <p>During formatting, a number is mapped to the first range
|
||||
* where the number is not greater than the range's upper limit.
|
||||
* That range's message string is returned. A NaN maps to the very first range.</p>
|
||||
*
|
||||
* \htmlonly<blockquote><em>Number Separator String</em></blockquote>\endhtmlonly
|
||||
* <p>During parsing, a range is selected for the longest match of
|
||||
* any range's message. That range's number is returned, ignoring the separator/closure.
|
||||
* Only a simple string match is performed, without parsing of arguments that
|
||||
* might be specified in the message strings.</p>
|
||||
*
|
||||
* <p><em>Number</em> is a floating point number that can be parsed by a
|
||||
* default <code>NumberFormat</code> for the US locale. It gives the
|
||||
* lower limit of this range. The lower limit is either inclusive or
|
||||
* exclusive, depending on the <em>separator</em>. The upper limit is
|
||||
* given by the lower limit of the next range. The Unicode infinity
|
||||
* sign \htmlonly∞ \endhtmlonly (U+221E) is recognized for positive infinity. It may be preceded by
|
||||
* '-' (U+002D) to indicate negative infinity.</p>
|
||||
* <p>Note that the first range's number is ignored in formatting
|
||||
* but may be returned from parsing.</p>
|
||||
*
|
||||
* <p><em>String</em> is the format string for this range, with special
|
||||
* characters enclosed in single quotes (<code>'The #
|
||||
* sign'</code>). Single quotes themselves are indicated by two single
|
||||
* quotes in a row (<code>'o''clock'</code>).</p>
|
||||
*
|
||||
* <p><em>Separator</em> is one of the following single characters:
|
||||
*
|
||||
* <ul>
|
||||
* <li>\htmlonly'≤' \endhtmlonly (U+2264) or '#' (U+0023)
|
||||
* indicates that the lower limit given by <em>Number</em> is
|
||||
* inclusive. (The two characters are equivalent to ChoiceFormat.)
|
||||
* This means that the limit value <em>Number</em> belongs to this
|
||||
* range. Another way of saying this is that the corresponding
|
||||
* closure is <code>FALSE</code>.</li>
|
||||
*
|
||||
* <li>'<' (U+003C) indicates that the lower limit given by
|
||||
* <em>Number</em> is exclusive. This means that the value
|
||||
* <em>Number</em> belongs to the prior range.</li> Another way of
|
||||
* saying this is that the corresponding closure is
|
||||
* <code>TRUE</code>.
|
||||
* </ul>
|
||||
*
|
||||
* <p>See below for more information about closures.</p>
|
||||
*
|
||||
* <p><strong>Arrays</strong></p>
|
||||
*
|
||||
* <p>A <code>ChoiceFormat</code> defining <code>n</code> intervals
|
||||
* (<code>n</code> >= 2) is specified by three arrays of
|
||||
* <code>n</code> items:
|
||||
*
|
||||
* <ul>
|
||||
* <li><code>double limits[]</code> gives the start of each
|
||||
* interval. This must be a non-decreasing list of values, none of
|
||||
* which may be <code>NaN</code>.</li>
|
||||
* <li><code>UBool closures[]</code> determines whether each limit
|
||||
* value is contained in the interval below it or in the interval
|
||||
* above it. If <code>closures[i]</code> is <code>FALSE</code>, then
|
||||
* <code>limits[i]</code> is a member of interval
|
||||
* <code>i</code>. Otherwise it is a member of interval
|
||||
* <code>i+1</code>. If no closures array is specified, this is
|
||||
* equivalent to having all closures be <code>FALSE</code>. Closures
|
||||
* allow one to specify half-open, open, or closed intervals.</li>
|
||||
* <li><code>UnicodeString formats[]</code> gives the string label
|
||||
* associated with each interval.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p><strong>Formatting and Parsing</strong></p>
|
||||
*
|
||||
* <p>During formatting, a number is converted to a
|
||||
* string. <code>ChoiceFormat</code> accomplishes this by mapping the
|
||||
* number to an interval using the following rule. Given a number
|
||||
* <code>X</code> and an index value <code>j</code> in the range
|
||||
* <code>0..n-1</code>, where <code>n</code> is the number of ranges:</p>
|
||||
*
|
||||
* \htmlonly<blockquote>\endhtmlonly<code>X</code> matches <code>j</code> if and only if
|
||||
* <code>limit[j] <= X < limit[j+1]</code>
|
||||
* \htmlonly</blockquote>\endhtmlonly
|
||||
*
|
||||
* <p>(This assumes that all closures are <code>FALSE</code>. If some
|
||||
* closures are <code>TRUE</code> then the relations must be changed to
|
||||
* <code><=</code> or <code><</code> as appropriate.) If there is
|
||||
* no match, then either the first or last index is used, depending on
|
||||
* whether the number is too low or too high. Once a number is mapped to
|
||||
* an interval <code>j</code>, the string <code>formats[j]</code> is
|
||||
* output.</p>
|
||||
*
|
||||
* <p>During parsing, a string is converted to a
|
||||
* number. <code>ChoiceFormat</code> finds the element
|
||||
* <code>formats[j]</code> equal to the string, and returns
|
||||
* <code>limits[j]</code> as the parsed value.</p>
|
||||
*
|
||||
* <p><strong>Notes</strong></p>
|
||||
*
|
||||
* <p>The first limit value does not define a range boundary. For
|
||||
* example, in the pattern \htmlonly"<code>1.0#a|2.0#b</code>"\endhtmlonly, the
|
||||
* intervals are [-Inf, 2.0) and [2.0, +Inf]. It appears that the first
|
||||
* interval should be [1.0, 2.0). However, since all values that are too
|
||||
* small are mapped to range zero, the first interval is effectively
|
||||
* [-Inf, 2.0). However, the first limit value <em>is</em> used during
|
||||
* formatting. In this example, <code>parse("a")</code> returns
|
||||
* 1.0.</p>
|
||||
*
|
||||
* <p>There are no gaps between intervals and the entire number line is
|
||||
* covered. A <code>ChoiceFormat</code> maps <em>all</em> possible
|
||||
* double values to a finite set of intervals.</p>
|
||||
*
|
||||
* <p>The non-number <code>NaN</code> is mapped to interval zero during
|
||||
* formatting.</p>
|
||||
*
|
||||
* <p><strong>Examples</strong></p>
|
||||
* <h5>Examples</h5>
|
||||
*
|
||||
* <p>Here is an example of two arrays that map the number
|
||||
* <code>1..7</code> to the English day of the week abbreviations
|
||||
|
@ -183,13 +127,15 @@ class MessageFormat;
|
|||
*
|
||||
* <p>Here is an example that maps the ranges [-Inf, 1), [1, 1], and (1,
|
||||
* +Inf] to three strings. That is, the number line is split into three
|
||||
* ranges: x < 1.0, x = 1.0, and x > 1.0.</p>
|
||||
* ranges: x < 1.0, x = 1.0, and x > 1.0.
|
||||
* (The round parentheses in the notation above indicate an exclusive boundary,
|
||||
* like the turned bracket in European notation: [-Inf, 1) == [-Inf, 1[ )</p>
|
||||
*
|
||||
* <pre> {0, 1, 1},
|
||||
* {FALSE, FALSE, TRUE},
|
||||
* {"no files", "one file", "many files"}</pre>
|
||||
*
|
||||
* <p>Here is a simple example that shows formatting and parsing: </p>
|
||||
* <p>Here is an example that shows formatting and parsing: </p>
|
||||
*
|
||||
* \code
|
||||
* #include <unicode/choicfmt.h>
|
||||
|
@ -215,43 +161,6 @@ class MessageFormat;
|
|||
* }
|
||||
* \endcode
|
||||
*
|
||||
* <p>Here is a more complex example using a <code>ChoiceFormat</code>
|
||||
* constructed from a pattern together with a
|
||||
* <code>MessageFormat</code>.</p>
|
||||
*
|
||||
* \code
|
||||
* #include <unicode/choicfmt.h>
|
||||
* #include <unicode/msgfmt.h>
|
||||
* #include <unicode/unistr.h>
|
||||
* #include <iostream.h>
|
||||
*
|
||||
* int main(int argc, char *argv[]) {
|
||||
* UErrorCode status = U_ZERO_ERROR;
|
||||
* double filelimits[] = {0,1,2};
|
||||
* UnicodeString filepart[] =
|
||||
* {"are no files","is one file","are {0} files"};
|
||||
* ChoiceFormat* fileform = new ChoiceFormat(filelimits, filepart, 3 );
|
||||
* Format* testFormats[] =
|
||||
* {fileform, NULL, NumberFormat::createInstance(status)};
|
||||
* MessageFormat pattform("There {0} on {1}", status );
|
||||
* pattform.adoptFormats(testFormats, 3);
|
||||
* Formattable testArgs[] = {0L, "Disk A"};
|
||||
* FieldPosition fp(0);
|
||||
* UnicodeString str;
|
||||
* char buf[256];
|
||||
* for (int32_t i = 0; i < 4; ++i) {
|
||||
* Formattable fInt(i);
|
||||
* testArgs[0] = fInt;
|
||||
* pattform.format(testArgs, 2, str, fp, status );
|
||||
* str.extract(0, str.length(), buf, "");
|
||||
* str.truncate(0);
|
||||
* cout << "Output for i=" << i << " : " << buf << endl;
|
||||
* }
|
||||
* cout << endl;
|
||||
* return 0;
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* <p><em>User subclasses are not supported.</em> While clients may write
|
||||
* subclasses, such code will not necessarily work and will not be
|
||||
* guaranteed to work stably from release to release.
|
||||
|
@ -259,8 +168,7 @@ class MessageFormat;
|
|||
class U_I18N_API ChoiceFormat: public NumberFormat {
|
||||
public:
|
||||
/**
|
||||
* Construct a new ChoiceFormat with the limits and the corresponding formats
|
||||
* based on the pattern.
|
||||
* Constructs a new ChoiceFormat from the pattern string.
|
||||
*
|
||||
* @param pattern Pattern used to construct object.
|
||||
* @param status Output param to receive success code. If the
|
||||
|
@ -272,32 +180,31 @@ public:
|
|||
|
||||
|
||||
/**
|
||||
* Construct a new ChoiceFormat with the given limits and formats. Copy
|
||||
* the limits and formats instead of adopting them.
|
||||
* Constructs a new ChoiceFormat with the given limits and message strings.
|
||||
* All closure flags default to <code>FALSE</code>,
|
||||
* equivalent to <code>less_than_or_equal</code> separators.
|
||||
*
|
||||
* Copies the limits and formats instead of adopting them.
|
||||
*
|
||||
* @param limits Array of limit values.
|
||||
* @param formats Array of formats.
|
||||
* @param count Size of 'limits' and 'formats' arrays.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
|
||||
ChoiceFormat(const double* limits,
|
||||
const UnicodeString* formats,
|
||||
int32_t count );
|
||||
|
||||
/**
|
||||
* Construct a new ChoiceFormat with the given limits and formats.
|
||||
* Copy the limits and formats (instead of adopting them). By
|
||||
* default, each limit in the array specifies the inclusive lower
|
||||
* bound of its range, and the exclusive upper bound of the previous
|
||||
* range. However, if the isLimitOpen element corresponding to a
|
||||
* limit is TRUE, then the limit is the exclusive lower bound of its
|
||||
* range, and the inclusive upper bound of the previous range.
|
||||
* Constructs a new ChoiceFormat with the given limits, closure flags and message strings.
|
||||
*
|
||||
* Copies the limits and formats instead of adopting them.
|
||||
*
|
||||
* @param limits Array of limit values
|
||||
* @param closures Array of booleans specifying whether each
|
||||
* element of 'limits' is open or closed. If FALSE, then the
|
||||
* corresponding limit is a member of the range above it. If TRUE,
|
||||
* then the limit belongs to the range below it.
|
||||
* corresponding limit number is a member of its range.
|
||||
* If TRUE, then the limit number belongs to the previous range.
|
||||
* @param formats Array of formats
|
||||
* @param count Size of 'limits', 'closures', and 'formats' arrays
|
||||
* @stable ICU 2.4
|
||||
|
@ -330,8 +237,8 @@ public:
|
|||
virtual ~ChoiceFormat();
|
||||
|
||||
/**
|
||||
* Clone this Format object polymorphically. The caller owns the
|
||||
* result and should delete it when done.
|
||||
* Clones this Format object. The caller owns the
|
||||
* result and must delete it when done.
|
||||
*
|
||||
* @return a copy of this object
|
||||
* @stable ICU 2.0
|
||||
|
@ -339,7 +246,7 @@ public:
|
|||
virtual Format* clone(void) const;
|
||||
|
||||
/**
|
||||
* Return true if the given Format objects are semantically equal.
|
||||
* Returns true if the given Format objects are semantically equal.
|
||||
* Objects of different subclasses are considered unequal.
|
||||
*
|
||||
* @param other ChoiceFormat object to be compared
|
||||
|
@ -362,7 +269,7 @@ public:
|
|||
/**
|
||||
* Sets the pattern.
|
||||
* @param pattern The pattern to be applied.
|
||||
* @param parseError Struct to recieve information on position
|
||||
* @param parseError Struct to receive information on position
|
||||
* of error if an error is encountered
|
||||
* @param status Output param set to success/failure code on
|
||||
* exit. If the pattern is invalid, this will be
|
||||
|
@ -375,7 +282,7 @@ public:
|
|||
/**
|
||||
* Gets the pattern.
|
||||
*
|
||||
* @param pattern Output param which will recieve the pattern
|
||||
* @param pattern Output param which will receive the pattern
|
||||
* Previous contents are deleted.
|
||||
* @return A reference to 'pattern'
|
||||
* @stable ICU 2.0
|
||||
|
@ -383,7 +290,8 @@ public:
|
|||
virtual UnicodeString& toPattern(UnicodeString &pattern) const;
|
||||
|
||||
/**
|
||||
* Set the choices to be used in formatting.
|
||||
* Sets the choices to be used in formatting.
|
||||
* For details see the constructor with the same parameter list.
|
||||
*
|
||||
* @param limitsToCopy Contains the top value that you want
|
||||
* parsed with that format, and should be in
|
||||
|
@ -399,8 +307,9 @@ public:
|
|||
int32_t count );
|
||||
|
||||
/**
|
||||
* Set the choices to be used in formatting. See class description
|
||||
* for documenatation of the limits, closures, and formats arrays.
|
||||
* Sets the choices to be used in formatting.
|
||||
* For details see the constructor with the same parameter list.
|
||||
*
|
||||
* @param limits Array of limits
|
||||
* @param closures Array of limit booleans
|
||||
* @param formats Array of format string
|
||||
|
@ -413,30 +322,32 @@ public:
|
|||
int32_t count);
|
||||
|
||||
/**
|
||||
* Get the limits passed in the constructor.
|
||||
* Returns NULL and 0.
|
||||
* Before ICU 4.8, this used to return the choice limits array.
|
||||
*
|
||||
* @param count The size of the limits arrays
|
||||
* @return the limits.
|
||||
* @stable ICU 2.0
|
||||
* @param count Will be set to 0.
|
||||
* @return NULL
|
||||
* @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern.
|
||||
*/
|
||||
virtual const double* getLimits(int32_t& count) const;
|
||||
|
||||
/**
|
||||
* Get the limit booleans passed in the constructor. The caller
|
||||
* must not delete the result.
|
||||
* Returns NULL and 0.
|
||||
* Before ICU 4.8, this used to return the limit booleans array.
|
||||
*
|
||||
* @param count The size of the arrays
|
||||
* @return the closures
|
||||
* @stable ICU 2.4
|
||||
* @param count Will be set to 0.
|
||||
* @return NULL
|
||||
* @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern.
|
||||
*/
|
||||
virtual const UBool* getClosures(int32_t& count) const;
|
||||
|
||||
/**
|
||||
* Get the formats passed in the constructor.
|
||||
* Returns NULL and 0.
|
||||
* Before ICU 4.8, this used to return the array of choice strings.
|
||||
*
|
||||
* @param count The size of the arrays
|
||||
* @return the formats.
|
||||
* @stable ICU 2.0
|
||||
* @param count Will be set to 0.
|
||||
* @return NULL
|
||||
* @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern.
|
||||
*/
|
||||
virtual const UnicodeString* getFormats(int32_t& count) const;
|
||||
|
||||
|
@ -444,7 +355,7 @@ public:
|
|||
using NumberFormat::format;
|
||||
|
||||
/**
|
||||
* Format a double or long number using this object's choices.
|
||||
* Formats a double number using this object's choices.
|
||||
*
|
||||
* @param number The value to be formatted.
|
||||
* @param appendTo Output parameter to receive result.
|
||||
|
@ -458,7 +369,7 @@ public:
|
|||
UnicodeString& appendTo,
|
||||
FieldPosition& pos) const;
|
||||
/**
|
||||
* Format a int_32t number using this object's choices.
|
||||
* Formats an int32_t number using this object's choices.
|
||||
*
|
||||
* @param number The value to be formatted.
|
||||
* @param appendTo Output parameter to receive result.
|
||||
|
@ -473,7 +384,7 @@ public:
|
|||
FieldPosition& pos) const;
|
||||
|
||||
/**
|
||||
* Format an int64_t number using this object's choices.
|
||||
* Formats an int64_t number using this object's choices.
|
||||
*
|
||||
* @param number The value to be formatted.
|
||||
* @param appendTo Output parameter to receive result.
|
||||
|
@ -488,7 +399,7 @@ public:
|
|||
FieldPosition& pos) const;
|
||||
|
||||
/**
|
||||
* Format an array of objects using this object's choices.
|
||||
* Formats an array of objects using this object's choices.
|
||||
*
|
||||
* @param objs The array of objects to be formatted.
|
||||
* @param cnt The size of objs.
|
||||
|
@ -507,7 +418,7 @@ public:
|
|||
FieldPosition& pos,
|
||||
UErrorCode& success) const;
|
||||
/**
|
||||
* Format an object using this object's choices.
|
||||
* Formats an object using this object's choices.
|
||||
*
|
||||
*
|
||||
* @param obj The object to be formatted.
|
||||
|
@ -542,7 +453,7 @@ public:
|
|||
|
||||
/**
|
||||
* Redeclared NumberFormat method.
|
||||
* Format a double number. These methods call the NumberFormat
|
||||
* Formats a double number. These methods call the NumberFormat
|
||||
* pure virtual format() methods with the default FieldPosition.
|
||||
*
|
||||
* @param number The value to be formatted.
|
||||
|
@ -556,7 +467,7 @@ public:
|
|||
|
||||
/**
|
||||
* Redeclared NumberFormat method.
|
||||
* Format a long number. These methods call the NumberFormat
|
||||
* Formats an int32_t number. These methods call the NumberFormat
|
||||
* pure virtual format() methods with the default FieldPosition.
|
||||
*
|
||||
* @param number The value to be formatted.
|
||||
|
@ -569,13 +480,10 @@ public:
|
|||
UnicodeString& appendTo) const;
|
||||
|
||||
/**
|
||||
* Return a long if possible (e.g. within range LONG_MAX,
|
||||
* LONG_MAX], and with no decimals), otherwise a double. If
|
||||
* IntegerOnly is set, will stop at a decimal point (or equivalent;
|
||||
* e.g. for rational numbers "1 2/3", will stop after the 1).
|
||||
* <P>
|
||||
* If no object can be parsed, parsePosition is unchanged, and NULL is
|
||||
* returned.
|
||||
* Looks for the longest match of any message string on the input text and,
|
||||
* if there is a match, sets the result object to the corresponding range's number.
|
||||
*
|
||||
* If no string matches, then the parsePosition is unchanged.
|
||||
*
|
||||
* @param text The text to be parsed.
|
||||
* @param result Formattable to be set to the parse result.
|
||||
|
@ -583,7 +491,6 @@ public:
|
|||
* @param parsePosition The position to start parsing at on input.
|
||||
* On output, moved to after the last successfully
|
||||
* parsed character. On parse failure, does not change.
|
||||
* @see NumberFormat::isParseIntegerOnly
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void parse(const UnicodeString& text,
|
||||
|
@ -591,32 +498,23 @@ public:
|
|||
ParsePosition& parsePosition) const;
|
||||
|
||||
/**
|
||||
* Return a long if possible (e.g. within range LONG_MAX,
|
||||
* LONG_MAX], and with no decimals), otherwise a double. If
|
||||
* IntegerOnly is set, will stop at a decimal point (or equivalent;
|
||||
* e.g. for rational numbers "1 2/3", will stop after the 1).
|
||||
* <P>
|
||||
* If no object can be parsed, parsePosition is unchanged, and NULL is
|
||||
* returned.
|
||||
*
|
||||
* @param text The text to be parsed.
|
||||
* @param result Formattable to be set to the parse result.
|
||||
* If parse fails, return contents are undefined.
|
||||
* @param status Output param with the formatted string.
|
||||
* @see NumberFormat::isParseIntegerOnly
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
* Looks for the longest match of any message string on the input text and,
|
||||
* if there is a match, sets the result object to the corresponding range's number.
|
||||
*
|
||||
* If no string matches, then the UErrorCode is set to U_INVALID_FORMAT_ERROR.
|
||||
*
|
||||
* @param text The text to be parsed.
|
||||
* @param result Formattable to be set to the parse result.
|
||||
* If parse fails, return contents are undefined.
|
||||
* @param status Output param set to success/failure code.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void parse(const UnicodeString& text,
|
||||
Formattable& result,
|
||||
UErrorCode& status) const;
|
||||
|
||||
|
||||
public:
|
||||
/**
|
||||
* Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
|
||||
* This method is to implement a simple version of RTTI, since not all
|
||||
* C++ compilers support genuine RTTI. Polymorphic operator==() and
|
||||
* clone() methods call this method.
|
||||
* Returns a unique class ID POLYMORPHICALLY. Part of ICU's "poor man's RTTI".
|
||||
*
|
||||
* @return The class ID for this object. All objects of a
|
||||
* given class have the same class ID. Objects of
|
||||
|
@ -626,7 +524,7 @@ public:
|
|||
virtual UClassID getDynamicClassID(void) const;
|
||||
|
||||
/**
|
||||
* Return the class ID for this class. This is useful only for
|
||||
* Returns the class ID for this class. This is useful only for
|
||||
* comparing to a return value from getDynamicClassID(). For example:
|
||||
* <pre>
|
||||
* . Base* polymorphic_pointer = createPolymorphicObject();
|
||||
|
@ -639,22 +537,9 @@ public:
|
|||
static UClassID U_EXPORT2 getStaticClassID(void);
|
||||
|
||||
private:
|
||||
// static cache management (thread-safe)
|
||||
// static NumberFormat* getNumberFormat(UErrorCode &status); // call this function to 'check out' a numberformat from the cache.
|
||||
// static void releaseNumberFormat(NumberFormat *adopt); // call this function to 'return' the number format to the cache.
|
||||
|
||||
/**
|
||||
* Converts a string to a double value using a default NumberFormat object
|
||||
* which is static (shared by all ChoiceFormat instances).
|
||||
* @param string the string to be converted with.
|
||||
* @return the converted double number.
|
||||
*/
|
||||
static double stod(const UnicodeString& string);
|
||||
|
||||
/**
|
||||
* Converts a double value to a string using a default NumberFormat object
|
||||
* which is static (shared by all ChoiceFormat instances).
|
||||
* @param value the double number to be converted with.
|
||||
* Converts a double value to a string.
|
||||
* @param value the double number to be converted.
|
||||
* @param string the result string.
|
||||
* @return the converted string.
|
||||
*/
|
||||
|
@ -667,7 +552,7 @@ private:
|
|||
* based on the pattern.
|
||||
*
|
||||
* @param newPattern Pattern used to construct object.
|
||||
* @param parseError Struct to recieve information on position
|
||||
* @param parseError Struct to receive information on position
|
||||
* of error if an error is encountered.
|
||||
* @param status Output param to receive success code. If the
|
||||
* pattern cannot be parsed, set to failure code.
|
||||
|
@ -678,7 +563,59 @@ private:
|
|||
UErrorCode& status);
|
||||
|
||||
friend class MessageFormat;
|
||||
|
||||
virtual void setChoices(const double* limits,
|
||||
const UBool* closures,
|
||||
const UnicodeString* formats,
|
||||
int32_t count,
|
||||
UErrorCode &errorCode);
|
||||
|
||||
/**
|
||||
* Finds the ChoiceFormat sub-message for the given number.
|
||||
* @param pattern A MessagePattern.
|
||||
* @param partIndex the index of the first ChoiceFormat argument style part.
|
||||
* @param number a number to be mapped to one of the ChoiceFormat argument's intervals
|
||||
* @return the sub-message start part index.
|
||||
*/
|
||||
static int32_t findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number);
|
||||
|
||||
static double parseArgument(
|
||||
const MessagePattern &pattern, int32_t partIndex,
|
||||
const UnicodeString &source, ParsePosition &pos);
|
||||
|
||||
/**
|
||||
* Matches the pattern string from the end of the partIndex to
|
||||
* the beginning of the limitPartIndex,
|
||||
* including all syntax except SKIP_SYNTAX,
|
||||
* against the source string starting at sourceOffset.
|
||||
* If they match, returns the length of the source string match.
|
||||
* Otherwise returns -1.
|
||||
*/
|
||||
static int32_t matchStringUntilLimitPart(
|
||||
const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
|
||||
const UnicodeString &source, int32_t sourceOffset);
|
||||
|
||||
/**
|
||||
* Some of the ChoiceFormat constructors do not have a UErrorCode parameter.
|
||||
* We need _some_ way to provide one for the MessagePattern constructor.
|
||||
* Alternatively, the MessagePattern could be a pointer field, but that is
|
||||
* not nice either.
|
||||
*/
|
||||
UErrorCode constructorErrorCode;
|
||||
|
||||
/**
|
||||
* The MessagePattern which contains the parsed structure of the pattern string.
|
||||
*
|
||||
* Starting with ICU 4.8, the MessagePattern contains a sequence of
|
||||
* numeric/selector/message parts corresponding to the parsed pattern.
|
||||
* For details see the MessagePattern class API docs.
|
||||
*/
|
||||
MessagePattern msgPattern;
|
||||
|
||||
/**
|
||||
* Docs & fields from before ICU 4.8, before MessagePattern was used.
|
||||
* Commented out, and left only for explanation of semantics.
|
||||
* --------
|
||||
* Each ChoiceFormat divides the range -Inf..+Inf into fCount
|
||||
* intervals. The intervals are:
|
||||
*
|
||||
|
@ -713,12 +650,11 @@ private:
|
|||
*
|
||||
* Because of the nature of interval 0, fClosures[0] has no
|
||||
* effect.
|
||||
|
||||
*/
|
||||
double* fChoiceLimits;
|
||||
UBool* fClosures;
|
||||
UnicodeString* fChoiceFormats;
|
||||
int32_t fCount;
|
||||
// double* fChoiceLimits;
|
||||
// UBool* fClosures;
|
||||
// UnicodeString* fChoiceFormats;
|
||||
// int32_t fCount;
|
||||
};
|
||||
|
||||
inline UnicodeString&
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2010, International Business Machines Corporation and
|
||||
* Copyright (C) 2007-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************************
|
||||
*
|
||||
|
@ -28,106 +28,213 @@
|
|||
|
||||
#include "unicode/format.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/messagepattern.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/plurfmt.h"
|
||||
#include "unicode/plurrule.h"
|
||||
|
||||
U_CDECL_BEGIN
|
||||
// Forward declaration.
|
||||
struct UHashtable;
|
||||
typedef struct UHashtable UHashtable;
|
||||
U_CDECL_END
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class NumberFormat;
|
||||
class AppendableWrapper;
|
||||
class DateFormat;
|
||||
class NumberFormat;
|
||||
|
||||
/**
|
||||
* <p>MessageFormat prepares strings for display to users,
|
||||
* with optional arguments (variables/placeholders).
|
||||
* The arguments can occur in any order, which is necessary for translation
|
||||
* into languages with different grammars.
|
||||
*
|
||||
* MessageFormat produces concatenated messages in a language-neutral
|
||||
* way. Use this whenever concatenating strings that are displayed to
|
||||
* end users.
|
||||
* <p>A MessageFormat is constructed from a <em>pattern</em> string
|
||||
* with arguments in {curly braces} which will be replaced by formatted values.
|
||||
*
|
||||
* <P>A MessageFormat contains an array of <EM>subformats</EM> arranged
|
||||
* within a <EM>template string</EM>. Together, the subformats and
|
||||
* template string determine how the MessageFormat will operate during
|
||||
* formatting and parsing.
|
||||
* <p><code>MessageFormat</code> differs from the other <code>Format</code>
|
||||
* classes in that you create a <code>MessageFormat</code> object with one
|
||||
* of its constructors (not with a <code>createInstance</code> style factory
|
||||
* method). Factory methods aren't necessary because <code>MessageFormat</code>
|
||||
* itself doesn't implement locale-specific behavior. Any locale-specific
|
||||
* behavior is defined by the pattern that you provide and the
|
||||
* subformats used for inserted arguments.
|
||||
*
|
||||
* <P>Typically, both the subformats and the template string are
|
||||
* specified at once in a <EM>pattern</EM>. By using different
|
||||
* patterns for different locales, messages may be localized.
|
||||
* <p>Arguments can be named (using identifiers) or numbered (using small ASCII-digit integers).
|
||||
* Some of the API methods work only with argument numbers and set a failing UErrorCode
|
||||
* if the pattern has named arguments (see {@link #usesNamedArguments()}).
|
||||
*
|
||||
* <P>When formatting, MessageFormat takes an array of arguments
|
||||
* and produces a user-readable string. Each argument is a
|
||||
* Formattable object; they may be passed in in an array, or as a
|
||||
* single Formattable object which itself contains an array. Each
|
||||
* argument is matched up with its corresponding subformat, which then
|
||||
* formats it into a string. The resulting strings are then assembled
|
||||
* within the string template of the MessageFormat to produce the
|
||||
* final output string.
|
||||
* <p>An argument might not specify any format type. In this case,
|
||||
* a Number value is formatted with a default (for the locale) NumberFormat,
|
||||
* a Date value is formatted with a default (for the locale) DateFormat,
|
||||
* and for any other value its toString() value is used.
|
||||
*
|
||||
* <p><strong>Note:</strong>
|
||||
* In ICU 4.0 MessageFormat supports named arguments. If a named argument
|
||||
* is used, all arguments must be named. Names start with a character in
|
||||
* <code>UCHAR_ID_START</code> and continue with characters in
|
||||
* <code>UCHARID_CONTINUE</code>, in particular they do not start with a digit.
|
||||
* If named arguments are used, {@link #usesNamedArguments()} will return true.
|
||||
* <p>An argument might specify a "simple" type for which the specified
|
||||
* Format object is created, cached and used.
|
||||
*
|
||||
* <p>The other new methods supporting named arguments are
|
||||
* {@link #getFormatNames(UErrorCode& status)},
|
||||
* {@link #getFormat(const UnicodeString& formatName, UErrorCode& status)}
|
||||
* {@link #setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status)},
|
||||
* {@link #adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status)},
|
||||
* {@link #format(const UnicodeString* argumentNames, const Formattable* arguments,
|
||||
* int32_t count, UnicodeString& appendTo,UErrorCode& status)}.
|
||||
* These methods are all compatible with patterns that do not used named arguments--
|
||||
* in these cases the keys in the input or output use <code>UnicodeString</code>s
|
||||
* that name the argument indices, e.g. "0", "1", "2"... etc.
|
||||
* <p>An argument might have a "complex" type with nested MessageFormat sub-patterns.
|
||||
* During formatting, one of these sub-messages is selected according to the argument value
|
||||
* and recursively formatted.
|
||||
*
|
||||
* <p>If this format uses named arguments, certain methods that take or
|
||||
* return arrays do not perform any action, since it is not possible to
|
||||
* identify positions in an array using a name. Of these methods,
|
||||
* UErrorCode is set to U_ILLEGAL_ARGUMENT_ERROR by format, and to
|
||||
* U_ARGUMENT_TYPE_MISMATCH by parse.
|
||||
* These methods are
|
||||
* {@link #adoptFormats(Format** formatsToAdopt, int32_t count)},
|
||||
* {@link #setFormats(const Format** newFormats,int32_t count)},
|
||||
* {@link #adoptFormat(int32_t n, Format *newFormat)},
|
||||
* {@link #setFormat(int32_t n, Format& newFormat)},
|
||||
* {@link #format(const Formattable* source, int32_t count, UnicodeString& appendTo, FieldPosition& ignore, UErrorCode& success)},
|
||||
* {@link #format(const UnicodeString& pattern,const Formattable* arguments,int32_t cnt,UnicodeString& appendTo,UErrorCode& success)},
|
||||
* {@link #format(const Formattable& source, UnicodeString& appendTo, FieldPosition& ignore, UErrorCode& success)},
|
||||
* {@link #format(const Formattable* arguments, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection,UErrorCode& success)},
|
||||
* {@link #parse(const UnicodeString& source, ParsePosition& pos, int32_t& count)},
|
||||
* {@link #parse(const UnicodeString& source, int32_t& cnt, UErrorCode& status)}
|
||||
* <p>After construction, a custom Format object can be set for
|
||||
* a top-level argument, overriding the default formatting and parsing behavior
|
||||
* for that argument.
|
||||
* However, custom formatting can be achieved more simply by writing
|
||||
* a typeless argument in the pattern string
|
||||
* and supplying it with a preformatted string value.
|
||||
*
|
||||
* <P>
|
||||
* During parsing, an input string is matched against the string
|
||||
* template of the MessageFormat to produce an array of Formattable
|
||||
* objects. Plain text of the template string is matched directly
|
||||
* against input text. At each position in the template string where
|
||||
* a subformat is located, the subformat is called to parse the
|
||||
* corresponding segment of input text to produce an output argument.
|
||||
* In this way, an array of arguments is created which together
|
||||
* constitute the parse result.
|
||||
* <P>
|
||||
* Parsing may fail or produce unexpected results in a number of
|
||||
* circumstances.
|
||||
* <UL>
|
||||
* <LI>If one of the arguments does not occur in the pattern, it
|
||||
* will be returned as a default Formattable.
|
||||
* <LI>If the format of an argument loses information, such as with
|
||||
* a choice format where a large number formats to "many", then the
|
||||
* parse may not correspond to the originally formatted argument.
|
||||
* <LI>MessageFormat does not handle ChoiceFormat recursion during
|
||||
* parsing; such parses will fail.
|
||||
* <LI>Parsing will not always find a match (or the correct match) if
|
||||
* some part of the parse is ambiguous. For example, if the pattern
|
||||
* "{1},{2}" is used with the string arguments {"a,b", "c"}, it will
|
||||
* format as "a,b,c". When the result is parsed, it will return {"a",
|
||||
* "b,c"}.
|
||||
* <LI>If a single argument is formatted more than once in the string,
|
||||
* then the rightmost subformat in the pattern string will produce the
|
||||
* parse result; prior subformats with the same argument index will
|
||||
* have no effect.
|
||||
* </UL>
|
||||
* Here are some examples of usage:
|
||||
* <P>
|
||||
* <p>When formatting, MessageFormat takes a collection of argument values
|
||||
* and writes an output string.
|
||||
* The argument values may be passed as an array
|
||||
* (when the pattern contains only numbered arguments)
|
||||
* or as an array of names and an array of arguments (which works for both named
|
||||
* and numbered arguments).
|
||||
*
|
||||
* <p>Each argument is matched with one of the input values by array index or argument name
|
||||
* and formatted according to its pattern specification
|
||||
* (or using a custom Format object if one was set).
|
||||
* A numbered pattern argument is matched with an argument name that contains that number
|
||||
* as an ASCII-decimal-digit string (without leading zero).
|
||||
*
|
||||
* <h4><a name="patterns">Patterns and Their Interpretation</a></h4>
|
||||
*
|
||||
* <code>MessageFormat</code> uses patterns of the following form:
|
||||
* <pre>
|
||||
* message = messageText (argument messageText)*
|
||||
* argument = noneArg | simpleArg | complexArg
|
||||
* complexArg = choiceArg | pluralArg | selectArg
|
||||
*
|
||||
* noneArg = '{' argNameOrNumber '}'
|
||||
* simpleArg = '{' argNameOrNumber ',' argType [',' argStyle] '}'
|
||||
* choiceArg = '{' argNameOrNumber ',' "choice" ',' choiceStyle '}'
|
||||
* pluralArg = '{' argNameOrNumber ',' "plural" ',' pluralStyle '}'
|
||||
* selectArg = '{' argNameOrNumber ',' "select" ',' selectStyle '}'
|
||||
*
|
||||
* choiceStyle: see {@link ChoiceFormat}
|
||||
* pluralStyle: see {@link PluralFormat}
|
||||
* selectStyle: see {@link SelectFormat}
|
||||
*
|
||||
* argNameOrNumber = argName | argNumber
|
||||
* argName = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
|
||||
* argNumber = '0' | ('1'..'9' ('0'..'9')*)
|
||||
*
|
||||
* argType = "number" | "date" | "time" | "spellout" | "ordinal" | "duration"
|
||||
* argStyle = "short" | "medium" | "long" | "full" | "integer" | "currency" | "percent" | argStyleText
|
||||
* </pre>
|
||||
*
|
||||
* <ul>
|
||||
* <li>messageText can contain quoted literal strings including syntax characters.
|
||||
* A quoted literal string begins with an ASCII apostrophe and a syntax character
|
||||
* (usually a {curly brace}) and continues until the next single apostrophe.
|
||||
* A double ASCII apostrophe inside or outside of a quoted string represents
|
||||
* one literal apostrophe.
|
||||
* <li>Quotable syntax characters are the {curly braces} in all messageText parts,
|
||||
* plus the '#' sign in a messageText immediately inside a pluralStyle,
|
||||
* and the '|' symbol in a messageText immediately inside a choiceStyle.
|
||||
* <li>See also {@link MessagePattern.ApostropheMode}
|
||||
* <li>In argStyleText, every single ASCII apostrophe begins and ends quoted literal text,
|
||||
* and unquoted {curly braces} must occur in matched pairs.
|
||||
* </ul>
|
||||
*
|
||||
* <p>Recommendation: Use the real apostrophe (single quote) character
|
||||
* \htmlonly’\endhtmlonly (U+2019) for
|
||||
* human-readable text, and use the ASCII apostrophe ' (U+0027)
|
||||
* only in program syntax, like quoting in MessageFormat.
|
||||
* See the annotations for U+0027 Apostrophe in The Unicode Standard.
|
||||
*
|
||||
* <p>The <code>argType</code> and <code>argStyle</code> values are used to create
|
||||
* a <code>Format</code> instance for the format element. The following
|
||||
* table shows how the values map to Format instances. Combinations not
|
||||
* shown in the table are illegal. Any <code>argStyleText</code> must
|
||||
* be a valid pattern string for the Format subclass used.
|
||||
*
|
||||
* <p><table border=1>
|
||||
* <tr>
|
||||
* <th>argType
|
||||
* <th>argStyle
|
||||
* <th>resulting Format object
|
||||
* <tr>
|
||||
* <td colspan=2><i>(none)</i>
|
||||
* <td><code>null</code>
|
||||
* <tr>
|
||||
* <td rowspan=5><code>number</code>
|
||||
* <td><i>(none)</i>
|
||||
* <td><code>NumberFormat.createInstance(getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><code>integer</code>
|
||||
* <td><code>NumberFormat.createInstance(getLocale(), kNumberStyle, status)</code>
|
||||
* <tr>
|
||||
* <td><code>currency</code>
|
||||
* <td><code>NumberFormat.createCurrencyInstance(getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><code>percent</code>
|
||||
* <td><code>NumberFormat.createPercentInstance(getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><i>argStyleText</i>
|
||||
* <td><code>new DecimalFormat(argStyleText, new DecimalFormatSymbols(getLocale(), status), status)</code>
|
||||
* <tr>
|
||||
* <td rowspan=6><code>date</code>
|
||||
* <td><i>(none)</i>
|
||||
* <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><code>short</code>
|
||||
* <td><code>DateFormat.createDateInstance(kShort, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><code>medium</code>
|
||||
* <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><code>long</code>
|
||||
* <td><code>DateFormat.createDateInstance(kLong, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><code>full</code>
|
||||
* <td><code>DateFormat.createDateInstance(kFull, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><i>argStyleText</i>
|
||||
* <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td rowspan=6><code>time</code>
|
||||
* <td><i>(none)</i>
|
||||
* <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><code>short</code>
|
||||
* <td><code>DateFormat.createTimeInstance(kShort, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><code>medium</code>
|
||||
* <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><code>long</code>
|
||||
* <td><code>DateFormat.createTimeInstance(kLong, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><code>full</code>
|
||||
* <td><code>DateFormat.createTimeInstance(kFull, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><i>argStyleText</i>
|
||||
* <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code>
|
||||
* <tr>
|
||||
* <td><code>spellout</code>
|
||||
* <td><i>argStyleText (optional)</i>
|
||||
* <td><code>new RuleBasedNumberFormat(URBNF_SPELLOUT, getLocale(), status)
|
||||
* <br/> .setDefaultRuleset(argStyleText, status);</code>
|
||||
* <tr>
|
||||
* <td><code>ordinal</code>
|
||||
* <td><i>argStyleText (optional)</i>
|
||||
* <td><code>new RuleBasedNumberFormat(URBNF_ORDINAL, getLocale(), status)
|
||||
* <br/> .setDefaultRuleset(argStyleText, status);</code>
|
||||
* <tr>
|
||||
* <td><code>duration</code>
|
||||
* <td><i>argStyleText (optional)</i>
|
||||
* <td><code>new RuleBasedNumberFormat(URBNF_DURATION, getLocale(), status)
|
||||
* <br/> .setDefaultRuleset(argStyleText, status);</code>
|
||||
* </table>
|
||||
* <p>
|
||||
*
|
||||
* <h4>Usage Information</h4>
|
||||
*
|
||||
* <p>Here are some examples of usage:
|
||||
* Example 1:
|
||||
*
|
||||
* <pre>
|
||||
* \code
|
||||
* UErrorCode success = U_ZERO_ERROR;
|
||||
|
@ -148,10 +255,12 @@ class DateFormat;
|
|||
* // in the Force on planet 7.
|
||||
* \endcode
|
||||
* </pre>
|
||||
*
|
||||
* Typically, the message format will come from resources, and the
|
||||
* arguments will be dynamically set at runtime.
|
||||
* <P>
|
||||
* Example 2:
|
||||
*
|
||||
* <p>Example 2:
|
||||
*
|
||||
* <pre>
|
||||
* \code
|
||||
* success = U_ZERO_ERROR;
|
||||
|
@ -171,122 +280,40 @@ class DateFormat;
|
|||
* \endcode
|
||||
* </pre>
|
||||
*
|
||||
* The pattern is of the following form. Legend:
|
||||
* <pre>
|
||||
* \code
|
||||
* {optional item}
|
||||
* (group that may be repeated)*
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Do not confuse optional items with items inside quoted braces, such
|
||||
* as this: "{". Quoted braces are literals.
|
||||
* <pre>
|
||||
* \code
|
||||
* messageFormatPattern := string ( "{" messageFormatElement "}" string )*
|
||||
*
|
||||
* messageFormatElement := argumentIndex | argumentName { "," elementFormat }
|
||||
*
|
||||
* elementFormat := "time" { "," datetimeStyle }
|
||||
* | "date" { "," datetimeStyle }
|
||||
* | "number" { "," numberStyle }
|
||||
* | "choice" "," choiceStyle
|
||||
* | "spellout" { "," spelloutStyle }
|
||||
* | "ordinal" { "," spelloutStyle }
|
||||
* | "duration" { "," spelloutStyle }
|
||||
* | "plural" "," pluralStyle
|
||||
* | "select" "," selectStyle
|
||||
*
|
||||
* datetimeStyle := "short"
|
||||
* | "medium"
|
||||
* | "long"
|
||||
* | "full"
|
||||
* | dateFormatPattern
|
||||
*
|
||||
* numberStyle := "currency"
|
||||
* | "percent"
|
||||
* | "integer"
|
||||
* | numberFormatPattern
|
||||
*
|
||||
* choiceStyle := choiceFormatPattern
|
||||
*
|
||||
* pluralStyle := pluralFormatPattern
|
||||
*
|
||||
* selectStyle := selectFormatPattern
|
||||
*
|
||||
* spelloutStyle := ruleSetName
|
||||
* \endcode
|
||||
* </pre>
|
||||
* If there is no elementFormat, then the argument must be a string,
|
||||
* which is substituted. If there is no dateTimeStyle or numberStyle,
|
||||
* then the default format is used (e.g. NumberFormat::createInstance(),
|
||||
* DateFormat::createTimeInstance(DateFormat::kDefault, ...) or
|
||||
* DateFormat::createDateInstance(DateFormat::kDefault, ...). For
|
||||
* a RuleBasedNumberFormat, if there is no ruleSetName, the default
|
||||
* rule set is used. For a ChoiceFormat or PluralFormat or SelectFormat, the pattern
|
||||
* must always be specified, since there is no default.
|
||||
* <P>
|
||||
* In strings, single quotes can be used to quote syntax characters.
|
||||
* A literal single quote is represented by '', both within and outside
|
||||
* of single-quoted segments. Inside a
|
||||
* messageFormatElement, quotes are <EM>not</EM> removed. For example,
|
||||
* {1,number,$'#',##} will produce a number format with the pound-sign
|
||||
* quoted, with a result such as: "$#31,45".
|
||||
* <P>
|
||||
* If a pattern is used, then unquoted braces in the pattern, if any,
|
||||
* must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab
|
||||
* {0'}' de" and "ab } de" are not.
|
||||
* <p>
|
||||
* <dl><dt><b>Warning:</b><dd>The rules for using quotes within message
|
||||
* format patterns unfortunately have shown to be somewhat confusing.
|
||||
* In particular, it isn't always obvious to localizers whether single
|
||||
* quotes need to be doubled or not. Make sure to inform localizers about
|
||||
* the rules, and tell them (for example, by using comments in resource
|
||||
* bundle source files) which strings will be processed by MessageFormat.
|
||||
* Note that localizers may need to use single quotes in translated
|
||||
* strings where the original version doesn't have them.
|
||||
* <br>Note also that the simplest way to avoid the problem is to
|
||||
* use the real apostrophe (single quote) character U+2019 (') for
|
||||
* human-readable text, and to use the ASCII apostrophe (U+0027 ' )
|
||||
* only in program syntax, like quoting in MessageFormat.
|
||||
* See the annotations for U+0027 Apostrophe in The Unicode Standard.</p>
|
||||
* </dl>
|
||||
* <P>
|
||||
* The argumentIndex is a non-negative integer, which corresponds to the
|
||||
* index of the arguments presented in an array to be formatted. The
|
||||
* first argument has argumentIndex 0.
|
||||
* <P>
|
||||
* It is acceptable to have unused arguments in the array. With missing
|
||||
* arguments, or arguments that are not of the right class for the
|
||||
* specified format, a failing UErrorCode result is set.
|
||||
* <P>
|
||||
* <strong>Creating internationalized messages that include plural forms, you
|
||||
* can use a PluralFormat:</strong>
|
||||
* <p>For messages that include plural forms, you can use a plural argument:
|
||||
* <pre>
|
||||
* \code
|
||||
* UErrorCode err = U_ZERO_ERROR;
|
||||
* UnicodeString t1("{0, plural, one{C''est # fichier} other{Ce sont # fichiers}} dans la liste.");
|
||||
* MessageFormat* msgFmt = new MessageFormat(t1, Locale("fr"), err);
|
||||
* if (U_FAILURE(err)) {
|
||||
* return err;
|
||||
* }
|
||||
*
|
||||
* Formattable args1[] = {(int32_t)0};
|
||||
* Formattable args2[] = {(int32_t)3};
|
||||
* FieldPosition ignore(FieldPosition::DONT_CARE);
|
||||
* success = U_ZERO_ERROR;
|
||||
* MessageFormat msgFmt(
|
||||
* "{num_files, plural, "
|
||||
* "=0{There are no files on disk \"{disk_name}\".}"
|
||||
* "=1{There is one file on disk \"{disk_name}\".}"
|
||||
* "other{There are # files on disk \"{disk_name}\".}}",
|
||||
* Locale("en"),
|
||||
* success);
|
||||
* FieldPosition fpos = 0;
|
||||
* Formattable testArgs[] = {0L, "MyDisk"};
|
||||
* UnicodeString testArgsNames[] = {"num_files", "disk_name"};
|
||||
* UnicodeString result;
|
||||
* msgFmt->format(args1, 1, result, ignore, status);
|
||||
* cout << result << endl;
|
||||
* result.remove();
|
||||
* msgFmt->format(args2, 1, result, ignore, status);
|
||||
* cout << result << endl;
|
||||
*
|
||||
* // output, with different args
|
||||
* // output: C'est 0,0 fichier dans la liste.
|
||||
* // output: Ce sont 3 fichiers dans la liste."
|
||||
* cout << msgFmt.format(testArgs, testArgsNames, 2, result, fpos, 0, success);
|
||||
* testArgs[0] = 3L;
|
||||
* cout << msgFmt.format(testArgs, testArgsNames, 2, result, fpos, 0, success);
|
||||
* \endcode
|
||||
* <em>output</em>:
|
||||
* There are no files on disk "MyDisk".
|
||||
* There are 3 files on disk "MyDisk".
|
||||
* </pre>
|
||||
* Please check PluralFormat and PluralRules for details.
|
||||
* </P>
|
||||
* See {@link PluralFormat} and {@link PluralRules} for details.
|
||||
*
|
||||
* <h4><a name="synchronization">Synchronization</a></h4>
|
||||
*
|
||||
* <p>MessageFormats are not synchronized.
|
||||
* It is recommended to create separate format instances for each thread.
|
||||
* If multiple threads access a format concurrently, it must be synchronized
|
||||
* externally.
|
||||
*
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
class U_I18N_API MessageFormat : public Format {
|
||||
public:
|
||||
|
@ -331,8 +358,8 @@ public:
|
|||
* Constructs a new MessageFormat using the given pattern and locale.
|
||||
* @param pattern Pattern used to construct object.
|
||||
* @param newLocale The locale to use for formatting dates and numbers.
|
||||
* @param parseError Struct to recieve information on position
|
||||
* of error within the pattern.
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* @param status Input/output error code. If the
|
||||
* pattern cannot be parsed, set to failure code.
|
||||
* @stable ICU 2.0
|
||||
|
@ -376,15 +403,14 @@ public:
|
|||
virtual UBool operator==(const Format& other) const;
|
||||
|
||||
/**
|
||||
* Sets the locale. This locale is used for fetching default number or date
|
||||
* format information.
|
||||
* Sets the locale to be used for creating argument Format objects.
|
||||
* @param theLocale the new locale value to be set.
|
||||
* @stable ICU 2.0
|
||||
*/
|
||||
virtual void setLocale(const Locale& theLocale);
|
||||
|
||||
/**
|
||||
* Gets the locale. This locale is used for fetching default number or date
|
||||
* Gets the locale used for creating argument Format objects.
|
||||
* format information.
|
||||
* @return the locale of the object.
|
||||
* @stable ICU 2.0
|
||||
|
@ -405,8 +431,8 @@ public:
|
|||
* Applies the given pattern string to this message format.
|
||||
*
|
||||
* @param pattern The pattern to be applied.
|
||||
* @param parseError Struct to recieve information on position
|
||||
* of error within pattern.
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* @param status Input/output error code. If the
|
||||
* pattern cannot be parsed, set to failure code.
|
||||
* @stable ICU 2.0
|
||||
|
@ -415,6 +441,37 @@ public:
|
|||
UParseError& parseError,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Sets the UMessagePatternApostropheMode and the pattern used by this message format.
|
||||
* Parses the pattern and caches Format objects for simple argument types.
|
||||
* Patterns and their interpretation are specified in the
|
||||
* <a href="#patterns">class description</a>.
|
||||
* <p>
|
||||
* This method is best used only once on a given object to avoid confusion about the mode,
|
||||
* and after constructing the object with an empty pattern string to minimize overhead.
|
||||
*
|
||||
* @param pattern The pattern to be applied.
|
||||
* @param aposMode The new apostrophe mode.
|
||||
* @param parseError Struct to receive information on the position
|
||||
* of an error within the pattern.
|
||||
* Can be NULL.
|
||||
* @param status Input/output error code. If the
|
||||
* pattern cannot be parsed, set to failure code.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
virtual void applyPattern(const UnicodeString& pattern,
|
||||
UMessagePatternApostropheMode aposMode,
|
||||
UParseError* parseError,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* @return this instance's UMessagePatternApostropheMode.
|
||||
* @draft ICU 4.8
|
||||
*/
|
||||
UMessagePatternApostropheMode getApostropheMode() const {
|
||||
return msgPattern.getApostropheMode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a pattern that can be used to recreate this object.
|
||||
*
|
||||
|
@ -490,7 +547,7 @@ public:
|
|||
/**
|
||||
* Gets format names. This function returns formatNames in StringEnumerations
|
||||
* which can be used with getFormat() and setFormat() to export formattable
|
||||
* array from current MessageFormat to another. It is caller's resposibility
|
||||
* array from current MessageFormat to another. It is the caller's responsibility
|
||||
* to delete the returned formatNames.
|
||||
* @param status output param set to success/failure code.
|
||||
* @stable ICU 4.0
|
||||
|
@ -747,6 +804,7 @@ public:
|
|||
static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern,
|
||||
UErrorCode& status);
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if this MessageFormat uses named arguments,
|
||||
* and false otherwise. See class description.
|
||||
|
@ -795,33 +853,44 @@ public:
|
|||
*/
|
||||
static UClassID U_EXPORT2 getStaticClassID(void);
|
||||
|
||||
/**
|
||||
* Compares two Format objects. This is used for constructing the hash
|
||||
* tables.
|
||||
*
|
||||
* @param left pointer to a Format object. Must not be NULL.
|
||||
* @param right pointer to a Format object. Must not be NULL.
|
||||
*
|
||||
* @return whether the two objects are the same
|
||||
* @internal
|
||||
*/
|
||||
static UBool equalFormats(const void* left, const void* right);
|
||||
|
||||
private:
|
||||
|
||||
Locale fLocale;
|
||||
UnicodeString fPattern;
|
||||
MessagePattern msgPattern;
|
||||
Format** formatAliases; // see getFormats
|
||||
int32_t formatAliasesCapacity;
|
||||
UProperty idStart;
|
||||
UProperty idContinue;
|
||||
|
||||
MessageFormat(); // default constructor not implemented
|
||||
|
||||
/*
|
||||
* A structure representing one subformat of this MessageFormat.
|
||||
* Each subformat has a Format object, an offset into the plain
|
||||
* pattern text fPattern, and an argument number. The argument
|
||||
* number corresponds to the array of arguments to be formatted.
|
||||
* @internal
|
||||
*/
|
||||
class Subformat;
|
||||
/**
|
||||
* This provider helps defer instantiation of a PluralRules object
|
||||
* until we actually need to select a keyword.
|
||||
* For example, if the number matches an explicit-value selector like "=1"
|
||||
* we do not need any PluralRules.
|
||||
*/
|
||||
class PluralSelectorProvider : public PluralFormat::PluralSelector {
|
||||
public:
|
||||
PluralSelectorProvider(const Locale* loc);
|
||||
virtual ~PluralSelectorProvider();
|
||||
virtual UnicodeString select(double number, UErrorCode& ec) const;
|
||||
|
||||
/**
|
||||
* A MessageFormat contains an array of subformats. This array
|
||||
* needs to grow dynamically if the MessageFormat is modified.
|
||||
*/
|
||||
Subformat* subformats;
|
||||
int32_t subformatCount;
|
||||
int32_t subformatCapacity;
|
||||
void reset(const Locale* loc);
|
||||
private:
|
||||
const Locale* locale;
|
||||
PluralRules* rules;
|
||||
};
|
||||
|
||||
/**
|
||||
* A MessageFormat formats an array of arguments. Each argument
|
||||
|
@ -836,14 +905,14 @@ private:
|
|||
int32_t argTypeCapacity;
|
||||
|
||||
/**
|
||||
* Is true iff all argument names are non-negative numbers.
|
||||
*
|
||||
*/
|
||||
UBool isArgNumeric;
|
||||
* TRUE if there are different argTypes for the same argument.
|
||||
* This only matters when the MessageFormat is used in the plain C (umsg_xxx) API
|
||||
* where the pattern argTypes determine how the va_arg list is read.
|
||||
*/
|
||||
UBool hasArgTypeConflicts;
|
||||
|
||||
// Variable-size array management
|
||||
UBool allocateSubformats(int32_t capacity);
|
||||
UBool allocateArgTypes(int32_t capacity);
|
||||
UBool allocateArgTypes(int32_t capacity, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Default Format objects used when no format is specified and a
|
||||
|
@ -855,6 +924,11 @@ private:
|
|||
NumberFormat* defaultNumberFormat;
|
||||
DateFormat* defaultDateFormat;
|
||||
|
||||
UHashtable* cachedFormatters;
|
||||
UHashtable* customFormatArgStarts;
|
||||
|
||||
PluralSelectorProvider pluralProvider;
|
||||
|
||||
/**
|
||||
* Method to retrieve default formats (or NULL on failure).
|
||||
* These are semantically const, but may modify *this.
|
||||
|
@ -872,57 +946,93 @@ private:
|
|||
const UChar * const *list);
|
||||
|
||||
/**
|
||||
* Formats the array of arguments and copies the result into the
|
||||
* result buffer, updates the field position.
|
||||
*
|
||||
* @param arguments The formattable objects array.
|
||||
* @param cnt The array count.
|
||||
* @param appendTo Output parameter to receive result.
|
||||
* Result is appended to existing contents.
|
||||
* @param status Field position status.
|
||||
* @param recursionProtection
|
||||
* Initially zero. Bits 0..9 are used to indicate
|
||||
* that a parameter has already been seen, to
|
||||
* avoid recursion. Currently unused.
|
||||
* @param success The error code status.
|
||||
* @return Reference to 'appendTo' parameter.
|
||||
* Thin wrapper around the format(... AppendableWrapper ...) variant.
|
||||
* Wraps the destination UnicodeString into an AppendableWrapper and
|
||||
* supplies default values for some other parameters.
|
||||
*/
|
||||
UnicodeString& format( const Formattable* arguments,
|
||||
int32_t cnt,
|
||||
UnicodeString& appendTo,
|
||||
FieldPosition& status,
|
||||
int32_t recursionProtection,
|
||||
UErrorCode& success) const;
|
||||
UnicodeString& format(const Formattable* arguments,
|
||||
const UnicodeString *argumentNames,
|
||||
int32_t cnt,
|
||||
UnicodeString& appendTo,
|
||||
FieldPosition* pos,
|
||||
UErrorCode& status) const;
|
||||
|
||||
UnicodeString& format( const Formattable* arguments,
|
||||
const UnicodeString *argumentNames,
|
||||
int32_t cnt,
|
||||
UnicodeString& appendTo,
|
||||
FieldPosition& status,
|
||||
int32_t recursionProtection,
|
||||
UErrorCode& success) const;
|
||||
/**
|
||||
* Formats the arguments and writes the result into the
|
||||
* AppendableWrapper, updates the field position.
|
||||
*
|
||||
* @param msgStart Index to msgPattern part to start formatting from.
|
||||
* @param pluralNumber Zero except when formatting a plural argument sub-message
|
||||
* where a '#' is replaced by the format string for this number.
|
||||
* @param arguments The formattable objects array. (Must not be NULL.)
|
||||
* @param argumentNames NULL if numbered values are used. Otherwise the same
|
||||
* length as "arguments", and each entry is the name of the
|
||||
* corresponding argument in "arguments".
|
||||
* @param cnt The length of arguments (and of argumentNames if that is not NULL).
|
||||
* @param appendTo Output parameter to receive the result.
|
||||
* The result string is appended to existing contents.
|
||||
* @param pos Field position status.
|
||||
* @param success The error code status.
|
||||
*/
|
||||
void format(int32_t msgStart,
|
||||
double pluralNumber,
|
||||
const Formattable* arguments,
|
||||
const UnicodeString *argumentNames,
|
||||
int32_t cnt,
|
||||
AppendableWrapper& appendTo,
|
||||
FieldPosition* pos,
|
||||
UErrorCode& success) const;
|
||||
|
||||
void makeFormat(int32_t offsetNumber,
|
||||
UnicodeString* segments,
|
||||
UParseError& parseError,
|
||||
UErrorCode& success);
|
||||
UnicodeString getArgName(int32_t partIndex);
|
||||
|
||||
void setArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status);
|
||||
|
||||
void setCustomArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status);
|
||||
|
||||
int32_t nextTopLevelArgStart(int32_t partIndex) const;
|
||||
|
||||
bool argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber);
|
||||
|
||||
void cacheExplicitFormats(UErrorCode& status);
|
||||
|
||||
Format* createAppropriateFormat(UnicodeString& type,
|
||||
UnicodeString& style,
|
||||
Formattable::Type& formattableType,
|
||||
UParseError& parseError,
|
||||
UErrorCode& ec);
|
||||
|
||||
const Formattable* getArgFromListByName(const Formattable* arguments,
|
||||
const UnicodeString *argumentNames,
|
||||
int32_t cnt, UnicodeString& name) const;
|
||||
|
||||
Formattable* parse(int32_t msgStart,
|
||||
const UnicodeString& source,
|
||||
ParsePosition& pos,
|
||||
int32_t& count,
|
||||
UErrorCode& ec) const;
|
||||
|
||||
FieldPosition* updateMetaData(AppendableWrapper& dest, int32_t prevLength,
|
||||
FieldPosition* fp, const Formattable* argId) const;
|
||||
|
||||
Format* getCachedFormatter(int32_t argumentNumber) const;
|
||||
|
||||
UnicodeString getLiteralStringUntilNextArgument(int32_t from) const;
|
||||
|
||||
void copyObjects(const MessageFormat& that, UErrorCode& ec);
|
||||
|
||||
void formatComplexSubMessage(int32_t msgStart,
|
||||
double pluralNumber,
|
||||
const Formattable* arguments,
|
||||
const UnicodeString *argumentNames,
|
||||
int32_t cnt,
|
||||
AppendableWrapper& appendTo,
|
||||
UErrorCode& success) const;
|
||||
|
||||
/**
|
||||
* Convenience method that ought to be in NumberFormat
|
||||
*/
|
||||
NumberFormat* createIntegerFormat(const Locale& locale, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Checks the range of the source text to quote the special
|
||||
* characters, { and ' and copy to target buffer.
|
||||
* @param source
|
||||
* @param start the text offset to start the process of in the source string
|
||||
* @param end the text offset to end the process of in the source string
|
||||
* @param appendTo Output parameter to receive result.
|
||||
* Result is appended to existing contents.
|
||||
*/
|
||||
static void copyAndFixQuotes(const UnicodeString& appendTo, int32_t start, int32_t end, UnicodeString& target);
|
||||
|
||||
/**
|
||||
* Returns array of argument types in the parsed pattern
|
||||
* for use in C API. Only for the use of umsg_vformat(). Not
|
||||
|
@ -937,11 +1047,25 @@ private:
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns FALSE if the argument name is not legal.
|
||||
* @param argName argument name.
|
||||
* @return TRUE if the argument name is legal, otherwise return FALSE.
|
||||
* Resets the internal MessagePattern, and other associated caches.
|
||||
*/
|
||||
UBool isLegalArgName(const UnicodeString& argName) const;
|
||||
void resetPattern();
|
||||
|
||||
// A DummyFormat that we use solely to store a NULL value. UHash does
|
||||
// not support storing NULL values.
|
||||
class U_I18N_API DummyFormat : public Format {
|
||||
public:
|
||||
virtual UBool operator==(const Format&) const;
|
||||
virtual Format* clone() const;
|
||||
virtual UnicodeString& format(const Formattable&,
|
||||
UnicodeString& appendTo,
|
||||
FieldPosition&,
|
||||
UErrorCode& status) const;
|
||||
virtual void parseObject(const UnicodeString&,
|
||||
Formattable&,
|
||||
ParsePosition&) const;
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
};
|
||||
|
||||
friend class MessageFormatAdapter; // getFormatTypeList() access
|
||||
};
|
||||
|
@ -953,6 +1077,7 @@ MessageFormat::format(const Formattable& obj,
|
|||
return Format::format(obj, appendTo, status);
|
||||
}
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
*******************************************************************************
|
||||
* Copyright (C) 2007-2010, International Business Machines Corporation and
|
||||
* Copyright (C) 2007-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
*******************************************************************************
|
||||
*
|
||||
|
@ -25,6 +25,7 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "unicode/messagepattern.h"
|
||||
#include "unicode/numfmt.h"
|
||||
#include "unicode/plurrule.h"
|
||||
|
||||
|
@ -37,7 +38,7 @@ class Hashtable;
|
|||
* <code>PluralFormat</code> supports the creation of internationalized
|
||||
* messages with plural inflection. It is based on <i>plural
|
||||
* selection</i>, i.e. the caller specifies messages for each
|
||||
* plural case that can appear in the users language and the
|
||||
* plural case that can appear in the user's language and the
|
||||
* <code>PluralFormat</code> selects the appropriate message based on
|
||||
* the number.
|
||||
* </p>
|
||||
|
@ -51,7 +52,7 @@ class Hashtable;
|
|||
* each message and selects the message whose interval contains a
|
||||
* given number. This can only handle a finite number of
|
||||
* intervals. But in some languages, like Polish, one plural case
|
||||
* applies to infinitely many intervals (e.g., paucal applies to
|
||||
* applies to infinitely many intervals (e.g., the plural case applies to
|
||||
* numbers ending with 2, 3, or 4 except those ending with 12, 13, or
|
||||
* 14). Thus <code>ChoiceFormat</code> is not adequate.
|
||||
* </p><p>
|
||||
|
@ -62,17 +63,20 @@ class Hashtable;
|
|||
* conditions for a plural case than just a single interval. These plural
|
||||
* rules define both what plural cases exist in a language, and to
|
||||
* which numbers these cases apply.
|
||||
* <li>It provides predefined plural rules for many locales. Thus, the programmer
|
||||
* need not worry about the plural cases of a language. On the flip side,
|
||||
* the localizer does not have to specify the plural cases; he can simply
|
||||
* <li>It provides predefined plural rules for many languages. Thus, the programmer
|
||||
* need not worry about the plural cases of a language and
|
||||
* does not have to define the plural cases; they can simply
|
||||
* use the predefined keywords. The whole plural formatting of messages can
|
||||
* be done using localized patterns from resource bundles. For predefined plural
|
||||
* rules, see CLDR <i>Language Plural Rules</i> page at
|
||||
* rules, see the CLDR <i>Language Plural Rules</i> page at
|
||||
* http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
|
||||
* </ul>
|
||||
* </p>
|
||||
* <h4>Usage of <code>PluralFormat</code></h4>
|
||||
* <p>
|
||||
* <p>Note: Typically, plural formatting is done via <code>MessageFormat</code>
|
||||
* with a <code>plural</code> argument type,
|
||||
* rather than using a stand-alone <code>PluralFormat</code>.
|
||||
* </p><p>
|
||||
* This discussion assumes that you use <code>PluralFormat</code> with
|
||||
* a predefined set of plural rules. You can create one using one of
|
||||
* the constructors that takes a <code>locale</code> object. To
|
||||
|
@ -85,82 +89,46 @@ class Hashtable;
|
|||
* <h5>Patterns and Their Interpretation</h5>
|
||||
* <p>
|
||||
* The pattern text defines the message output for each plural case of the
|
||||
* used locale. The pattern is a sequence of
|
||||
* <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white
|
||||
* space characters. Each clause assigns the message <code><i>message</i></code>
|
||||
* to the plural case identified by <code><i>caseKeyword</i></code>.
|
||||
* </p><p>
|
||||
* There are 6 predefined casekeyword in ICU - 'zero', 'one', 'two', 'few', 'many' and
|
||||
* 'other'. You always have to define a message text for the default plural case
|
||||
* "<code>other</code>" which is contained in every rule set. If the plural
|
||||
* rules of the <code>PluralFormat</code> object do not contain a plural case
|
||||
* identified by <code><i>caseKeyword</i></code>, U_DEFAULT_KEYWORD_MISSING
|
||||
* will be set to status.
|
||||
* If you do not specify a message text for a particular plural case, the
|
||||
* message text of the plural case "<code>other</code>" gets assigned to this
|
||||
* plural case. If you specify more than one message for the same plural case,
|
||||
* U_DUPLICATE_KEYWORD will be set to status.
|
||||
* <br>
|
||||
* Spaces between <code><i>caseKeyword</i></code> and
|
||||
* <code><i>message</i></code> will be ignored; spaces within
|
||||
* <code><i>message</i></code> will be preserved.
|
||||
* </p><p>
|
||||
* The message text for a particular plural case may contain other message
|
||||
* format patterns. <code>PluralFormat</code> preserves these so that you
|
||||
* can use the strings produced by <code>PluralFormat</code> with other
|
||||
* formatters. If you are using <code>PluralFormat</code> inside a
|
||||
* <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
|
||||
* automatically evaluate the resulting format pattern.<br>
|
||||
* Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
|
||||
* in message texts to define a nested format pattern.<br>
|
||||
* The pound sign (<code>#</code>) will be interpreted as the number placeholder
|
||||
* in the message text, if it is not contained in curly braces (to preserve
|
||||
* <code>NumberFormat</code> patterns). <code>PluralFormat</code> will
|
||||
* replace each of those pound signs by the number passed to the
|
||||
* <code>format()</code> method. It will be formatted using a
|
||||
* <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
|
||||
* need special number formatting, you have to explicitly specify a
|
||||
* <code>NumberFormat</code> for the <code>PluralFormat</code> to use.
|
||||
* </p>
|
||||
* Example
|
||||
* specified locale. Syntax:
|
||||
* <pre>
|
||||
* \code
|
||||
* UErrorCode status = U_ZERO_ERROR;
|
||||
* MessageFormat* msgFmt = new MessageFormat(UnicodeString("{0, plural,
|
||||
* one{{0, number, C''est #,##0.0# fichier}} other {Ce sont # fichiers}} dans la liste."),
|
||||
* Locale("fr"), status);
|
||||
* if (U_FAILURE(status)) {
|
||||
* return;
|
||||
* }
|
||||
* Formattable args1[] = {(int32_t)0};
|
||||
* Formattable args2[] = {(int32_t)3};
|
||||
* FieldPosition ignore(FieldPosition::DONT_CARE);
|
||||
* UnicodeString result;
|
||||
* msgFmt->format(args1, 1, result, ignore, status);
|
||||
* cout << result << endl;
|
||||
* result.remove();
|
||||
* msgFmt->format(args2, 1, result, ignore, status);
|
||||
* cout << result << endl;
|
||||
* \endcode
|
||||
* pluralStyle = [offsetValue] (selector '{' message '}')+
|
||||
* offsetValue = "offset:" number
|
||||
* selector = explicitValue | keyword
|
||||
* explicitValue = '=' number // adjacent, no white space in between
|
||||
* keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
|
||||
* message: see {@link MessageFormat}
|
||||
* </pre>
|
||||
* Produces the output:<br>
|
||||
* <code>C'est 0,0 fichier dans la liste.</code><br>
|
||||
* <code>Ce sont 3 fichiers dans la liste.</code>
|
||||
* <p>
|
||||
* <strong>Note:</strong><br>
|
||||
* Currently <code>PluralFormat</code>
|
||||
* does not make use of quotes like <code>MessageFormat</code>.
|
||||
* If you use plural format strings with <code>MessageFormat</code> and want
|
||||
* to use a quote sign <code>'</code>, you have to write <code>''</code>.
|
||||
* <code>MessageFormat</code> unquotes this pattern and passes the unquoted
|
||||
* pattern to <code>PluralFormat</code>. It's a bit trickier if you use
|
||||
* nested formats that do quoting. In the example above, we wanted to insert
|
||||
* <code>'</code> in the number format pattern. Since
|
||||
* <code>NumberFormat</code> supports quotes, we had to insert
|
||||
* <code>''</code>. But since <code>MessageFormat</code> unquotes the
|
||||
* pattern before it gets passed to <code>PluralFormat</code>, we have to
|
||||
* double these quotes, i.e. write <code>''''</code>.
|
||||
* Pattern_White_Space between syntax elements is ignored, except
|
||||
* between the {curly braces} and their sub-message,
|
||||
* and between the '=' and the number of an explicitValue.
|
||||
*
|
||||
* </p><p>
|
||||
* There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and
|
||||
* 'other'. You always have to define a message text for the default plural case
|
||||
* <code>other</code> which is contained in every rule set.
|
||||
* If you do not specify a message text for a particular plural case, the
|
||||
* message text of the plural case <code>other</code> gets assigned to this
|
||||
* plural case.
|
||||
* </p><p>
|
||||
* When formatting, the input number is first matched against the explicitValue clauses.
|
||||
* If there is no exact-number match, then a keyword is selected by calling
|
||||
* the <code>PluralRules</code> with the input number <em>minus the offset</em>.
|
||||
* (The offset defaults to 0 if it is omitted from the pattern string.)
|
||||
* If there is no clause with that keyword, then the "other" clause is returned.
|
||||
* </p><p>
|
||||
* An unquoted pound sign (<code>#</code>) in the selected sub-message
|
||||
* itself (i.e., outside of arguments nested in the sub-message)
|
||||
* is replaced by the input number minus the offset.
|
||||
* The number-minus-offset value is formatted using a
|
||||
* <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
|
||||
* need special number formatting, you have to use a <code>MessageFormat</code>
|
||||
* and explicitly specify a <code>NumberFormat</code> argument.
|
||||
* <strong>Note:</strong> That argument is formatted without subtracting the offset!
|
||||
* If you need a custom format and have a non-zero offset, then you need to pass the
|
||||
* number-minus-offset value as a separate parameter.
|
||||
* </p>
|
||||
* For a usage example, see the {@link MessageFormat} class documentation.
|
||||
*
|
||||
* <h4>Defining Custom Plural Rules</h4>
|
||||
* <p>If you need to use <code>PluralFormat</code> with custom rules, you can
|
||||
* create a <code>PluralRules</code> object and pass it to
|
||||
|
@ -511,34 +479,63 @@ public:
|
|||
*/
|
||||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
private:
|
||||
typedef enum fmtToken {
|
||||
none,
|
||||
tLetter,
|
||||
tNumber,
|
||||
tSpace,
|
||||
tNumberSign,
|
||||
tLeftBrace,
|
||||
tRightBrace
|
||||
}fmtToken;
|
||||
private:
|
||||
|
||||
class PluralSelector {
|
||||
public:
|
||||
/**
|
||||
* Given a number, returns the appropriate PluralFormat keyword.
|
||||
*
|
||||
* @param number The number to be plural-formatted.
|
||||
* @param ec Error code.
|
||||
* @return The selected PluralFormat keyword.
|
||||
*/
|
||||
virtual UnicodeString select(double number, UErrorCode& ec) const = 0;
|
||||
};
|
||||
|
||||
class PluralSelectorAdapter : public PluralSelector {
|
||||
public:
|
||||
PluralSelectorAdapter() : pluralRules(NULL) {
|
||||
}
|
||||
|
||||
virtual ~PluralSelectorAdapter();
|
||||
|
||||
virtual UnicodeString select(double number, UErrorCode& /*ec*/) const;
|
||||
|
||||
void reset();
|
||||
|
||||
PluralRules* pluralRules;
|
||||
};
|
||||
|
||||
Locale locale;
|
||||
PluralRules* pluralRules;
|
||||
UnicodeString pattern;
|
||||
Hashtable *fParsedValuesHash;
|
||||
MessagePattern msgPattern;
|
||||
NumberFormat* numberFormat;
|
||||
NumberFormat* replacedNumberFormat;
|
||||
double offset;
|
||||
PluralSelectorAdapter pluralRulesWrapper;
|
||||
|
||||
PluralFormat(); // default constructor not implemented
|
||||
void init(const PluralRules* rules, const Locale& curlocale, UErrorCode& status);
|
||||
UBool inRange(UChar ch, fmtToken& type);
|
||||
UBool checkSufficientDefinition();
|
||||
void parsingFailure();
|
||||
UnicodeString insertFormattedNumber(double number,
|
||||
UnicodeString& message,
|
||||
UnicodeString& appendTo,
|
||||
FieldPosition& pos) const;
|
||||
void copyHashtable(Hashtable *other, UErrorCode& status);
|
||||
void init(const PluralRules* rules, UErrorCode& status);
|
||||
/**
|
||||
* Copies dynamically allocated values (pointer fields).
|
||||
* Others are copied using their copy constructors and assignment operators.
|
||||
*/
|
||||
void copyObjects(const PluralFormat& other);
|
||||
|
||||
/**
|
||||
* Finds the PluralFormat sub-message for the given number, or the "other" sub-message.
|
||||
* @param pattern A MessagePattern.
|
||||
* @param partIndex the index of the first PluralFormat argument style part.
|
||||
* @param selector the PluralSelector for mapping the number (minus offset) to a keyword.
|
||||
* @param number a number to be matched to one of the PluralFormat argument's explicit values,
|
||||
* or mapped via the PluralSelector.
|
||||
* @param ec ICU error code.
|
||||
* @return the sub-message start part index.
|
||||
*/
|
||||
static int32_t findSubMessage(
|
||||
const MessagePattern& pattern, int32_t partIndex,
|
||||
const PluralSelector& selector, double number, UErrorCode& ec);
|
||||
|
||||
friend class MessageFormat;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -84,15 +84,18 @@ class PluralKeywordEnumeration;
|
|||
* \endcode
|
||||
* </pre></p>
|
||||
* <p>
|
||||
* The difference between 'in' and 'within' is that 'in' only includes
|
||||
* integers in the specified range, while 'within' includes all values.</p>
|
||||
* <p>
|
||||
* Keywords
|
||||
* could be defined by users or from ICU locale data. There are 6
|
||||
* predefined values in ICU - 'zero', 'one', 'two', 'few', 'many' and
|
||||
* 'other'. Callers need to check the value of keyword returned by
|
||||
* {@link #select} method.
|
||||
* </p>
|
||||
* An "identifier" is a sequence of characters that do not have the
|
||||
* Unicode Pattern_Syntax or Pattern_White_Space properties.
|
||||
* <p>
|
||||
* The difference between 'in' and 'within' is that 'in' only includes
|
||||
* integers in the specified range, while 'within' includes all values.</p>
|
||||
* <p>
|
||||
* Keywords
|
||||
* could be defined by users or from ICU locale data. There are 6
|
||||
* predefined values in ICU - 'zero', 'one', 'two', 'few', 'many' and
|
||||
* 'other'. Callers need to check the value of keyword returned by
|
||||
* {@link #select} method.
|
||||
* </p>
|
||||
*
|
||||
* Examples:<pre>
|
||||
* UnicodeString keyword = pl->select(number);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
* Copyright (C) 2010 , Yahoo! Inc.
|
||||
********************************************************************
|
||||
|
@ -16,8 +16,9 @@
|
|||
#ifndef SELFMT
|
||||
#define SELFMT
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/messagepattern.h"
|
||||
#include "unicode/numfmt.h"
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
/**
|
||||
* \file
|
||||
|
@ -28,7 +29,7 @@
|
|||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class Hashtable;
|
||||
class MessageFormat;
|
||||
|
||||
/**
|
||||
* <p><code>SelectFormat</code> supports the creation of internationalized
|
||||
|
@ -40,6 +41,10 @@ class Hashtable;
|
|||
*
|
||||
* <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
|
||||
*
|
||||
* <p>Note: Typically, select formatting is done via <code>MessageFormat</code>
|
||||
* with a <code>select</code> argument type,
|
||||
* rather than using a stand-alone <code>SelectFormat</code>.</p>
|
||||
*
|
||||
* <p>The main use case for the select format is gender based inflection.
|
||||
* When names or nouns are inserted into sentences, their gender can affect pronouns,
|
||||
* verb forms, articles, and adjectives. Special care needs to be
|
||||
|
@ -73,6 +78,9 @@ class Hashtable;
|
|||
* but similar in grammatical use.
|
||||
* Some African languages have around 20 noun classes.</p>
|
||||
*
|
||||
* <p><b>Note:</b> For the gender of a <i>person</i> in a given sentence,
|
||||
* we usually need to distinguish only between female, male and other/unknown.</p>
|
||||
*
|
||||
* <p>To enable localizers to create sentence patterns that take their
|
||||
* language's gender dependencies into consideration, software has to provide
|
||||
* information about the gender associated with a noun or name to
|
||||
|
@ -81,8 +89,8 @@ class Hashtable;
|
|||
*
|
||||
* <ul>
|
||||
* <li>For people, natural gender information should be maintained for each person.
|
||||
* The keywords "male", "female", "mixed" (for groups of people)
|
||||
* and "unknown" are used.
|
||||
* Keywords like "male", "female", "mixed" (for groups of people)
|
||||
* and "unknown" could be used.
|
||||
*
|
||||
* <li>For nouns, grammatical gender information should be maintained for
|
||||
* each noun and per language, e.g., in resource bundles.
|
||||
|
@ -100,6 +108,11 @@ class Hashtable;
|
|||
*
|
||||
* <pre>{0} went to {2}.</pre>
|
||||
*
|
||||
* <p><b>Note:</b> The entire sentence should be included (and partially repeated)
|
||||
* inside each phrase. Otherwise translators would have to be trained on how to
|
||||
* move bits of the sentence in and out of the select argument of a message.
|
||||
* (The examples below do not follow this recommendation!)</p>
|
||||
*
|
||||
* <p>The sentence pattern for French, where the gender of the person affects
|
||||
* the form of the participle, uses a select format based on argument 1:</p>
|
||||
*
|
||||
|
@ -121,39 +134,24 @@ class Hashtable;
|
|||
*
|
||||
* <h4>Patterns and Their Interpretation</h4>
|
||||
*
|
||||
* <p>The <code>SelectFormat</code> pattern text defines the phrase output
|
||||
* <p>The <code>SelectFormat</code> pattern string defines the phrase output
|
||||
* for each user-defined keyword.
|
||||
* The pattern is a sequence of <code><i>keyword</i>{<i>phrase</i>}</code>
|
||||
* clauses.
|
||||
* Each clause assigns the phrase <code><i>phrase</i></code>
|
||||
* to the user-defined <code><i>keyword</i></code>.</p>
|
||||
* The pattern is a sequence of (keyword, message) pairs.
|
||||
* A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p>
|
||||
*
|
||||
* <p>Keywords must match the pattern [a-zA-Z][a-zA-Z0-9_-]*; keywords
|
||||
* that don't match this pattern result in the error code
|
||||
* <code>U_ILLEGAL_CHARACTER</code>.
|
||||
* You always have to define a phrase for the default keyword
|
||||
* <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p>
|
||||
*
|
||||
* <p>You always have to define a phrase for the default keyword
|
||||
* <code>other</code>; this phrase is returned when the keyword
|
||||
* provided to
|
||||
* the <code>format</code> method matches no other keyword.
|
||||
* If a pattern does not provide a phrase for <code>other</code>, the method
|
||||
* it's provided to returns the error <code>U_DEFAULT_KEYWORD_MISSING</code>.
|
||||
* If a pattern provides more than one phrase for the same keyword, the
|
||||
* error <code>U_DUPLICATE_KEYWORD</code> is returned.
|
||||
* <br>
|
||||
* Spaces between <code><i>keyword</i></code> and
|
||||
* <code>{<i>phrase</i>}</code> will be ignored; spaces within
|
||||
* <code>{<i>phrase</i>}</code> will be preserved.<p>
|
||||
* Pattern_White_Space between keywords and messages is ignored.
|
||||
* Pattern_White_Space within a message is preserved and output.</p>
|
||||
*
|
||||
* <p>The phrase for a particular select case may contain other message
|
||||
* format patterns. <code>SelectFormat</code> preserves these so that you
|
||||
* can use the strings produced by <code>SelectFormat</code> with other
|
||||
* formatters. If you are using <code>SelectFormat</code> inside a
|
||||
* <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
|
||||
* automatically evaluate the resulting format pattern.
|
||||
* Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
|
||||
* in phrases to define a nested format pattern.</p>
|
||||
*
|
||||
* <p>Example:
|
||||
* <p><pre>Example:
|
||||
* \htmlonly
|
||||
*
|
||||
* UErrorCode status = U_ZERO_ERROR;
|
||||
|
@ -342,31 +340,22 @@ public:
|
|||
virtual UClassID getDynamicClassID() const;
|
||||
|
||||
private:
|
||||
typedef enum classesForSelectFormat{
|
||||
tStartKeyword,
|
||||
tContinueKeyword,
|
||||
tLeftBrace,
|
||||
tRightBrace,
|
||||
tSpace,
|
||||
tOther
|
||||
}CharacterClass;
|
||||
|
||||
UnicodeString pattern;
|
||||
//Hash to store the keyword, phrase pairs.
|
||||
Hashtable *parsedValuesHash;
|
||||
friend class MessageFormat;
|
||||
|
||||
SelectFormat(); // default constructor not implemented.
|
||||
void initHashTable(UErrorCode &status);
|
||||
void cleanHashTable();
|
||||
|
||||
//For the applyPattern , classifies char.s in one of the characterClass.
|
||||
CharacterClass classifyCharacter(UChar ch) const;
|
||||
//Checks if the "other" keyword is present in pattern.
|
||||
UBool checkSufficientDefinition();
|
||||
//Checks if the keyword passed is valid.
|
||||
UBool checkValidKeyword(const UnicodeString& argKeyword) const;
|
||||
void parsingFailure();
|
||||
void copyHashtable(Hashtable *other, UErrorCode& status);
|
||||
/**
|
||||
* Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
|
||||
* @param pattern A MessagePattern.
|
||||
* @param partIndex the index of the first SelectFormat argument style part.
|
||||
* @param keyword a keyword to be matched to one of the SelectFormat argument's keywords.
|
||||
* @param ec Error code.
|
||||
* @return the sub-message start part index.
|
||||
*/
|
||||
static int32_t findSubMessage(const MessagePattern& pattern, int32_t partIndex,
|
||||
const UnicodeString& keyword, UErrorCode& ec);
|
||||
|
||||
MessagePattern msgPattern;
|
||||
};
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
* Copyright (C) 2010 , Yahoo! Inc.
|
||||
********************************************************************
|
||||
|
@ -13,7 +13,6 @@
|
|||
* Change history:
|
||||
*
|
||||
* 08/5/2001 Ram Added C wrappers for C++ API.
|
||||
*
|
||||
********************************************************************/
|
||||
|
||||
#ifndef UMSG_H
|
||||
|
@ -27,19 +26,30 @@
|
|||
#include "unicode/uloc.h"
|
||||
#include "unicode/parseerr.h"
|
||||
#include <stdarg.h>
|
||||
|
||||
/**
|
||||
* \file
|
||||
* \brief C API: MessageFormat
|
||||
*
|
||||
* <h2>Message Format C API </h2>
|
||||
* <h2>MessageFormat C API </h2>
|
||||
*
|
||||
* Provides means to produce concatenated messages in language-neutral way.
|
||||
* Use this for all concatenations that show up to end users.
|
||||
* <P>
|
||||
* Takes a set of objects, formats them, then inserts the formatted
|
||||
* strings into the pattern at the appropriate places.
|
||||
* <P>
|
||||
* Here are some examples of usage:
|
||||
* <p>MessageFormat prepares strings for display to users,
|
||||
* with optional arguments (variables/placeholders).
|
||||
* The arguments can occur in any order, which is necessary for translation
|
||||
* into languages with different grammars.
|
||||
*
|
||||
* <p>The opaque UMessageFormat type is a thin C wrapper around
|
||||
* a C++ MessageFormat. It is constructed from a <em>pattern</em> string
|
||||
* with arguments in {curly braces} which will be replaced by formatted values.
|
||||
*
|
||||
* <p>Currently, the C API supports only numbered arguments.
|
||||
*
|
||||
* <p>For details about the pattern syntax and behavior,
|
||||
* especially about the ASCII apostrophe vs. the
|
||||
* real apostrophe (single quote) character \htmlonly’\endhtmlonly (U+2019),
|
||||
* see the C++ MessageFormat class documentation.
|
||||
*
|
||||
* <p>Here are some examples of C API usage:
|
||||
* Example 1:
|
||||
* <pre>
|
||||
* \code
|
||||
|
@ -143,102 +153,6 @@
|
|||
* }
|
||||
* \endcode
|
||||
* </pre>
|
||||
*
|
||||
|
||||
* The pattern is of the following form. Legend:
|
||||
* <pre>
|
||||
* \code
|
||||
* {optional item}
|
||||
* (group that may be repeated)*
|
||||
* \endcode
|
||||
* </pre>
|
||||
* Do not confuse optional items with items inside quoted braces, such
|
||||
* as this: "{". Quoted braces are literals.
|
||||
* <pre>
|
||||
* \code
|
||||
* messageFormatPattern := string ( "{" messageFormatElement "}" string )*
|
||||
*
|
||||
* messageFormatElement := argument { "," elementFormat }
|
||||
*
|
||||
* elementFormat := "time" { "," datetimeStyle }
|
||||
* | "date" { "," datetimeStyle }
|
||||
* | "number" { "," numberStyle }
|
||||
* | "choice" "," choiceStyle
|
||||
* | "select" "," selectStyle
|
||||
*
|
||||
* datetimeStyle := "short"
|
||||
* | "medium"
|
||||
* | "long"
|
||||
* | "full"
|
||||
* | dateFormatPattern
|
||||
*
|
||||
* numberStyle := "currency"
|
||||
* | "percent"
|
||||
* | "integer"
|
||||
* | numberFormatPattern
|
||||
*
|
||||
* choiceStyle := choiceFormatPattern
|
||||
*
|
||||
* selectStyle := selectFormatPattern
|
||||
* \endcode
|
||||
* </pre>
|
||||
* If there is no elementFormat, then the argument must be a string,
|
||||
* which is substituted. If there is no dateTimeStyle or numberStyle,
|
||||
* then the default format is used (e.g. NumberFormat.getInstance(),
|
||||
* DateFormat.getDefaultTime() or DateFormat.getDefaultDate(). For
|
||||
* a ChoiceFormat, the pattern must always be specified, since there
|
||||
* is no default.
|
||||
* <P>
|
||||
* In strings, single quotes can be used to quote the "{" sign if
|
||||
* necessary. A real single quote is represented by ''. Inside a
|
||||
* messageFormatElement, quotes are [not] removed. For example,
|
||||
* {1,number,$'#',##} will produce a number format with the pound-sign
|
||||
* quoted, with a result such as: "$#31,45".
|
||||
* <P>
|
||||
* If a pattern is used, then unquoted braces in the pattern, if any,
|
||||
* must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab
|
||||
* {0'}' de" and "ab } de" are not.
|
||||
* <p>
|
||||
* <dl><dt><b>Warning:</b><dd>The rules for using quotes within message
|
||||
* format patterns unfortunately have shown to be somewhat confusing.
|
||||
* In particular, it isn't always obvious to localizers whether single
|
||||
* quotes need to be doubled or not. Make sure to inform localizers about
|
||||
* the rules, and tell them (for example, by using comments in resource
|
||||
* bundle source files) which strings will be processed by MessageFormat.
|
||||
* Note that localizers may need to use single quotes in translated
|
||||
* strings where the original version doesn't have them.
|
||||
* <br>Note also that the simplest way to avoid the problem is to
|
||||
* use the real apostrophe (single quote) character U+2019 (') for
|
||||
* human-readable text, and to use the ASCII apostrophe (U+0027 ' )
|
||||
* only in program syntax, like quoting in MessageFormat.
|
||||
* See the annotations for U+0027 Apostrophe in The Unicode Standard.</p>
|
||||
* </dl>
|
||||
* <P>
|
||||
* The argument is a number from 0 to 9, which corresponds to the
|
||||
* arguments presented in an array to be formatted.
|
||||
* <P>
|
||||
* It is ok to have unused arguments in the array. With missing
|
||||
* arguments or arguments that are not of the right class for the
|
||||
* specified format, a failing UErrorCode result is set.
|
||||
* <P>
|
||||
|
||||
* <P>
|
||||
* [Note:] As we see above, the string produced by a choice Format in
|
||||
* MessageFormat is treated specially; occurrences of '{' are used to
|
||||
* indicate subformats.
|
||||
* <P>
|
||||
* [Note:] Formats are numbered by order of variable in the string.
|
||||
* This is [not] the same as the argument numbering!
|
||||
* <pre>
|
||||
* \code
|
||||
* For example: with "abc{2}def{3}ghi{0}...",
|
||||
*
|
||||
* format0 affects the first variable {2}
|
||||
* format1 affects the second variable {3}
|
||||
* format2 affects the third variable {0}
|
||||
* \endcode
|
||||
* </pre>
|
||||
* and so on.
|
||||
*/
|
||||
|
||||
/**
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************
|
||||
*
|
||||
|
@ -736,8 +736,14 @@ static void TestMsgFormatChoice(void)
|
|||
str=(UChar*)malloc(sizeof(UChar) * 25);
|
||||
u_uastrcpy(str, "MyDisk");
|
||||
log_verbose("Testing message format with choice test #6\n:");
|
||||
/*There {0,choice,0#are no files|1#is one file|1<are {0,number,integer} files}.*/
|
||||
u_uastrcpy(pattern, "The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number,integer} files}");
|
||||
/*
|
||||
* Before ICU 4.8, umsg_xxx() did not detect conflicting argument types,
|
||||
* and this pattern had {0,number,integer} as the inner argument.
|
||||
* The choice argument has kDouble type while {0,number,integer} has kLong (int32_t).
|
||||
* ICU 4.8 and above detect this as an error.
|
||||
* We changed this pattern to work as intended.
|
||||
*/
|
||||
u_uastrcpy(pattern, "The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number} files}");
|
||||
u_uastrcpy(expected, "The disk MyDisk contains 100 files");
|
||||
resultlength=0;
|
||||
resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, 100., str);
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/***********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2009, International Business Machines Corporation
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation
|
||||
* and others. All Rights Reserved.
|
||||
***********************************************************************/
|
||||
|
||||
|
@ -25,36 +25,33 @@
|
|||
|
||||
#define CASE(id,test) case id: name = #test; if (exec) { logln(#test "---"); logln((UnicodeString)""); test(); } break;
|
||||
|
||||
void
|
||||
void
|
||||
MessageFormatRegressionTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
|
||||
{
|
||||
// if (exec) logln((UnicodeString)"TestSuite MessageFormatRegressionTest");
|
||||
switch (index) {
|
||||
CASE(0,Test4074764)
|
||||
CASE(1,Test4058973)
|
||||
CASE(2,Test4031438)
|
||||
CASE(3,Test4052223)
|
||||
CASE(4,Test4104976)
|
||||
CASE(5,Test4106659)
|
||||
CASE(6,Test4106660)
|
||||
CASE(7,Test4111739)
|
||||
CASE(8,Test4114743)
|
||||
CASE(9,Test4116444)
|
||||
CASE(10,Test4114739)
|
||||
CASE(11,Test4113018)
|
||||
CASE(12,Test4106661)
|
||||
CASE(13,Test4094906)
|
||||
CASE(14,Test4118592)
|
||||
CASE(15,Test4118594)
|
||||
CASE(16,Test4105380)
|
||||
CASE(17,Test4120552)
|
||||
CASE(18,Test4142938)
|
||||
CASE(19,TestChoicePatternQuote)
|
||||
CASE(20,Test4112104)
|
||||
CASE(21,TestAPI)
|
||||
|
||||
default: name = ""; break;
|
||||
}
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(Test4074764)
|
||||
//TESTCASE_AUTO(Test4058973) -- disabled/obsolete in ICU 4.8
|
||||
TESTCASE_AUTO(Test4031438)
|
||||
TESTCASE_AUTO(Test4052223)
|
||||
TESTCASE_AUTO(Test4104976)
|
||||
TESTCASE_AUTO(Test4106659)
|
||||
TESTCASE_AUTO(Test4106660)
|
||||
TESTCASE_AUTO(Test4111739)
|
||||
TESTCASE_AUTO(Test4114743)
|
||||
TESTCASE_AUTO(Test4116444)
|
||||
TESTCASE_AUTO(Test4114739)
|
||||
TESTCASE_AUTO(Test4113018)
|
||||
TESTCASE_AUTO(Test4106661)
|
||||
TESTCASE_AUTO(Test4094906)
|
||||
TESTCASE_AUTO(Test4118592)
|
||||
TESTCASE_AUTO(Test4118594)
|
||||
TESTCASE_AUTO(Test4105380)
|
||||
TESTCASE_AUTO(Test4120552)
|
||||
TESTCASE_AUTO(Test4142938)
|
||||
TESTCASE_AUTO(TestChoicePatternQuote)
|
||||
TESTCASE_AUTO(Test4112104)
|
||||
TESTCASE_AUTO(TestAPI)
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
UBool
|
||||
|
@ -149,8 +146,13 @@ void MessageFormatRegressionTest::Test4074764() {
|
|||
|
||||
/* @bug 4058973
|
||||
* MessageFormat.toPattern has weird rounding behavior.
|
||||
*
|
||||
* ICU 4.8: This test is commented out because toPattern() has been changed to return
|
||||
* the original pattern string, rather than reconstituting a new (equivalent) one.
|
||||
* This trivially eliminates issues with rounding or any other pattern string differences.
|
||||
*/
|
||||
void MessageFormatRegressionTest::Test4058973()
|
||||
/*
|
||||
void MessageFormatRegressionTest::Test4058973()
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
MessageFormat *fmt = new MessageFormat("{0,choice,0#no files|1#one file|1< {0,number,integer} files}", status);
|
||||
|
@ -166,7 +168,7 @@ void MessageFormatRegressionTest::Test4058973()
|
|||
}
|
||||
|
||||
delete fmt;
|
||||
}
|
||||
}*/
|
||||
/* @bug 4031438
|
||||
* More robust message formats.
|
||||
*/
|
||||
|
@ -258,7 +260,7 @@ void MessageFormatRegressionTest::Test4031438()
|
|||
failure(status, "messageFormatter->applyPattern", possibleDataError);
|
||||
tempBuffer.remove();
|
||||
tempBuffer = messageFormatter->format(params, 1, tempBuffer, pos, status);
|
||||
if (tempBuffer != "Double ' Quotes 7 test and quoted {1} test plus other {2} stuff.")
|
||||
if (tempBuffer != "Double ' Quotes 7 test and quoted {1} test plus 'other {2} stuff'.")
|
||||
dataerrln("quote format test (w/ params) failed. - %s", u_errorName(status));
|
||||
logln("Formatted with params : " + tempBuffer);
|
||||
|
||||
|
@ -911,12 +913,21 @@ void MessageFormatRegressionTest::Test4142938()
|
|||
*/
|
||||
void MessageFormatRegressionTest::TestChoicePatternQuote()
|
||||
{
|
||||
// ICU 4.8 ChoiceFormat (like PluralFormat & SelectFormat)
|
||||
// returns the chosen string unmodified, so that it is usable in a MessageFormat.
|
||||
// We modified the test strings accordingly.
|
||||
// Note: Without further formatting/trimming/etc., it is not possible
|
||||
// to get a single apostrophe as the last character of a non-final choice sub-message
|
||||
// because the single apostrophe before the pipe '|' would start quoted text.
|
||||
// Normally, ChoiceFormat is used inside a MessageFormat, where a double apostrophe
|
||||
// can be used in that case and will be formatted as a single one.
|
||||
// (Better: Use a "real" apostrophe, U+2019.)
|
||||
UnicodeString DATA [] = {
|
||||
// Pattern 0 value 1 value
|
||||
// {sfb} hacked - changed \u2264 to = (copied from Character Map)
|
||||
(UnicodeString)"0#can''t|1#can", (UnicodeString)"can't", (UnicodeString)"can",
|
||||
(UnicodeString)"0#'pound(#)=''#'''|1#xyz", (UnicodeString)"pound(#)='#'", (UnicodeString)"xyz",
|
||||
(UnicodeString)"0#'1<2 | 1=1'|1#''", (UnicodeString)"1<2 | 1=1", (UnicodeString)"'",
|
||||
"0#can't|1#can", "can't", "can",
|
||||
"0#pound(#)='#''|1#xyz", "pound(#)='#''", "xyz",
|
||||
"0#1<2 '| 1=1'|1#'", "1<2 '| 1=1'", "'",
|
||||
};
|
||||
for (int i=0; i<9; i+=3) {
|
||||
//try {
|
||||
|
@ -929,7 +940,7 @@ void MessageFormatRegressionTest::TestChoicePatternQuote()
|
|||
out = cf->format((double)j, out, pos);
|
||||
if (out != DATA[i+1+j])
|
||||
errln("Fail: Pattern \"" + DATA[i] + "\" x "+j+" -> " +
|
||||
out + "; want \"" + DATA[i+1+j] + '"');
|
||||
out + "; want \"" + DATA[i+1+j] + "\"");
|
||||
}
|
||||
UnicodeString pat;
|
||||
pat = cf->toPattern(pat);
|
||||
|
@ -937,9 +948,9 @@ void MessageFormatRegressionTest::TestChoicePatternQuote()
|
|||
ChoiceFormat *cf2 = new ChoiceFormat(pat, status);
|
||||
pat2 = cf2->toPattern(pat2);
|
||||
if (pat != pat2)
|
||||
errln("Fail: Pattern \"" + DATA[i] + "\" x toPattern -> \"" + pat + '"');
|
||||
errln("Fail: Pattern \"" + DATA[i] + "\" x toPattern -> \"" + pat + "\"");
|
||||
else
|
||||
logln("Ok: Pattern \"" + DATA[i] + "\" x toPattern -> \"" + pat + '"');
|
||||
logln("Ok: Pattern \"" + DATA[i] + "\" x toPattern -> \"" + pat + "\"");
|
||||
/*}
|
||||
catch (IllegalArgumentException e) {
|
||||
errln("Fail: Pattern \"" + DATA[i] + "\" -> " + e);
|
||||
|
@ -980,12 +991,12 @@ void MessageFormatRegressionTest::TestAPI() {
|
|||
|
||||
// Test adoptFormat
|
||||
MessageFormat *fmt = new MessageFormat("",status);
|
||||
format->adoptFormat("",fmt,status);
|
||||
format->adoptFormat("some_name",fmt,status); // Must at least pass a valid identifier.
|
||||
failure(status, "adoptFormat");
|
||||
|
||||
// Test getFormat
|
||||
format->setFormat((int32_t)0,*fmt);
|
||||
format->getFormat("",status);
|
||||
format->getFormat("some_other_name",status); // Must at least pass a valid identifier.
|
||||
failure(status, "getFormat");
|
||||
delete format;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 2007-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 2007-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -35,6 +35,8 @@ void PluralFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
|
|||
TESTCASE(0, pluralFormatBasicTest);
|
||||
TESTCASE(1, pluralFormatUnitTest);
|
||||
TESTCASE(2, pluralFormatLocaleTest);
|
||||
TESTCASE(3, pluralFormatExtendedTest);
|
||||
TESTCASE(4, pluralFormatExtendedParseTest);
|
||||
default: name = "";
|
||||
break;
|
||||
}
|
||||
|
@ -159,26 +161,28 @@ void PluralFormatTest::pluralFormatUnitTest(/*char *par*/)
|
|||
UNICODE_STRING_SIMPLE("odd {# is odd.} other{# is even.}"),
|
||||
UNICODE_STRING_SIMPLE("other{# is odd or even.}"),
|
||||
UNICODE_STRING_SIMPLE("odd{The number {0, number, #.#0} is odd.}other{The number {0, number, #.#0} is even.}"),
|
||||
UNICODE_STRING_SIMPLE("odd{The number {#} is odd.}other{The number {#} is even.}"),
|
||||
UNICODE_STRING_SIMPLE("odd{The number {1, number, #} is odd.}other{The number {2, number, #} is even.}"),
|
||||
};
|
||||
UnicodeString patternOddTestResult[PLURAL_PATTERN_DATA] = {
|
||||
UNICODE_STRING_SIMPLE(" is odd."),
|
||||
UNICODE_STRING_SIMPLE(" is odd or even."),
|
||||
UNICODE_STRING_SIMPLE("The number {0, number, #.#0} is odd."),
|
||||
UNICODE_STRING_SIMPLE("The number {#} is odd."),
|
||||
UNICODE_STRING_SIMPLE("The number {1, number, #} is odd."),
|
||||
};
|
||||
UnicodeString patternEvenTestResult[PLURAL_PATTERN_DATA] = {
|
||||
UNICODE_STRING_SIMPLE(" is even."),
|
||||
UNICODE_STRING_SIMPLE(" is odd or even."),
|
||||
UNICODE_STRING_SIMPLE("The number {0, number, #.#0} is even."),
|
||||
UNICODE_STRING_SIMPLE("The number {#} is even."),
|
||||
UNICODE_STRING_SIMPLE("The number {2, number, #} is even."),
|
||||
};
|
||||
UnicodeString checkSyntaxtData[PLURAL_SYNTAX_DATA] = {
|
||||
UNICODE_STRING_SIMPLE("odd{foo} odd{bar} other{foobar}"),
|
||||
UNICODE_STRING_SIMPLE("odd{foo} other{bar} other{foobar}"),
|
||||
// ICU 4.8 does not check for duplicate keywords any more.
|
||||
//UNICODE_STRING_SIMPLE("odd{foo} odd{bar} other{foobar}"),
|
||||
//UNICODE_STRING_SIMPLE("odd{foo} other{bar} other{foobar}"),
|
||||
UNICODE_STRING_SIMPLE("odd{foo}"),
|
||||
UNICODE_STRING_SIMPLE("otto{foo} other{bar}"),
|
||||
UNICODE_STRING_SIMPLE("1odd{foo} other{bar}"),
|
||||
// ICU 4.8 does not check for unknown keywords any more.
|
||||
//UNICODE_STRING_SIMPLE("otto{foo} other{bar}"),
|
||||
UNICODE_STRING_SIMPLE("*odd{foo} other{bar}"),
|
||||
UNICODE_STRING_SIMPLE("odd{foo},other{bar}"),
|
||||
UNICODE_STRING_SIMPLE("od d{foo} other{bar}"),
|
||||
UNICODE_STRING_SIMPLE("odd{foo}{foobar}other{foo}"),
|
||||
|
@ -264,7 +268,7 @@ void PluralFormatTest::pluralFormatUnitTest(/*char *par*/)
|
|||
}
|
||||
numberFormatTest(&pluralFmt, numFmt, 5, 5, NULL, NULL, FALSE, &message);
|
||||
pluralFmt.applyPattern(UNICODE_STRING_SIMPLE("odd__{odd} other{even}"), status);
|
||||
if (U_SUCCESS(status)) {
|
||||
if (pluralFmt.format(1, status) != UNICODE_STRING_SIMPLE("even")) {
|
||||
errln("SetLocale should reset rules but did not.");
|
||||
}
|
||||
status = U_ZERO_ERROR;
|
||||
|
@ -491,6 +495,73 @@ PluralFormatTest::pluralFormatLocaleTest(/*char *par*/)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
PluralFormatTest::pluralFormatExtendedTest(void) {
|
||||
const char *targets[] = {
|
||||
"There are no widgets.",
|
||||
"There is one widget.",
|
||||
"There is a bling widget and one other widget.",
|
||||
"There is a bling widget and 2 other widgets.",
|
||||
"There is a bling widget and 3 other widgets.",
|
||||
"Widgets, five (5-1=4) there be.",
|
||||
"There is a bling widget and 5 other widgets.",
|
||||
"There is a bling widget and 6 other widgets.",
|
||||
};
|
||||
|
||||
const char* fmt =
|
||||
"offset:1.0 "
|
||||
"=0 {There are no widgets.} "
|
||||
"=1.0 {There is one widget.} "
|
||||
"=5 {Widgets, five (5-1=#) there be.} "
|
||||
"one {There is a bling widget and one other widget.} "
|
||||
"other {There is a bling widget and # other widgets.}";
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString fmtString(fmt, -1, US_INV);
|
||||
PluralFormat pf(fmtString, status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("Failed to apply pattern - %s\n", u_errorName(status));
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < 7; ++i) {
|
||||
UnicodeString result = pf.format(i, status);
|
||||
if (U_FAILURE(status)) {
|
||||
errln("Failed to format - %s\n", u_errorName(status));
|
||||
}
|
||||
UnicodeString expected(targets[i], -1, US_INV);
|
||||
if (expected != result) {
|
||||
UnicodeString message("Expected '", -1, US_INV);
|
||||
message.append(expected);
|
||||
message.append(UnicodeString("' but got '", -1, US_INV));
|
||||
message.append(result);
|
||||
message.append("'", -1, US_INV);
|
||||
errln(message);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
PluralFormatTest::pluralFormatExtendedParseTest(void) {
|
||||
const char *failures[] = {
|
||||
"offset:1..0 =0 {Foo}",
|
||||
"offset:1.0 {Foo}",
|
||||
"=0= {Foo}",
|
||||
"=0 {Foo} =0.0 {Bar}",
|
||||
" = {Foo}",
|
||||
};
|
||||
int len = sizeof(failures)/sizeof(failures[0]);
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString fmt(failures[i], -1, US_INV);
|
||||
PluralFormat pf(fmt, status);
|
||||
if (U_SUCCESS(status)) {
|
||||
errln("expected failure when parsing '" + fmt + "'");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
PluralFormatTest::numberFormatTest(PluralFormat* plFmt,
|
||||
NumberFormat *numFmt,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2001, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -29,6 +29,8 @@ private:
|
|||
void pluralFormatBasicTest(/* char* par */);
|
||||
void pluralFormatUnitTest(/* char* par */);
|
||||
void pluralFormatLocaleTest(/* char* par */);
|
||||
void pluralFormatExtendedTest();
|
||||
void pluralFormatExtendedParseTest();
|
||||
void numberFormatTest(PluralFormat* plFmt,
|
||||
NumberFormat *numFmt,
|
||||
int32_t start,
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
* Copyright (C) 2010 , Yahoo! Inc.
|
||||
********************************************************************/
|
||||
|
@ -12,7 +12,8 @@
|
|||
#include "selfmts.h"
|
||||
#include "cmemory.h"
|
||||
#include "unicode/selfmt.h"
|
||||
#include "stdio.h"
|
||||
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
#define SIMPLE_PATTERN_STRING "feminine {feminineVerbValue} other{otherVerbValue}"
|
||||
|
||||
|
@ -81,10 +82,8 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/)
|
|||
};
|
||||
|
||||
UnicodeString checkSyntaxData[SELECT_SYNTAX_DATA] = {
|
||||
UNICODE_STRING_SIMPLE("odd{foo} odd{bar} other{foobar}"),
|
||||
UNICODE_STRING_SIMPLE("odd{foo} other{bar} other{foobar}"),
|
||||
UNICODE_STRING_SIMPLE("odd{foo}"),
|
||||
UNICODE_STRING_SIMPLE("1odd{foo} other{bar}"),
|
||||
UNICODE_STRING_SIMPLE("*odd{foo} other{bar}"),
|
||||
UNICODE_STRING_SIMPLE("odd{foo},other{bar}"),
|
||||
UNICODE_STRING_SIMPLE("od d{foo} other{bar}"),
|
||||
UNICODE_STRING_SIMPLE("odd{foo}{foobar}other{foo}"),
|
||||
|
@ -93,19 +92,6 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/)
|
|||
UNICODE_STRING_SIMPLE("odd{fo{o1}other{foo2}}")
|
||||
};
|
||||
|
||||
UErrorCode expErrorCodes[SELECT_SYNTAX_DATA]={
|
||||
U_DUPLICATE_KEYWORD,
|
||||
U_DUPLICATE_KEYWORD,
|
||||
U_DEFAULT_KEYWORD_MISSING,
|
||||
U_PATTERN_SYNTAX_ERROR,
|
||||
U_PATTERN_SYNTAX_ERROR,
|
||||
U_PATTERN_SYNTAX_ERROR,
|
||||
U_PATTERN_SYNTAX_ERROR,
|
||||
U_PATTERN_SYNTAX_ERROR,
|
||||
U_PATTERN_SYNTAX_ERROR,
|
||||
U_DEFAULT_KEYWORD_MISSING
|
||||
};
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
VERBOSE_USTRING(SIMPLE_PATTERN);
|
||||
SelectFormat* selFmt = new SelectFormat( SIMPLE_PATTERN , status);
|
||||
|
@ -113,7 +99,7 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/)
|
|||
dataerrln("ERROR: SelectFormat Unit Test constructor failed in unit tests.- exitting");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// ======= Test SelectFormat pattern syntax.
|
||||
logln("SelectFormat Unit Test : Testing SelectFormat pattern syntax.");
|
||||
for (int32_t i=0; i<SELECT_SYNTAX_DATA; ++i) {
|
||||
|
@ -121,11 +107,23 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/)
|
|||
VERBOSE_INT(i);
|
||||
VERBOSE_USTRING(checkSyntaxData[i]);
|
||||
selFmt->applyPattern(checkSyntaxData[i], status);
|
||||
if( status!= expErrorCodes[i] ){
|
||||
errln("\nERROR: Unexpected result - SelectFormat Unit Test failed to detect syntax error with pattern: "+checkSyntaxData[i]+" and expected status="+ u_errorName(expErrorCodes[i]) + " and resulted status="+u_errorName(status));
|
||||
if (U_SUCCESS(status)){
|
||||
errln("\nERROR: Unexpected result - SelectFormat Unit Test failed to detect syntax error with pattern: "+checkSyntaxData[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// ICU 4.8 does not check for duplicate keywords any more.
|
||||
status = U_ZERO_ERROR;
|
||||
selFmt->applyPattern("odd{foo} odd{bar} other{foobar}", status);
|
||||
FieldPosition format_ignore(FieldPosition::DONT_CARE);
|
||||
UnicodeString format_result;
|
||||
selFmt->format(UnicodeString("odd"), format_result, format_ignore, status);
|
||||
assertEquals("should use first occurrence of the 'odd' keyword", "foo", format_result);
|
||||
format_result.remove();
|
||||
selFmt->applyPattern("odd{foo} other{bar} other{foobar}", status);
|
||||
selFmt->format(UnicodeString("other"), format_result, format_ignore, status);
|
||||
assertEquals("should use first occurrence of the 'other' keyword", "bar", format_result);
|
||||
|
||||
delete selFmt;
|
||||
selFmt = NULL;
|
||||
|
||||
|
@ -166,27 +164,31 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/)
|
|||
}
|
||||
|
||||
//Test with an invalid keyword
|
||||
// one which contains Pattern_Syntax or Pattern_White_Space.
|
||||
logln("SelectFormat Unit test: Testing format() with keyword method and with invalid keywords...");
|
||||
status = U_ZERO_ERROR;
|
||||
result.remove();
|
||||
UnicodeString keywords[] = {
|
||||
"9Keyword-_", //Starts with a digit
|
||||
"-Keyword-_", //Starts with a hyphen
|
||||
"_Keyword-_", //Starts with a underscore
|
||||
"\\u00E9Keyword-_", //Starts with non-ASCII character
|
||||
"Key*word-_", //Contains a sepial character not allowed
|
||||
"*Keyword-_" //Starts with a sepial character not allowed
|
||||
"9Keyword-_",
|
||||
"-Keyword-_",
|
||||
"_Keyword-_",
|
||||
"\\u00E9Keyword-_",
|
||||
"Key word-_",
|
||||
" Keyword-_",
|
||||
"Key*word-_",
|
||||
"*Keyword-_"
|
||||
};
|
||||
|
||||
delete selFmt;
|
||||
selFmt = NULL;
|
||||
|
||||
selFmt = new SelectFormat( SIMPLE_PATTERN , status);
|
||||
for (int32_t i = 0; i< 6; i++ ){
|
||||
for (int32_t i = 0; i < LENGTHOF(keywords); i++ ){
|
||||
status = U_ZERO_ERROR;
|
||||
selFmt->format( keywords[i], result , ignore , status);
|
||||
if (!U_FAILURE(status)) {
|
||||
errln("ERROR: SelectFormat Unit test failed in format() with keyWord and with an invalid keyword as : "+ keywords[i]);
|
||||
errln("ERROR: SelectFormat Unit test failed in format() with keyWord and with an invalid keyword as : "+
|
||||
keywords[i]+" ("+u_errorName(status)+")");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2009, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -80,11 +80,23 @@ TestChoiceFormat::TestSimpleExample( void )
|
|||
}
|
||||
delete formequal;
|
||||
delete formnew;
|
||||
|
||||
|
||||
//Testing getLimits()
|
||||
double *gotLimits=0;
|
||||
int32_t count=0;
|
||||
gotLimits=(double*)form->getLimits(count);
|
||||
const double *gotLimits=form->getLimits(count);
|
||||
#if 1 // ICU 4.8 deprecates and disables the ChoiceFormat getters.
|
||||
if(count != 0 || gotLimits != NULL) {
|
||||
errln("getLimits() returns something, should be disabled");
|
||||
}
|
||||
const UnicodeString *gotFormats=form->getFormats(count);
|
||||
if(count != 0 || gotFormats != NULL) {
|
||||
errln("getFormats() returns something, should be disabled");
|
||||
}
|
||||
const UBool *gotClosures=form->getClosures(count);
|
||||
if(count != 0 || gotClosures != NULL) {
|
||||
errln("getClosures() returns something, should be disabled");
|
||||
}
|
||||
#else
|
||||
if(count != 7){
|
||||
errln("getLimits didn't update the count correctly\n");
|
||||
}
|
||||
|
@ -93,10 +105,9 @@ TestChoiceFormat::TestSimpleExample( void )
|
|||
errln((UnicodeString)"getLimits didn't get the limits correctly. Expected " + limits[ix] + " Got " + gotLimits[ix]);
|
||||
}
|
||||
}
|
||||
//Testing getFormat()
|
||||
//Testing getFormats()
|
||||
count=0;
|
||||
UnicodeString *gotFormats=0;
|
||||
gotFormats=(UnicodeString*)form->getFormats(count);
|
||||
const UnicodeString *gotFormats=form->getFormats(count);
|
||||
if(count != 7){
|
||||
errln("getFormats didn't update the count correctly\n");
|
||||
}
|
||||
|
@ -105,10 +116,9 @@ TestChoiceFormat::TestSimpleExample( void )
|
|||
errln((UnicodeString)"getFormats didn't get the Formats correctly. Expected " + monthNames[ix] + " Got " + gotFormats[ix]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
delete form;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -216,6 +226,7 @@ TestChoiceFormat::TestComplexExample( void )
|
|||
it_logln("------ additional testing in complex test ------");
|
||||
it_logln();
|
||||
//
|
||||
#if 0 // ICU 4.8 deprecates and disables the ChoiceFormat getters.
|
||||
int32_t retCount;
|
||||
const double* retLimits = fileform->getLimits( retCount );
|
||||
if ((retCount == 4) && (retLimits)
|
||||
|
@ -238,6 +249,7 @@ TestChoiceFormat::TestComplexExample( void )
|
|||
}else{
|
||||
it_errln("*** getFormats unexpected result!");
|
||||
}
|
||||
#endif
|
||||
|
||||
UnicodeString checkstr2[] = {
|
||||
"There is no folder on Disk_A",
|
||||
|
@ -486,6 +498,7 @@ void TestChoiceFormat::TestClosures(void) {
|
|||
errln("FAIL: fmt1 != fmt2");
|
||||
}
|
||||
|
||||
#if 0 // ICU 4.8 deprecates and disables the ChoiceFormat getters.
|
||||
int32_t i;
|
||||
int32_t count2 = 0;
|
||||
const double *limits2 = fmt2.getLimits(count2);
|
||||
|
@ -507,6 +520,7 @@ void TestChoiceFormat::TestClosures(void) {
|
|||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Now test both format objects
|
||||
UnicodeString exp[] = {
|
||||
|
@ -596,6 +610,7 @@ void TestChoiceFormat::TestPatterns(void) {
|
|||
1.0, "b",
|
||||
1.0 + 1e-9, "c");
|
||||
|
||||
#if 0 // ICU 4.8 only checks the pattern syntax, not whether the ranges make sense.
|
||||
// Try an invalid pattern that isolates a single value.
|
||||
// [-Inf,1.0) [1.0,1.0) [1.0,+Inf]
|
||||
_testPattern("0.0#a|1.0#b|1.0#c", FALSE,
|
||||
|
@ -614,6 +629,7 @@ void TestChoiceFormat::TestPatterns(void) {
|
|||
// [-Inf,2.0) [2.0,1.0) [1.0,+Inf]
|
||||
_testPattern("0.0#a|2.0#b|1.0#c", FALSE,
|
||||
0, 0, 0, 0, 0, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void TestChoiceFormat::TestChoiceFormatToPatternOverflow()
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************
|
||||
* File TMSGFMT.CPP
|
||||
|
@ -25,43 +25,46 @@
|
|||
#include "unicode/msgfmt.h"
|
||||
#include "unicode/numfmt.h"
|
||||
#include "unicode/choicfmt.h"
|
||||
#include "unicode/messagepattern.h"
|
||||
#include "unicode/selfmt.h"
|
||||
#include "unicode/gregocal.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define E_WITH_ACUTE ((char)0x00E9)
|
||||
static const char E_ACCENTED[]={E_WITH_ACUTE,0};
|
||||
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
|
||||
|
||||
void
|
||||
TestMessageFormat::runIndexedTest(int32_t index, UBool exec,
|
||||
const char* &name, char* /*par*/) {
|
||||
switch (index) {
|
||||
TESTCASE(0,testBug1);
|
||||
TESTCASE(1,testBug2);
|
||||
TESTCASE(2,sample);
|
||||
TESTCASE(3,PatternTest);
|
||||
TESTCASE(4,testStaticFormat);
|
||||
TESTCASE(5,testSimpleFormat);
|
||||
TESTCASE(6,testMsgFormatChoice);
|
||||
TESTCASE(7,testCopyConstructor);
|
||||
TESTCASE(8,testAssignment);
|
||||
TESTCASE(9,testClone);
|
||||
TESTCASE(10,testEquals);
|
||||
TESTCASE(11,testNotEquals);
|
||||
TESTCASE(12,testSetLocale);
|
||||
TESTCASE(13,testFormat);
|
||||
TESTCASE(14,testParse);
|
||||
TESTCASE(15,testAdopt);
|
||||
TESTCASE(16,testCopyConstructor2);
|
||||
TESTCASE(17,TestUnlimitedArgsAndSubformats);
|
||||
TESTCASE(18,TestRBNF);
|
||||
TESTCASE(19,TestTurkishCasing);
|
||||
TESTCASE(20,testAutoQuoteApostrophe);
|
||||
TESTCASE(21,testMsgFormatPlural);
|
||||
TESTCASE(22,testCoverage);
|
||||
TESTCASE(23,testMsgFormatSelect);
|
||||
default: name = ""; break;
|
||||
}
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(testBug1);
|
||||
TESTCASE_AUTO(testBug2);
|
||||
TESTCASE_AUTO(sample);
|
||||
TESTCASE_AUTO(PatternTest);
|
||||
TESTCASE_AUTO(testStaticFormat);
|
||||
TESTCASE_AUTO(testSimpleFormat);
|
||||
TESTCASE_AUTO(testMsgFormatChoice);
|
||||
TESTCASE_AUTO(testCopyConstructor);
|
||||
TESTCASE_AUTO(testAssignment);
|
||||
TESTCASE_AUTO(testClone);
|
||||
TESTCASE_AUTO(testEquals);
|
||||
TESTCASE_AUTO(testNotEquals);
|
||||
TESTCASE_AUTO(testSetLocale);
|
||||
TESTCASE_AUTO(testFormat);
|
||||
TESTCASE_AUTO(testParse);
|
||||
TESTCASE_AUTO(testAdopt);
|
||||
TESTCASE_AUTO(testCopyConstructor2);
|
||||
TESTCASE_AUTO(TestUnlimitedArgsAndSubformats);
|
||||
TESTCASE_AUTO(TestRBNF);
|
||||
TESTCASE_AUTO(TestTurkishCasing);
|
||||
TESTCASE_AUTO(testAutoQuoteApostrophe);
|
||||
TESTCASE_AUTO(testMsgFormatPlural);
|
||||
TESTCASE_AUTO(testMsgFormatSelect);
|
||||
TESTCASE_AUTO(testApostropheInPluralAndSelect);
|
||||
TESTCASE_AUTO(TestApostropheMode);
|
||||
TESTCASE_AUTO(TestCompatibleApostrophe);
|
||||
TESTCASE_AUTO(testCoverage);
|
||||
TESTCASE_AUTO(TestTrimArgumentName);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
void TestMessageFormat::testBug3()
|
||||
|
@ -261,7 +264,9 @@ void TestMessageFormat::PatternTest()
|
|||
"'{1,number,#,##}' {1,number,#,##}",
|
||||
};
|
||||
|
||||
UnicodeString testResultPatterns[] = {
|
||||
// ICU 4.8 returns the original pattern (testCases),
|
||||
// rather than toPattern() reconstituting a new, equivalent pattern string (testResultPatterns).
|
||||
/*UnicodeString testResultPatterns[] = {
|
||||
"Quotes '', '{', a {0} '{'0}",
|
||||
"Quotes '', '{', a {0,number} '{'0}",
|
||||
"'{'1,number,#,##} {1,number,'#'#,##}",
|
||||
|
@ -271,12 +276,12 @@ void TestMessageFormat::PatternTest()
|
|||
"'{'1,date,full}, {1,date,full},",
|
||||
"'{'3,date,full}, {3,date,full},",
|
||||
"'{'1,number,#,##} {1,number,#,##}"
|
||||
};
|
||||
};*/
|
||||
|
||||
UnicodeString testResultStrings[] = {
|
||||
"Quotes ', {, a 1 {0}",
|
||||
"Quotes ', {, a 1 {0}",
|
||||
"{1,number,#,##} #34,56",
|
||||
"Quotes ', {, 'a' 1 {0}",
|
||||
"Quotes ', {, 'a' 1 {0}",
|
||||
"{1,number,'#',##} #34,56",
|
||||
"There are 3,456 files on Disk at 1/12/70 5:46 AM.",
|
||||
"On Disk, there are 3,456 files, with $1.00.",
|
||||
"{1,number,percent}, 345,600%,",
|
||||
|
@ -298,11 +303,17 @@ void TestMessageFormat::PatternTest()
|
|||
logln(((UnicodeString)"MessageFormat for ") + testCases[i] + " creation failed.\n");
|
||||
continue;
|
||||
}
|
||||
if (form->toPattern(buffer) != testResultPatterns[i]) {
|
||||
// ICU 4.8 returns the original pattern (testCases),
|
||||
// rather than toPattern() reconstituting a new, equivalent pattern string (testResultPatterns).
|
||||
if (form->toPattern(buffer) != testCases[i]) {
|
||||
// Note: An alternative test would be to build MessagePattern objects for
|
||||
// both the input and output patterns and compare them, taking SKIP_SYNTAX etc.
|
||||
// into account.
|
||||
// (Too much trouble...)
|
||||
errln(UnicodeString("TestMessageFormat::PatternTest failed test #2, i = ") + i);
|
||||
//form->toPattern(buffer);
|
||||
errln(((UnicodeString)" Orig: ") + testCases[i]);
|
||||
errln(((UnicodeString)" Exp: ") + testResultPatterns[i]);
|
||||
errln(((UnicodeString)" Exp: ") + testCases[i]);
|
||||
errln(((UnicodeString)" Got: ") + buffer);
|
||||
}
|
||||
|
||||
|
@ -322,7 +333,7 @@ void TestMessageFormat::PatternTest()
|
|||
logln(UnicodeString(" Result: ") + result );
|
||||
logln(UnicodeString(" Expected: ") + testResultStrings[i] );
|
||||
}
|
||||
|
||||
|
||||
|
||||
//it_out << "Result: " << result);
|
||||
#if 0
|
||||
|
@ -534,7 +545,7 @@ void TestMessageFormat::testMsgFormatPlural(/* char* par */)
|
|||
UnicodeString t2("{argument, plural, one{C''est # fichier} other {Ce sont # fichiers}} dans la liste.");
|
||||
UnicodeString t3("There {0, plural, one{is # zavod}few{are {0, number,###.0} zavoda} other{are # zavodov}} in the directory.");
|
||||
UnicodeString t4("There {argument, plural, one{is # zavod}few{are {argument, number,###.0} zavoda} other{are #zavodov}} in the directory.");
|
||||
UnicodeString t5("{0, plural, one {{0, number,C''''est #,##0.0# fichier}} other {Ce sont # fichiers}} dans la liste.");
|
||||
UnicodeString t5("{0, plural, one {{0, number,C''est #,##0.0# fichier}} other {Ce sont # fichiers}} dans la liste.");
|
||||
MessageFormat* mfNum = new MessageFormat(t1, Locale("fr"), err);
|
||||
if (U_FAILURE(err)) {
|
||||
dataerrln("TestMessageFormat::testMsgFormatPlural #1 - argumentIndex - %s", u_errorName(err));
|
||||
|
@ -611,15 +622,32 @@ void TestMessageFormat::testMsgFormatPlural(/* char* par */)
|
|||
errln("TestMessageFormat::test nested PluralFormat with argumentName");
|
||||
}
|
||||
if ( argNameResult!= UnicodeString("C'est 0,0 fichier dans la liste.")) {
|
||||
errln(UnicodeString("TestMessageFormat::test nested named PluralFormat."));
|
||||
errln(UnicodeString("TestMessageFormat::test nested named PluralFormat: ") + argNameResult);
|
||||
logln(UnicodeString("The unexpected nested named PluralFormat."));
|
||||
}
|
||||
delete msgFmt;
|
||||
}
|
||||
|
||||
void TestMessageFormat::testApostropheInPluralAndSelect() {
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
MessageFormat msgFmt(UNICODE_STRING_SIMPLE(
|
||||
"abc_{0,plural,other{#'#'#'{'#''}}_def_{1,select,other{sel'}'ect''}}_xyz"),
|
||||
Locale::getEnglish(),
|
||||
errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
errln("MessageFormat constructor failed - %s\n", u_errorName(errorCode));
|
||||
return;
|
||||
}
|
||||
UnicodeString expected = UNICODE_STRING_SIMPLE("abc_3#3{3'_def_sel}ect'_xyz");
|
||||
Formattable args[] = { 3, UNICODE_STRING_SIMPLE("x") };
|
||||
internalFormat(
|
||||
&msgFmt, args, 2, expected,
|
||||
"MessageFormat with apostrophes in plural/select arguments failed:\n");
|
||||
}
|
||||
|
||||
void TestMessageFormat::internalFormat(MessageFormat* msgFmt ,
|
||||
Formattable* args , int32_t numOfArgs ,
|
||||
UnicodeString expected ,char* errMsg)
|
||||
UnicodeString expected, const char* errMsg)
|
||||
{
|
||||
UnicodeString result;
|
||||
FieldPosition ignore(FieldPosition::DONT_CARE);
|
||||
|
@ -1236,7 +1264,12 @@ void TestMessageFormat::testAdopt()
|
|||
}
|
||||
|
||||
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern(patCmp.remove()));
|
||||
assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
|
||||
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
|
||||
// assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
|
||||
msg.toPattern(patCmp.remove());
|
||||
if (!patCmp.isBogus()) {
|
||||
errln("msg.setFormat().toPattern() succeeds.");
|
||||
}
|
||||
|
||||
for (i = 0; i < countAct; i++) {
|
||||
a = formatsAct[i];
|
||||
|
@ -1279,7 +1312,8 @@ void TestMessageFormat::testAdopt()
|
|||
delete[] formatsToAdopt;
|
||||
|
||||
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern(patCmp.remove()));
|
||||
assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
|
||||
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
|
||||
// assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
|
||||
|
||||
formatsAct = msg.getFormats(countAct);
|
||||
if (!formatsAct || (countAct <=0) || (countAct != countCmp)) {
|
||||
|
@ -1330,7 +1364,8 @@ void TestMessageFormat::testAdopt()
|
|||
delete[] formatsToAdopt; // array itself not needed in this case;
|
||||
|
||||
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern(patCmp.remove()));
|
||||
assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
|
||||
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
|
||||
// assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
|
||||
|
||||
formatsAct = msg.getFormats(countAct);
|
||||
if (!formatsAct || (countAct <=0) || (countAct != countCmp)) {
|
||||
|
@ -1519,6 +1554,116 @@ void TestMessageFormat::TestRBNF(void) {
|
|||
delete numFmt;
|
||||
}
|
||||
|
||||
/**
 * Returns a copy of the pattern string of a parsed MessagePattern with the
 * text of every SKIP_SYNTAX part removed.
 *
 * @param pattern a MessagePattern that has already parsed a pattern string
 * @return the pattern string minus the ranges covered by SKIP_SYNTAX parts
 */
UnicodeString TestMessageFormat::GetPatternAndSkipSyntax(const MessagePattern& pattern) {
    UnicodeString stripped(pattern.getPatternString());
    // Visit the parts from last to first; presumably parts are stored in
    // pattern order, so deleting a later range leaves the indexes of the
    // parts still to be visited valid.
    for (int partIndex = pattern.countParts() - 1; partIndex >= 0; --partIndex) {
        const MessagePattern::Part& part = pattern.getPart(partIndex);
        if (part.getType() != UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
            continue;
        }
        const int32_t start = part.getIndex();
        stripped.remove(start, part.getLimit() - start);
    }
    return stripped;
}
|
||||
|
||||
void TestMessageFormat::TestApostropheMode() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
MessagePattern *ado_mp = new MessagePattern(UMSGPAT_APOS_DOUBLE_OPTIONAL, status);
|
||||
MessagePattern *adr_mp = new MessagePattern(UMSGPAT_APOS_DOUBLE_REQUIRED, status);
|
||||
if (ado_mp->getApostropheMode() != UMSGPAT_APOS_DOUBLE_OPTIONAL) {
|
||||
errln("wrong value from ado_mp->getApostropheMode().");
|
||||
}
|
||||
if (adr_mp->getApostropheMode() != UMSGPAT_APOS_DOUBLE_REQUIRED) {
|
||||
errln("wrong value from adr_mp->getApostropheMode().");
|
||||
}
|
||||
|
||||
|
||||
UnicodeString tuples[] = {
|
||||
// Desired output
|
||||
// DOUBLE_OPTIONAL pattern
|
||||
// DOUBLE_REQUIRED pattern (empty=same as DOUBLE_OPTIONAL)
|
||||
"I see {many}", "I see '{many}'", "",
|
||||
"I said {'Wow!'}", "I said '{''Wow!''}'", "",
|
||||
"I dont know", "I dont know", "I don't know",
|
||||
"I don't know", "I don't know", "I don''t know",
|
||||
"I don't know", "I don''t know", "I don''t know"
|
||||
};
|
||||
int32_t tuples_count = LENGTHOF(tuples);
|
||||
|
||||
for (int i = 0; i < tuples_count; i += 3) {
|
||||
UnicodeString& desired = tuples[i];
|
||||
UnicodeString& ado_pattern = tuples[i + 1];
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
assertEquals("DOUBLE_OPTIONAL failure",
|
||||
desired,
|
||||
GetPatternAndSkipSyntax(ado_mp->parse(ado_pattern, NULL, status)));
|
||||
UnicodeString& adr_pattern = tuples[i + 2].isEmpty() ? ado_pattern : tuples[i + 2];
|
||||
assertEquals("DOUBLE_REQUIRED failure", desired,
|
||||
GetPatternAndSkipSyntax(adr_mp->parse(adr_pattern, NULL, status)));
|
||||
}
|
||||
delete adr_mp;
|
||||
delete ado_mp;
|
||||
}
|
||||
|
||||
|
||||
// Compare behavior of DOUBLE_OPTIONAL (new default) and DOUBLE_REQUIRED JDK-compatibility mode.
|
||||
void TestMessageFormat::TestCompatibleApostrophe() {
|
||||
// Message with choice argument which does not contain another argument.
|
||||
// The JDK performs only one apostrophe-quoting pass on this pattern.
|
||||
UnicodeString pattern = "ab{0,choice,0#1'2''3'''4''''.}yz";
|
||||
|
||||
UErrorCode ec = U_ZERO_ERROR;
|
||||
MessageFormat compMsg("", Locale::getUS(), ec);
|
||||
compMsg.applyPattern(pattern, UMSGPAT_APOS_DOUBLE_REQUIRED, NULL, ec);
|
||||
if (compMsg.getApostropheMode() != UMSGPAT_APOS_DOUBLE_REQUIRED) {
|
||||
errln("wrong value from compMsg.getApostropheMode().");
|
||||
}
|
||||
|
||||
MessageFormat icuMsg("", Locale::getUS(), ec);
|
||||
icuMsg.applyPattern(pattern, UMSGPAT_APOS_DOUBLE_OPTIONAL, NULL, ec);
|
||||
if (icuMsg.getApostropheMode() != UMSGPAT_APOS_DOUBLE_OPTIONAL) {
|
||||
errln("wrong value from icuMsg.getApostropheMode().");
|
||||
}
|
||||
|
||||
Formattable zero0[] = { 0 };
|
||||
FieldPosition fieldpos(0);
|
||||
UnicodeString buffer1, buffer2;
|
||||
assertEquals("incompatible ICU MessageFormat compatibility-apostrophe behavior",
|
||||
"ab12'3'4''.yz",
|
||||
compMsg.format(zero0, 1, buffer1, fieldpos, ec));
|
||||
assertEquals("unexpected ICU MessageFormat double-apostrophe-optional behavior",
|
||||
"ab1'2'3''4''.yz",
|
||||
icuMsg.format(zero0, 1, buffer2, fieldpos, ec));
|
||||
|
||||
// Message with choice argument which contains a nested simple argument.
|
||||
// The DOUBLE_REQUIRED version performs two apostrophe-quoting passes.
|
||||
buffer1.remove();
|
||||
buffer2.remove();
|
||||
pattern = "ab{0,choice,0#1'2''3'''4''''.{0,number,'#x'}}yz";
|
||||
compMsg.applyPattern(pattern, ec);
|
||||
icuMsg.applyPattern(pattern, ec);
|
||||
assertEquals("incompatible ICU MessageFormat compatibility-apostrophe behavior",
|
||||
"ab1234'.0xyz",
|
||||
compMsg.format(zero0, 1, buffer1, fieldpos, ec));
|
||||
assertEquals("unexpected ICU MessageFormat double-apostrophe-optional behavior",
|
||||
"ab1'2'3''4''.#x0yz",
|
||||
icuMsg.format(zero0, 1, buffer2, fieldpos, ec));
|
||||
|
||||
// This part is copied over from Java tests but cannot be properly tested here
|
||||
// because we do not have a live reference implementation with JDK behavior.
|
||||
// The JDK ChoiceFormat itself always performs one apostrophe-quoting pass.
|
||||
/*
|
||||
ChoiceFormat choice = new ChoiceFormat("0#1'2''3'''4''''.");
|
||||
assertEquals("unexpected JDK ChoiceFormat apostrophe behavior",
|
||||
"12'3'4''.",
|
||||
choice.format(0));
|
||||
choice.applyPattern("0#1'2''3'''4''''.{0,number,'#x'}");
|
||||
assertEquals("unexpected JDK ChoiceFormat apostrophe behavior",
|
||||
"12'3'4''.{0,number,#x}",
|
||||
choice.format(0));
|
||||
*/
|
||||
}
|
||||
|
||||
void TestMessageFormat::testAutoQuoteApostrophe(void) {
|
||||
const char* patterns[] = { // pattern, expected pattern
|
||||
"'", "''",
|
||||
|
@ -1595,7 +1740,10 @@ void TestMessageFormat::testCoverage(void) {
|
|||
}
|
||||
}
|
||||
|
||||
msgfmt->adoptFormat("adopt", &cf, status);
|
||||
// adoptFormat() takes ownership of the input Format object.
|
||||
// We need to clone the stack-allocated cf so that we do not attempt to delete cf.
|
||||
Format *cfClone = cf.clone();
|
||||
msgfmt->adoptFormat("adopt", cfClone, status);
|
||||
|
||||
delete en;
|
||||
delete msgfmt;
|
||||
|
@ -1609,18 +1757,38 @@ void TestMessageFormat::testCoverage(void) {
|
|||
errln("FAIL: Unable to detect usage of named arguments.");
|
||||
}
|
||||
|
||||
// Starting with ICU 4.8, we support setFormat(name, ...) and getFormatNames()
|
||||
// on a MessageFormat without named arguments.
|
||||
msgfmt->setFormat("formatName", cf, status);
|
||||
if (!U_FAILURE(status)) {
|
||||
errln("FAIL: Should fail to setFormat instead of passing.");
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: Should work to setFormat(name, ...) regardless of pattern.");
|
||||
}
|
||||
status = U_ZERO_ERROR;
|
||||
en = msgfmt->getFormatNames(status);
|
||||
if (!U_FAILURE(status)) {
|
||||
errln("FAIL: Should fail to get format names enumeration instead of passing.");
|
||||
if (U_FAILURE(status)) {
|
||||
errln("FAIL: Should work to get format names enumeration regardless of pattern.");
|
||||
}
|
||||
|
||||
delete en;
|
||||
delete msgfmt;
|
||||
}
|
||||
|
||||
void TestMessageFormat::TestTrimArgumentName() {
|
||||
// ICU 4.8 allows and ignores white space around argument names and numbers.
|
||||
IcuTestErrorCode errorCode(*this, "TestTrimArgumentName");
|
||||
MessageFormat m("a { 0 , number , '#,#'#.0 } z", Locale::getEnglish(), errorCode);
|
||||
Formattable args[1] = { 2 };
|
||||
FieldPosition ignore(0);
|
||||
UnicodeString result;
|
||||
assertEquals("trim-numbered-arg format() failed", "a #,#2.0 z",
|
||||
m.format(args, 1, result, ignore, errorCode));
|
||||
|
||||
m.applyPattern("x { _oOo_ , number , integer } y", errorCode);
|
||||
UnicodeString argName = UNICODE_STRING_SIMPLE("_oOo_");
|
||||
args[0].setLong(3);
|
||||
result.remove();
|
||||
assertEquals("trim-named-arg format() failed", "x 3 y",
|
||||
m.format(&argName, args, 1, result, errorCode));
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
#ifndef _TESTMESSAGEFORMAT
|
||||
|
@ -65,12 +65,14 @@ public:
|
|||
**/
|
||||
void testMsgFormatSelect(/* char* par */);
|
||||
|
||||
void testApostropheInPluralAndSelect();
|
||||
|
||||
/**
|
||||
* Internal method to format a MessageFormat object with passed args
|
||||
**/
|
||||
void internalFormat(MessageFormat* msgFmt ,
|
||||
Formattable* args , int32_t numOfArgs ,
|
||||
UnicodeString expected ,char* errMsg);
|
||||
UnicodeString expected, const char* errMsg);
|
||||
|
||||
/**
|
||||
* Internal method to create a MessageFormat object with passed args
|
||||
|
@ -89,7 +91,10 @@ public:
|
|||
*/
|
||||
void TestRBNF();
|
||||
|
||||
//
|
||||
void TestApostropheMode();
|
||||
|
||||
void TestCompatibleApostrophe();
|
||||
|
||||
/**
|
||||
* ------------ API tests ----------
|
||||
* These routines test various API functionality.
|
||||
|
@ -108,11 +113,13 @@ public:
|
|||
void testAdopt(void);
|
||||
void TestTurkishCasing(void);
|
||||
void testAutoQuoteApostrophe(void);
|
||||
void TestTrimArgumentName();
|
||||
|
||||
/* Provide better code coverage */
|
||||
void testCoverage(void);
|
||||
|
||||
private:
|
||||
UnicodeString GetPatternAndSkipSyntax(const MessagePattern& pattern);
|
||||
};
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -10,6 +10,7 @@
|
|||
#include "unicode/putil.h"
|
||||
#include "cstring.h"
|
||||
#include "hash.h"
|
||||
#include "patternprops.h"
|
||||
#include "normalizer2impl.h"
|
||||
#include "uparse.h"
|
||||
#include "ucdtest.h"
|
||||
|
@ -50,13 +51,15 @@ UnicodeTest::~UnicodeTest()
|
|||
|
||||
void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
|
||||
{
|
||||
if (exec) logln("TestSuite UnicodeTest: ");
|
||||
switch (index) {
|
||||
case 0: name = "TestAdditionalProperties"; if(exec) TestAdditionalProperties(); break;
|
||||
case 1: name = "TestBinaryValues"; if(exec) TestBinaryValues(); break;
|
||||
case 2: name = "TestConsistency"; if(exec) TestConsistency(); break;
|
||||
default: name = ""; break; //needed to end loop
|
||||
if(exec) {
|
||||
logln("TestSuite UnicodeTest: ");
|
||||
}
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(TestAdditionalProperties);
|
||||
TESTCASE_AUTO(TestBinaryValues);
|
||||
TESTCASE_AUTO(TestConsistency);
|
||||
TESTCASE_AUTO(TestPatternProperties);
|
||||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
//====================================================
|
||||
|
@ -339,7 +342,7 @@ void UnicodeTest::TestConsistency() {
|
|||
IcuTestErrorCode errorCode(*this, "TestConsistency");
|
||||
const Normalizer2 *nfd=Normalizer2::getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode);
|
||||
const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode);
|
||||
if(errorCode.isFailure()) {
|
||||
if(!nfcImpl->ensureCanonIterData(errorCode) || errorCode.isFailure()) {
|
||||
dataerrln("Normalizer2::getInstance(NFD) or Normalizer2Factory::getNFCImpl() failed - %s\n",
|
||||
errorCode.errorName());
|
||||
errorCode.reset();
|
||||
|
@ -369,3 +372,57 @@ void UnicodeTest::TestConsistency() {
|
|||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* Test various implementations of Pattern_Syntax & Pattern_White_Space.
|
||||
*/
|
||||
void UnicodeTest::TestPatternProperties() {
|
||||
IcuTestErrorCode errorCode(*this, "TestPatternProperties()");
|
||||
UnicodeSet syn_pp;
|
||||
UnicodeSet syn_prop(UNICODE_STRING_SIMPLE("[:Pattern_Syntax:]"), errorCode);
|
||||
UnicodeSet syn_list(UNICODE_STRING_SIMPLE(
|
||||
"[!-/\\:-@\\[-\\^`\\{-~"
|
||||
"\\u00A1-\\u00A7\\u00A9\\u00AB\\u00AC\\u00AE\\u00B0\\u00B1\\u00B6\\u00BB\\u00BF\\u00D7\\u00F7"
|
||||
"\\u2010-\\u2027\\u2030-\\u203E\\u2041-\\u2053\\u2055-\\u205E\\u2190-\\u245F\\u2500-\\u2775"
|
||||
"\\u2794-\\u2BFF\\u2E00-\\u2E7F\\u3001-\\u3003\\u3008-\\u3020\\u3030\\uFD3E\\uFD3F\\uFE45\\uFE46]"), errorCode);
|
||||
UnicodeSet ws_pp;
|
||||
UnicodeSet ws_prop(UNICODE_STRING_SIMPLE("[:Pattern_White_Space:]"), errorCode);
|
||||
UnicodeSet ws_list(UNICODE_STRING_SIMPLE("[\\u0009-\\u000D\\ \\u0085\\u200E\\u200F\\u2028\\u2029]"), errorCode);
|
||||
UnicodeSet syn_ws_pp;
|
||||
UnicodeSet syn_ws_prop(syn_prop);
|
||||
syn_ws_prop.addAll(ws_prop);
|
||||
for(UChar32 c=0; c<=0xffff; ++c) {
|
||||
if(PatternProps::isSyntax(c)) {
|
||||
syn_pp.add(c);
|
||||
}
|
||||
if(PatternProps::isWhiteSpace(c)) {
|
||||
ws_pp.add(c);
|
||||
}
|
||||
if(PatternProps::isSyntaxOrWhiteSpace(c)) {
|
||||
syn_ws_pp.add(c);
|
||||
}
|
||||
}
|
||||
compareUSets(syn_pp, syn_prop,
|
||||
"PatternProps.isSyntax()", "[:Pattern_Syntax:]", TRUE);
|
||||
compareUSets(syn_pp, syn_list,
|
||||
"PatternProps.isSyntax()", "[Pattern_Syntax ranges]", TRUE);
|
||||
compareUSets(ws_pp, ws_prop,
|
||||
"PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", TRUE);
|
||||
compareUSets(ws_pp, ws_list,
|
||||
"PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]", TRUE);
|
||||
compareUSets(syn_ws_pp, syn_ws_prop,
|
||||
"PatternProps.isSyntaxOrWhiteSpace()",
|
||||
"[[:Pattern_Syntax:][:Pattern_White_Space:]]", TRUE);
|
||||
}
|
||||
|
||||
// So far only minimal port of Java & cucdtst.c compareUSets().
|
||||
UBool
|
||||
UnicodeTest::compareUSets(const UnicodeSet &a, const UnicodeSet &b,
|
||||
const char *a_name, const char *b_name,
|
||||
UBool diffIsError) {
|
||||
UBool same= a==b;
|
||||
if(!same && diffIsError) {
|
||||
errln("Sets are different: %s vs. %s\n", a_name, b_name);
|
||||
}
|
||||
return same;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/********************************************************************
|
||||
* COPYRIGHT:
|
||||
* Copyright (c) 1997-2010, International Business Machines Corporation and
|
||||
* Copyright (c) 1997-2011, International Business Machines Corporation and
|
||||
* others. All Rights Reserved.
|
||||
********************************************************************/
|
||||
|
||||
|
@ -36,6 +36,7 @@ public:
|
|||
void TestAdditionalProperties();
|
||||
void TestBinaryValues();
|
||||
void TestConsistency();
|
||||
void TestPatternProperties();
|
||||
|
||||
private:
|
||||
|
||||
|
@ -50,5 +51,8 @@ private:
|
|||
|
||||
UnicodeSet derivedProps[30];
|
||||
U_NAMESPACE_QUALIFIER Hashtable *unknownPropertyNames;
|
||||
};
|
||||
|
||||
UBool compareUSets(const UnicodeSet &a, const UnicodeSet &b,
|
||||
const char *a_name, const char *b_name,
|
||||
UBool diffIsError);
|
||||
};
|
||||
|
|
|
@ -322,7 +322,6 @@ void UObjectTest::testIDs()
|
|||
{
|
||||
ids_count = 0;
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
static const UChar SMALL_STR[] = {0x51, 0x51, 0x51, 0}; // "QQQ"
|
||||
|
||||
#if !UCONFIG_NO_TRANSLITERATION || !UCONFIG_NO_FORMATTING
|
||||
UParseError parseError;
|
||||
|
@ -364,6 +363,8 @@ void UObjectTest::testIDs()
|
|||
TESTCLASSID_CTOR(DecimalFormatSymbols, (status));
|
||||
TESTCLASSID_DEFAULT(FieldPosition);
|
||||
TESTCLASSID_DEFAULT(Formattable);
|
||||
|
||||
static const UChar SMALL_STR[] = {0x51, 0x51, 0x51, 0}; // "QQQ"
|
||||
TESTCLASSID_CTOR(CurrencyAmount, (1.0, SMALL_STR, status));
|
||||
TESTCLASSID_CTOR(CurrencyUnit, (SMALL_STR, status));
|
||||
TESTCLASSID_NONE_FACTORY(LocaleDisplayNames, LocaleDisplayNames::createInstance("de"));
|
||||
|
@ -570,6 +571,7 @@ void UObjectTest::TestMFCCompatibility() {
|
|||
}
|
||||
|
||||
void UObjectTest::TestCompilerRTTI() {
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
NumberFormat *nf = NumberFormat::createInstance("de", errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
|
@ -587,6 +589,7 @@ void UObjectTest::TestCompilerRTTI() {
|
|||
errln("typeid(NumberFormat) failed");
|
||||
}
|
||||
delete nf;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* --------------- */
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 2004-2010, International Business Machines
|
||||
* Copyright (C) 2004-2011, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
* file name: filetst.c
|
||||
|
@ -910,8 +910,8 @@ static void TestCodepage(void) {
|
|||
}
|
||||
|
||||
static void TestCodepageFlush(void) {
|
||||
#if UCONFIG_NO_LEGACY_CONVERSION
|
||||
log_verbose("Skipping, legacy conversion is disabled.");
|
||||
#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_FORMATTING
|
||||
log_verbose("Skipping, legacy conversion or formatting is disabled.");
|
||||
#else
|
||||
UChar utf16String[] = { 0x39, 0x39, 0x39, 0x20, 0x65E0, 0x6CD6, 0x5728, 0x0000 };
|
||||
uint8_t inBuf[200];
|
||||
|
|
Loading…
Add table
Reference in a new issue