ICU-8319 merge MessageFormat 2011q1 work into trunk, from icu/branches/markus/msg48 -r 29400:29882

X-SVN-Rev: 29886
This commit is contained in:
Markus Scherer 2011-04-25 20:47:32 +00:00
parent fb5332c296
commit d743bb693e
38 changed files with 5467 additions and 3229 deletions

View file

@ -3,8 +3,7 @@
<html lang="en-US" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
<head>
<title>ReadMe for ICU 4.7.1 (4.8M1)</title>
<title>ReadMe for ICU 4.8</title>
<meta name="COPYRIGHT" content=
"Copyright (c) 1997-2011 IBM Corporation and others. All Rights Reserved." />
<meta name="KEYWORDS" content=
@ -214,6 +213,11 @@
this release, see the <a href="http://site.icu-project.org/download">ICU
download page</a>.</p>
<h3>MessageFormat Changes</h3>
<p>MessageFormat and related classes (choice/plural/select) have been reimplemented,
with several improvements and some incompatible changes.
See the <a href="http://site.icu-project.org/download/48">ICU 4.8 download</a> page for details.</p>
<h2><a name="Download" href="#Download" id="Download">How To Download the
Source Code</a></h2>

View file

@ -84,7 +84,7 @@ ucnv.o ucnv_bld.o ucnv_cnv.o ucnv_io.o ucnv_cb.o ucnv_err.o ucnvlat1.o \
ucnv_u7.o ucnv_u8.o ucnv_u16.o ucnv_u32.o ucnvscsu.o ucnvbocu.o \
ucnv_ext.o ucnvmbcs.o ucnv2022.o ucnvhz.o ucnv_lmb.o ucnvisci.o ucnvdisp.o ucnv_set.o ucnv_ct.o \
uresbund.o ures_cnv.o uresdata.o resbund.o resbund_cnv.o \
ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o loclikely.o locresdata.o \
messagepattern.o ucat.o locmap.o uloc.o locid.o locutil.o locavailable.o locdispnames.o loclikely.o locresdata.o \
bytestream.o stringpiece.o \
stringtriebuilder.o bytestriebuilder.o \
bytestrie.o bytestrieiterator.o \
@ -93,7 +93,7 @@ appendable.o ustr_cnv.o unistr_cnv.o unistr.o unistr_case.o unistr_props.o \
utf_impl.o ustring.o ustrcase.o ucasemap.o cstring.o ustrfmt.o ustrtrns.o ustr_wcs.o utext.o \
normalizer2impl.o normalizer2.o filterednormalizer2.o normlzr.o unorm.o unormcmp.o unorm_it.o \
chariter.o schriter.o uchriter.o uiter.o \
uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
patternprops.o uchar.o uprops.o ucase.o propname.o ubidi_props.o ubidi.o ubidiwrt.o ubidiln.o ushape.o \
uscript.o usc_impl.o unames.o \
utrie.o utrie2.o utrie2_builder.o bmpset.o unisetspan.o uset_props.o uniset_props.o uset.o uniset.o usetiter.o ruleiter.o caniter.o unifilt.o unifunct.o \
uarrsort.o brkiter.o ubrk.o brkeng.o dictbe.o triedict.o \

View file

@ -374,6 +374,7 @@
<ClCompile Include="unorm_it.c" />
<ClCompile Include="unormcmp.cpp" />
<ClCompile Include="bmpset.cpp" />
<ClCompile Include="patternprops.cpp" />
<ClCompile Include="propname.cpp" />
<ClCompile Include="ruleiter.cpp" />
<ClCompile Include="ucase.c" />
@ -408,6 +409,7 @@
<ClCompile Include="charstr.cpp" />
<ClCompile Include="cstring.c" />
<ClCompile Include="cwchar.c" />
<ClCompile Include="messagepattern.cpp" />
<ClCompile Include="schriter.cpp" />
<ClCompile Include="stringpiece.cpp" />
<ClCompile Include="stringtriebuilder.cpp" />
@ -1193,6 +1195,8 @@
<ClInclude Include="unorm_it.h" />
<ClInclude Include="unormimp.h" />
<ClInclude Include="bmpset.h" />
<ClInclude Include="messageimpl.h" />
<ClInclude Include="patternprops.h" />
<ClInclude Include="propname.h" />
<ClInclude Include="ruleiter.h" />
<CustomBuild Include="unicode\symtable.h">
@ -1432,6 +1436,20 @@
<ClInclude Include="charstr.h" />
<ClInclude Include="cstring.h" />
<ClInclude Include="cwchar.h" />
<CustomBuild Include="unicode\messagepattern.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy "%(FullPath)" ..\..\include\unicode
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\include\unicode\%(Filename)%(Extension);%(Outputs)</Outputs>
</CustomBuild>
<CustomBuild Include="unicode\rep.h">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">copy "%(FullPath)" ..\..\include\unicode
</Command>

View file

@ -0,0 +1,63 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: messageimpl.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011apr04
* created by: Markus W. Scherer
*/
#ifndef __MESSAGEIMPL_H__
#define __MESSAGEIMPL_H__
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/messagepattern.h"
U_NAMESPACE_BEGIN
/**
* Helper functions for use of MessagePattern.
* In Java, these are package-private methods in MessagePattern itself.
* In C++, they are declared here and implemented in messagepattern.cpp.
*/
class U_COMMON_API MessageImpl {
public:
    /**
     * Tests whether a pattern uses the apostrophe handling of the JDK
     * (and of ICU 4.6 and earlier).
     * @param msgPattern The pattern whose apostrophe mode is inspected.
     * @return TRUE if msgPattern.getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED
     */
    static UBool jdkAposMode(const MessagePattern &msgPattern) {
        const UMessagePatternApostropheMode mode=msgPattern.getApostropheMode();
        return mode==UMSGPAT_APOS_DOUBLE_REQUIRED;
    }

    /**
     * Appends the half-open substring s[start, limit[ to sb,
     * keeping only half of the apostrophes, according to JDK pattern behavior.
     * @param s     Source string.
     * @param start Index of the first code unit of the substring.
     * @param limit Index just past the last code unit of the substring.
     * @param sb    String to append to.
     */
    static void appendReducedApostrophes(const UnicodeString &s,
                                         int32_t start, int32_t limit,
                                         UnicodeString &sb);

    /**
     * Appends a sub-message of msgPattern to the result string.
     * SKIP_SYNTAX parts are omitted, and whole arguments are appended
     * via appendReducedApostrophes().
     * @param msgPattern The parsed pattern that contains the sub-message.
     * @param msgStart   Part index at which the sub-message starts.
     * @param result     String to append to.
     * @return result
     */
    static UnicodeString &appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
                                                            int32_t msgStart,
                                                            UnicodeString &result);

private:
    MessageImpl();  // no constructor: all static methods
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_FORMATTING
#endif // __MESSAGEIMPL_H__

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,218 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: patternprops.cpp
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011mar13
* created by: Markus W. Scherer
*/
#include "unicode/utypes.h"
#include "patternprops.h"
U_NAMESPACE_BEGIN
/*
* One byte per Latin-1 character (U+0000..U+00FF), indexed directly by code point.
* Bit 0 is set if either Pattern property is true,
* bit 1 if Pattern_Syntax is true,
* bit 2 if Pattern_White_Space is true.
* That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5.
*
* The lookup functions in this file each test a single bit:
* isSyntaxOrWhiteSpace() reads bit 0, isSyntax() bit 1 and isWhiteSpace() bit 2.
* Each row of the initializer covers 16 code points; the comment above a row
* names the code points in that row (and possibly following rows) that have a property.
*/
static const uint8_t latin1[256]={
// WS: 9..D
0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// WS: 20 Syntax: 21..2F
5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
// Syntax: 3A..40
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3,
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Syntax: 5B..5E
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
// Syntax: 60
3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Syntax: 7B..7E
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0,
// WS: 85
0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Syntax: A1..A7, A9, AB, AC, AE
0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0,
// Syntax: B0, B1, B6, BB, BF
3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Syntax: D7
0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// Syntax: F7
0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0
};
/*
* One byte per 32 characters from U+2000..U+303F indexing into
* a small table of 32-bit data words.
* The first two data words are all-zeros and all-ones.
*
* The entry for code point c is index2000[(c-0x2000)>>5]; its value selects a word
* in syntax2000[] or syntaxOrWhiteSpace2000[] (both tables share this index).
* 130 entries = (0x3040-0x2000)/32. Each row of the initializer covers
* 8 entries = 256 code points, as named by the trailing comment.
*/
static const uint8_t index2000[130]={
2, 3, 4, 0, 0, 0, 0, 0, // 20xx
0, 0, 0, 0, 5, 1, 1, 1, // 21xx
1, 1, 1, 1, 1, 1, 1, 1, // 22xx
1, 1, 1, 1, 1, 1, 1, 1, // 23xx
1, 1, 1, 0, 0, 0, 0, 0, // 24xx
1, 1, 1, 1, 1, 1, 1, 1, // 25xx
1, 1, 1, 1, 1, 1, 1, 1, // 26xx
1, 1, 1, 6, 7, 1, 1, 1, // 27xx
1, 1, 1, 1, 1, 1, 1, 1, // 28xx
1, 1, 1, 1, 1, 1, 1, 1, // 29xx
1, 1, 1, 1, 1, 1, 1, 1, // 2Axx
1, 1, 1, 1, 1, 1, 1, 1, // 2Bxx
0, 0, 0, 0, 0, 0, 0, 0, // 2Cxx
0, 0, 0, 0, 0, 0, 0, 0, // 2Dxx
1, 1, 1, 1, 0, 0, 0, 0, // 2Exx
0, 0, 0, 0, 0, 0, 0, 0, // 2Fxx
8, 9 // 3000..303F
};
/*
* One 32-bit integer per 32 characters. Ranges of all-false and all-true
* are mapped to the first two values, other ranges map to appropriate bit patterns.
*
* Words are selected via index2000[]. Within a word, bit (c&0x1f) is set
* if code point c has the Pattern_Syntax property (see isSyntax()).
* The trailing comments give the word's index and the code points whose bits are set.
*/
static const uint32_t syntax2000[]={
0,
0xffffffff,
0xffff0000, // 2: 2010..201F
0x7fff00ff, // 3: 2020..2027, 2030..203E
0x7feffffe, // 4: 2041..2053, 2055..205E
0xffff0000, // 5: 2190..219F
0x003fffff, // 6: 2760..2775
0xfff00000, // 7: 2794..279F
0xffffff0e, // 8: 3001..3003, 3008..301F
0x00010001 // 9: 3020, 3030
};
/*
* Same as syntax2000, but with additional bits set for the
* Pattern_White_Space characters 200E 200F 2028 2029.
*
* Only words 2 and 3 differ from syntax2000[]: word 2 additionally has
* bits 14 and 15 set (200E, 200F) and word 3 bits 8 and 9 (2028, 2029).
* Words are selected via index2000[] and tested at bit (c&0x1f)
* (see isSyntaxOrWhiteSpace()).
*/
static const uint32_t syntaxOrWhiteSpace2000[]={
0,
0xffffffff,
0xffffc000, // 2: 200E..201F
0x7fff03ff, // 3: 2020..2029, 2030..203E
0x7feffffe, // 4: 2041..2053, 2055..205E
0xffff0000, // 5: 2190..219F
0x003fffff, // 6: 2760..2775
0xfff00000, // 7: 2794..279F
0xffffff0e, // 8: 3001..3003, 3008..301F
0x00010001 // 9: 3020, 3030
};
UBool
PatternProps::isSyntax(UChar32 c) {
    // Latin-1: bit 1 of the per-character byte carries Pattern_Syntax.
    if(0<=c && c<=0xff) {
        return (UBool)((latin1[c]>>1)&1);
    }
    // U+2010..U+3030: one bit per code point, 32 code points per data word.
    if(0x2010<=c && c<=0x3030) {
        const uint32_t word=syntax2000[index2000[(c-0x2000)>>5]];
        return (UBool)((word>>(c&0x1f))&1);
    }
    // Everything else (including negative values) is not Pattern_Syntax,
    // except for these four code points.
    return (UBool)(c==0xfd3e || c==0xfd3f || c==0xfe45 || c==0xfe46);
}
UBool
PatternProps::isSyntaxOrWhiteSpace(UChar32 c) {
    // Latin-1: bit 0 of the per-character byte is set for either property.
    if(0<=c && c<=0xff) {
        return (UBool)(latin1[c]&1);
    }
    // U+200E..U+3030: one bit per code point, 32 code points per data word.
    if(0x200e<=c && c<=0x3030) {
        const uint32_t word=syntaxOrWhiteSpace2000[index2000[(c-0x2000)>>5]];
        return (UBool)((word>>(c&0x1f))&1);
    }
    // Everything else (including negative values) has neither property,
    // except for these four Pattern_Syntax code points.
    return (UBool)(c==0xfd3e || c==0xfd3f || c==0xfe45 || c==0xfe46);
}
UBool
PatternProps::isWhiteSpace(UChar32 c) {
    // Latin-1: bit 2 of the per-character byte carries Pattern_White_Space.
    if(0<=c && c<=0xff) {
        return (UBool)((latin1[c]>>2)&1);
    }
    // Beyond Latin-1 there are exactly four Pattern_White_Space code points;
    // negative values also end up here and yield FALSE.
    return (UBool)(c==0x200e || c==0x200f || c==0x2028 || c==0x2029);
}
const UChar *
PatternProps::skipWhiteSpace(const UChar *s, int32_t length) {
    // Advance over leading Pattern_White_Space, reading at most length code units.
    const UChar *p=s;
    for(int32_t remaining=length; remaining>0; --remaining) {
        if(!isWhiteSpace(*p)) {
            break;
        }
        ++p;
    }
    return p;
}
const UChar *
PatternProps::trimWhiteSpace(const UChar *s, int32_t &length) {
    // Nothing to trim in an empty string; length is left untouched.
    if(length<=0) {
        return s;
    }
    int32_t first=0;
    int32_t end=length;
    // Drop leading white space.
    while(first<end && isWhiteSpace(s[first])) {
        ++first;
    }
    // Drop trailing white space; never moves below the first non-white space.
    while(first<end && isWhiteSpace(s[end-1])) {
        --end;
    }
    length=end-first;
    return s+first;
}
UBool
PatternProps::isIdentifier(const UChar *s, int32_t length) {
    // An empty string is not an identifier.
    if(length<=0) {
        return FALSE;
    }
    // Any Pattern_Syntax or Pattern_White_Space code unit disqualifies the string.
    for(int32_t i=0; i<length; ++i) {
        if(isSyntaxOrWhiteSpace(s[i])) {
            return FALSE;
        }
    }
    return TRUE;
}
const UChar *
PatternProps::skipIdentifier(const UChar *s, int32_t length) {
    // Count the leading code units that are neither Pattern_Syntax nor Pattern_White_Space.
    int32_t i=0;
    while(i<length && !isSyntaxOrWhiteSpace(s[i])) {
        ++i;
    }
    return s+i;
}
U_NAMESPACE_END

View file

@ -0,0 +1,89 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: patternprops.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011mar13
* created by: Markus W. Scherer
*/
#ifndef __PATTERNPROPS_H__
#define __PATTERNPROPS_H__
#include "unicode/utypes.h"
U_NAMESPACE_BEGIN
/**
* Implements the immutable Unicode properties Pattern_Syntax and Pattern_White_Space.
* Hardcodes these properties, does not load data, does not depend on other ICU classes.
* <p>
* Note: Both properties include ASCII as well as non-ASCII, non-Latin-1 code points,
* and both properties only include BMP code points (no supplementary ones).
* Pattern_Syntax includes some unassigned code points.
* <p>
* [:Pattern_White_Space:] =
* [\u0009-\u000D\ \u0085\u200E\u200F\u2028\u2029]
* <p>
* [:Pattern_Syntax:] =
* [!-/\:-@\[-\^`\{-~\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE
* \u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7
* \u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E
* \u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F
* \u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46]
* @author mscherer
*/
class U_COMMON_API PatternProps {
public:
/**
* Tests for the Pattern_Syntax property.
* @param c Code point to test. Negative values yield FALSE.
* @return TRUE if c is a Pattern_Syntax code point.
*/
static UBool isSyntax(UChar32 c);
/**
* Tests for either of the two Pattern properties.
* @param c Code point to test. Negative values yield FALSE.
* @return TRUE if c is a Pattern_Syntax or Pattern_White_Space code point.
*/
static UBool isSyntaxOrWhiteSpace(UChar32 c);
/**
* Tests for the Pattern_White_Space property.
* @param c Code point to test. Negative values yield FALSE.
* @return TRUE if c is a Pattern_White_Space character.
*/
static UBool isWhiteSpace(UChar32 c);
/**
* Skips over Pattern_White_Space starting at s.
* @param s Start of the text.
* @param length Number of UChars available at s. Nothing is read if length<=0.
* @return The smallest pointer at or after s with a non-white space character,
* or s+length if all length UChars are white space.
*/
static const UChar *skipWhiteSpace(const UChar *s, int32_t length);
/**
* Trims Pattern_White_Space from both ends of the text without modifying the text itself.
* @param s Start of the text.
* @param length In/out parameter: on input the number of UChars at s,
* on output the number of UChars remaining after trimming
* (0 if the text consists only of white space).
* If length<=0 on input, then it is not modified and s is returned.
* @return s except with leading and trailing Pattern_White_Space removed and length adjusted.
*/
static const UChar *trimWhiteSpace(const UChar *s, int32_t &length);
/**
* Tests whether the string contains a "pattern identifier", that is,
* whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters.
* An empty string (length<=0) is not an identifier.
* @param s Start of the text.
* @param length Number of UChars at s.
* @return TRUE if s is not empty and
* there are no Pattern_White_Space or Pattern_Syntax characters in s.
*/
static UBool isIdentifier(const UChar *s, int32_t length);
/**
* Skips over a "pattern identifier" starting at index s.
* @param s Start of the text.
* @param length Number of UChars available at s. Nothing is read if length<=0.
* @return The smallest pointer at or after s with
* a Pattern_White_Space or Pattern_Syntax character,
* or s+length if there is no such character.
*/
static const UChar *skipIdentifier(const UChar *s, int32_t length);
private:
PatternProps(); // no constructor: all static methods
};
U_NAMESPACE_END
#endif // __PATTERNPROPS_H__

View file

@ -0,0 +1,918 @@
/*
*******************************************************************************
* Copyright (C) 2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: messagepattern.h
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2011mar14
* created by: Markus W. Scherer
*/
#ifndef __MESSAGEPATTERN_H__
#define __MESSAGEPATTERN_H__
/**
* \file
* \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
*/
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#include "unicode/parseerr.h"
#include "unicode/unistr.h"
/**
* Mode for when an apostrophe starts quoted literal text for MessageFormat output.
* The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
* (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
* <p>
* A pair of adjacent apostrophes always results in a single apostrophe in the output,
* even when the pair is between two single, text-quoting apostrophes.
* <p>
* The following table shows examples of desired MessageFormat.format() output
* with the pattern strings that yield that output.
* <p>
* <table>
* <tr>
* <th>Desired output</th>
* <th>DOUBLE_OPTIONAL</th>
* <th>DOUBLE_REQUIRED</th>
* </tr>
* <tr>
* <td>I see {many}</td>
* <td>I see '{many}'</td>
* <td>(same)</td>
* </tr>
* <tr>
* <td>I said {'Wow!'}</td>
* <td>I said '{''Wow!''}'</td>
* <td>(same)</td>
* </tr>
* <tr>
* <td>I don't know</td>
* <td>I don't know OR<br> I don''t know</td>
* <td>I don''t know</td>
* </tr>
* </table>
* @draft ICU 4.8
* @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
*/
enum UMessagePatternApostropheMode {
/**
* A literal apostrophe is represented by
* either a single or a double apostrophe pattern character.
* Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
* if it immediately precedes a curly brace {},
* or a pipe symbol | if inside a choice format,
* or a pound symbol # if inside a plural format.
* <p>
* This is the default behavior starting with ICU 4.8
* (unless overridden via UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE in uconfig.h).
* @draft ICU 4.8
*/
UMSGPAT_APOS_DOUBLE_OPTIONAL,
/**
* A literal apostrophe must be represented by
* a double apostrophe pattern character.
* A single apostrophe always starts quoted literal text.
* <p>
* This is the behavior of ICU 4.6 and earlier, and of the JDK.
* @draft ICU 4.8
*/
UMSGPAT_APOS_DOUBLE_REQUIRED
};
// Lets the enum type be named without the "enum" keyword.
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
/**
* MessagePattern::Part type constants.
* @draft ICU 4.8
*/
enum UMessagePatternPartType {
/**
* Start of a message pattern (main or nested).
* The length is 0 for the top-level message
* and for a choice argument sub-message, otherwise 1 for the '{'.
* The value indicates the nesting level, starting with 0 for the main message.
* <p>
* There is always a later MSG_LIMIT part.
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_MSG_START,
/**
* End of a message pattern (main or nested).
* The length is 0 for the top-level message and
* the last sub-message of a choice argument,
* otherwise 1 for the '}' or (in a choice argument style) the '|'.
* The value indicates the nesting level, starting with 0 for the main message.
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_MSG_LIMIT,
/**
* Indicates a substring of the pattern string which is to be skipped when formatting.
* For example, an apostrophe that begins or ends quoted text
* would be indicated with such a part.
* The value is undefined and currently always 0.
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_SKIP_SYNTAX,
/**
* Indicates that a syntax character needs to be inserted for auto-quoting.
* The length is 0.
* The value is the character code of the insertion character. (U+0027=APOSTROPHE)
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_INSERT_CHAR,
/**
* Indicates a syntactic (non-escaped) # symbol in a plural variant.
* When formatting, replace this part's substring with the
* (value-offset) for the plural argument value.
* The value is undefined and currently always 0.
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_REPLACE_NUMBER,
/**
* Start of an argument.
* The length is 1 for the '{'.
* The value is the ordinal value of the ArgType. Use getArgType().
* <p>
* This part is followed by either an ARG_NUMBER or ARG_NAME,
* followed by optional argument sub-parts (see UMessagePatternArgType constants)
* and finally an ARG_LIMIT part.
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_ARG_START,
/**
* End of an argument.
* The length is 1 for the '}'.
* The value is the ordinal value of the ArgType. Use getArgType().
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_ARG_LIMIT,
/**
* The argument number, provided by the value.
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_ARG_NUMBER,
/**
* The argument name.
* The value is undefined and currently always 0.
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_ARG_NAME,
/**
* The argument type.
* The value is undefined and currently always 0.
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_ARG_TYPE,
/**
* The argument style text.
* The value is undefined and currently always 0.
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_ARG_STYLE,
/**
* A selector substring in a "complex" argument style.
* The value is undefined and currently always 0.
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_ARG_SELECTOR,
/**
* An integer value, for example the offset or an explicit selector value
* in a PluralFormat style.
* The part value is the integer value.
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_ARG_INT,
/**
* A numeric value, for example the offset or an explicit selector value
* in a PluralFormat style.
* The part value is an index into an internal array of numeric values;
* use getNumericValue().
* @draft ICU 4.8
*/
UMSGPAT_PART_TYPE_ARG_DOUBLE
};
// Lets the enum type be named without the "enum" keyword.
typedef enum UMessagePatternPartType UMessagePatternPartType;
/**
* Argument type constants.
* Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
*
* Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
* with a nesting level one greater than the surrounding message.
* @draft ICU 4.8
*/
enum UMessagePatternArgType {
// Note: The ordinal value of one of these constants is what is stored
// as the value of an ARG_START or ARG_LIMIT part (see UMessagePatternPartType).
/**
* The argument has no specified type.
* @draft ICU 4.8
*/
UMSGPAT_ARG_TYPE_NONE,
/**
* The argument has a "simple" type which is provided by the ARG_TYPE part.
* An ARG_STYLE part might follow that.
* @draft ICU 4.8
*/
UMSGPAT_ARG_TYPE_SIMPLE,
/**
* The argument is a ChoiceFormat with one or more
* ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
* @draft ICU 4.8
*/
UMSGPAT_ARG_TYPE_CHOICE,
/**
* The argument is a PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
* (e.g., offset:1)
* and one or more (ARG_SELECTOR [explicit-value] message) tuples.
* If the selector has an explicit value (e.g., =2), then
* that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
* Otherwise the message immediately follows the ARG_SELECTOR.
* @draft ICU 4.8
*/
UMSGPAT_ARG_TYPE_PLURAL,
/**
* The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
* @draft ICU 4.8
*/
UMSGPAT_ARG_TYPE_SELECT
};
// Lets the enum type be named without the "enum" keyword.
typedef enum UMessagePatternArgType UMessagePatternArgType;
enum {
/**
* Return value from MessagePattern::validateArgumentName() for when
* the string is a valid "pattern identifier" but not a number.
* @draft ICU 4.8
*/
UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
/**
* Return value from MessagePattern::validateArgumentName() for when
* the string is invalid.
* It might not be a valid "pattern identifier",
* or it might have only ASCII digits but with a leading zero
* or with a number that is too large.
* @draft ICU 4.8
*/
UMSGPAT_ARG_NAME_NOT_VALID=-2
};
/**
* Special value that is returned by getNumericValue(Part) when no
* numeric value is defined for a part.
* The value is the double -123456789.
* @see MessagePattern::getNumericValue()
* @draft ICU 4.8
*/
#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
U_NAMESPACE_BEGIN
class MessagePatternDoubleList;
class MessagePatternPartsList;
/**
* Parses and represents ICU MessageFormat patterns.
* Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
* Used in the implementations of those classes as well as in tools
* for message validation, translation and format conversion.
* <p>
* The parser handles all syntax relevant for identifying message arguments.
* This includes "complex" arguments whose style strings contain
* nested MessageFormat pattern substrings.
* For "simple" arguments (with no nested MessageFormat pattern substrings),
* the argument style is not parsed any further.
* <p>
* The parser handles named and numbered message arguments and allows both in one message.
* <p>
* Once a pattern has been parsed successfully, iterate through the parsed data
* with countParts(), getPart() and related methods.
* <p>
* The data logically represents a parse tree, but is stored and accessed
* as a list of "parts" for fast and simple parsing and to minimize object allocations.
* Arguments and nested messages are best handled via recursion.
* For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
* the index of the corresponding _LIMIT "part".
* <p>
* List of "parts":
* <pre>
* message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
* argument = noneArg | simpleArg | complexArg
* complexArg = choiceArg | pluralArg | selectArg
*
* noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
* simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
* choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
* pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
* selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
*
* choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
* pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
* selectStyle = (ARG_SELECTOR message)+
* </pre>
* <ul>
* <li>Literal output text is not represented directly by "parts" but accessed
* between parts of a message, from one part's getLimit() to the next part's getIndex().
* <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
* <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
* the less-than-or-equal-to sign (U+2264).
* <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
* The optional numeric Part between each (ARG_SELECTOR, message) pair
* is the value of an explicit-number selector like "=2",
* otherwise the selector is a non-numeric identifier.
* <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
* </ul>
* <p>
* This class is not intended for public subclassing.
*
* @draft ICU 4.8
*/
class U_COMMON_API MessagePattern : public UObject {
public:
/**
* Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @draft ICU 4.8
*/
MessagePattern(UErrorCode &errorCode);
/**
* Constructs an empty MessagePattern.
* @param mode Explicit UMessagePatternApostropheMode.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @draft ICU 4.8
*/
MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
/**
* Constructs a MessagePattern with default UMessagePatternApostropheMode and
* parses the MessageFormat pattern string.
* @param pattern a MessageFormat pattern string
* @param parseError Struct to receive information on the position
* of an error within the pattern.
* Can be NULL.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* TODO: turn @throws into UErrorCode specifics?
* @throws IllegalArgumentException for syntax errors in the pattern string
* @throws IndexOutOfBoundsException if certain limits are exceeded
* (e.g., argument number too high, argument name too long, etc.)
* @throws NumberFormatException if a number could not be parsed
* @draft ICU 4.8
*/
MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
/**
* Copy constructor.
* @param other Object to copy.
* @draft ICU 4.8
*/
MessagePattern(const MessagePattern &other);
/**
* Assignment operator.
* @param other Object to copy.
* @return *this=other
* @draft ICU 4.8
*/
MessagePattern &operator=(const MessagePattern &other);
/**
* Destructor.
* @draft ICU 4.8
*/
virtual ~MessagePattern();
/**
* Parses a MessageFormat pattern string.
* @param pattern a MessageFormat pattern string
* @param parseError Struct to receive information on the position
* of an error within the pattern.
* Can be NULL.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return *this
* @throws IllegalArgumentException for syntax errors in the pattern string
* @throws IndexOutOfBoundsException if certain limits are exceeded
* (e.g., argument number too high, argument name too long, etc.)
* @throws NumberFormatException if a number could not be parsed
* @draft ICU 4.8
*/
MessagePattern &parse(const UnicodeString &pattern,
UParseError *parseError, UErrorCode &errorCode);
/**
* Parses a ChoiceFormat pattern string.
* @param pattern a ChoiceFormat pattern string
* @param parseError Struct to receive information on the position
* of an error within the pattern.
* Can be NULL.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return *this
* @throws IllegalArgumentException for syntax errors in the pattern string
* @throws IndexOutOfBoundsException if certain limits are exceeded
* (e.g., argument number too high, argument name too long, etc.)
* @throws NumberFormatException if a number could not be parsed
* @draft ICU 4.8
*/
MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
UParseError *parseError, UErrorCode &errorCode);
/**
* Parses a PluralFormat pattern string.
* @param pattern a PluralFormat pattern string
* @param parseError Struct to receive information on the position
* of an error within the pattern.
* Can be NULL.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return *this
* @throws IllegalArgumentException for syntax errors in the pattern string
* @throws IndexOutOfBoundsException if certain limits are exceeded
* (e.g., argument number too high, argument name too long, etc.)
* @throws NumberFormatException if a number could not be parsed
* @draft ICU 4.8
*/
MessagePattern &parsePluralStyle(const UnicodeString &pattern,
UParseError *parseError, UErrorCode &errorCode);
/**
* Parses a SelectFormat pattern string.
* @param pattern a SelectFormat pattern string
* @param parseError Struct to receive information on the position
* of an error within the pattern.
* Can be NULL.
* @param errorCode Standard ICU error code. Its input value must
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return *this
* @throws IllegalArgumentException for syntax errors in the pattern string
* @throws IndexOutOfBoundsException if certain limits are exceeded
* (e.g., argument number too high, argument name too long, etc.)
* @throws NumberFormatException if a number could not be parsed
* @draft ICU 4.8
*/
MessagePattern &parseSelectStyle(const UnicodeString &pattern,
UParseError *parseError, UErrorCode &errorCode);
/**
* Clears this MessagePattern.
* countParts() will return 0.
* @draft ICU 4.8
*/
void clear();
/**
* Clears this MessagePattern and sets the UMessagePatternApostropheMode.
* countParts() will return 0.
* The new mode is stored for use after the pattern has been cleared;
* this function itself does not parse anything.
* @param mode The new UMessagePatternApostropheMode.
* @draft ICU 4.8
* @provisional This API might change or be removed in a future release.
*/
void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
// Drop the parsed state first, then record the new mode.
clear();
aposMode=mode;
}
/**
* @param other another object to compare with.
* @return TRUE if this object is equivalent to the other one.
* @draft ICU 4.8
*/
UBool operator==(const MessagePattern &other) const;
/**
* Inequality test; the logical negation of operator==().
* @param other another object to compare with.
* @return FALSE if this object is equivalent to the other one, TRUE otherwise.
* @draft ICU 4.8
*/
inline UBool operator!=(const MessagePattern &other) const {
return !operator==(other);
}
/**
* @return A hash code for this object.
* @draft ICU 4.8
*/
int32_t hashCode() const;
/**
* Returns the apostrophe mode that is currently set on this object,
* for example via clearPatternAndSetApostropheMode().
* @return this instance's UMessagePatternApostropheMode.
* @draft ICU 4.8
*/
UMessagePatternApostropheMode getApostropheMode() const {
return aposMode;
}
// Java has package-private jdkAposMode() here.
// In C++, this is declared in the MessageImpl class.
/**
* @return the parsed pattern string, as a reference to this object's internal string.
* @draft ICU 4.8
*/
const UnicodeString &getPatternString() const {
return msg;
}
/**
* Does the parsed pattern have named arguments like {first_name}?
* @return TRUE if the parsed pattern has at least one named argument.
* @draft ICU 4.8
*/
UBool hasNamedArguments() const {
return hasArgNames;
}
/**
* Does the parsed pattern have numbered arguments like {2}?
* @return TRUE if the parsed pattern has at least one numbered argument.
* @draft ICU 4.8
*/
UBool hasNumberedArguments() const {
return hasArgNumbers;
}
/**
* Validates and parses an argument name or argument number string.
* An argument name must be a "pattern identifier", that is, it must contain
* no Unicode Pattern_Syntax or Pattern_White_Space characters.
* If it only contains ASCII digits, then it must be a small integer with no leading zero.
* @param name Input string.
* @return &gt;=0 if the name is a valid number,
* ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
* ARG_NAME_NOT_VALID (-2) if it is neither.
* @draft ICU 4.8
*/
static int32_t validateArgumentName(const UnicodeString &name);
/**
* Returns a version of the parsed pattern string where each ASCII apostrophe
* is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
* <p>
* For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
* into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
* @return the deep-auto-quoted version of the parsed pattern string.
* @see MessageFormat::autoQuoteApostrophe()
* @draft ICU 4.8
*/
UnicodeString autoQuoteApostropheDeep() const;
class Part;
/**
* Returns the number of "parts" created by parsing the pattern string.
* Returns 0 if no pattern has been parsed or clear() was called.
* @return the number of pattern parts.
* @draft ICU 4.8
*/
int32_t countParts() const {
return partsLength;
}
/**
* Gets the i-th pattern "part".
* @param i The index of the Part data. (0..countParts()-1)
* @return the i-th pattern "part".
* @draft ICU 4.8
*/
const Part &getPart(int32_t i) const {
return parts[i];
}
/**
* Returns the UMessagePatternPartType of the i-th pattern "part".
* Convenience method for getPart(i).getType().
* @param i The index of the Part data. (0..countParts()-1)
* @return The UMessagePatternPartType of the i-th Part.
* @draft ICU 4.8
*/
UMessagePatternPartType getPartType(int32_t i) const {
    // Same as getPart(i).getType(); i is not range-checked.
    return parts[i].getType();
}
/**
* Returns the pattern index of the specified pattern "part".
* Convenience method for getPart(partIndex).getIndex().
* @param partIndex The index of the Part data. (0..countParts()-1)
* @return The pattern index of this Part.
* @draft ICU 4.8
*/
int32_t getPatternIndex(int32_t partIndex) const {
    // Same as getPart(partIndex).getIndex(); partIndex is not range-checked.
    return parts[partIndex].getIndex();
}
/**
* Returns the substring of the pattern string indicated by the Part.
* Convenience method for getPatternString().tempSubString(part.getIndex(), part.getLength()).
* @param part a part of this MessagePattern.
* @return the substring associated with part.
* @draft ICU 4.8
*/
UnicodeString getSubstring(const Part &part) const {
    // The Part records where its text starts in the pattern string and how long it is.
    const int32_t start=part.getIndex();
    const int32_t length=part.getLength();
    return msg.tempSubString(start, length);
}
/**
* Compares the part's substring with the input string s.
* @param part a part of this MessagePattern.
* @param s a string.
* @return TRUE if getSubstring(part).equals(s).
* @draft ICU 4.8
*/
UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
    // Compare in place against the pattern string; no substring object is created.
    // A compare() result of 0 means the two strings are equal.
    return msg.compare(part.getIndex(), part.getLength(), s)==0;
}
/**
* Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
* @param part a part of this MessagePattern.
* @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
* @draft ICU 4.8
*/
double getNumericValue(const Part &part) const;
/**
* Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
* @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
* @return the "offset:" value.
* @draft ICU 4.8
*/
double getPluralOffset(int32_t pluralStart) const;
/**
* Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
* @param start The index of some Part data (0..countParts()-1);
* this Part should be of Type ARG_START or MSG_START.
* @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
* or start itself if getPartType(start)!=ARG|MSG_START.
* @draft ICU 4.8
*/
int32_t getLimitPartIndex(int32_t start) const {
    // Only a stored limit index at or after start is meaningful;
    // otherwise report start itself.
    const int32_t limitIndex=parts[start].limitPartIndex;
    return limitIndex<start ? start : limitIndex;
}
/**
* A message pattern "part", representing a pattern parsing event.
* There is a part for the start and end of a message or argument,
* for quoting and escaping of and with ASCII apostrophes,
* and for syntax elements of "complex" arguments.
* @draft ICU 4.8
*/
class Part : public UMemory {
public:
    /**
     * Default constructor, do not use.
     * It does not initialize any of the fields.
     * @internal
     */
    Part() {}

    /**
     * Returns the type of this part.
     * @return the part type.
     * @draft ICU 4.8
     */
    UMessagePatternPartType getType() const {
        return type;
    }

    /**
     * Returns the pattern string index associated with this Part.
     * @return this part's pattern string index.
     * @draft ICU 4.8
     */
    int32_t getIndex() const {
        return index;
    }

    /**
     * Returns the length of the pattern substring associated with this Part.
     * This is 0 for some parts.
     * @return this part's pattern substring length.
     * @draft ICU 4.8
     */
    int32_t getLength() const {
        return length;
    }

    /**
     * Returns the pattern string limit (exclusive-end) index associated with this Part.
     * Convenience method for getIndex()+getLength().
     * @return this part's pattern string limit index, same as getIndex()+getLength().
     * @draft ICU 4.8
     */
    int32_t getLimit() const {
        return index+length;
    }

    /**
     * Returns a value associated with this part.
     * See the documentation of each part type for details.
     * @return the part value.
     * @draft ICU 4.8
     */
    int32_t getValue() const {
        return value;
    }

    /**
     * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
     * otherwise UMSGPAT_ARG_TYPE_NONE.
     * @return the argument type for this part.
     * @draft ICU 4.8
     */
    UMessagePatternArgType getArgType() const {
        // Named partType so that the local does not shadow the "type" field.
        UMessagePatternPartType partType=getType();
        if(partType==UMSGPAT_PART_TYPE_ARG_START || partType==UMSGPAT_PART_TYPE_ARG_LIMIT) {
            // For these two part types the value field holds the argument type.
            return (UMessagePatternArgType)value;
        } else {
            return UMSGPAT_ARG_TYPE_NONE;
        }
    }

    /**
     * Indicates whether the Part type has a numeric value.
     * If so, then that numeric value can be retrieved via MessagePattern::getNumericValue().
     * @param type The Part type to be tested.
     * @return TRUE if the Part type has a numeric value.
     * @draft ICU 4.8
     */
    static UBool hasNumericValue(UMessagePatternPartType type) {
        return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
    }

    /**
     * @param other another object to compare with.
     * @return TRUE if this object is equivalent to the other one.
     * @draft ICU 4.8
     */
    UBool operator==(const Part &other) const;

    /**
     * @param other another object to compare with.
     * @return FALSE if this object is equivalent to the other one.
     * @draft ICU 4.8
     */
    inline UBool operator!=(const Part &other) const {
        return !operator==(other);
    }

    /**
     * Combines type, index, length and value into a hash code.
     * limitPartIndex is not part of the hash code.
     * @return A hash code for this object.
     * @draft ICU 4.8
     */
    int32_t hashCode() const {
        // Accumulate in unsigned arithmetic: with a large pattern index the
        // repeated multiplication by 37 would overflow a signed int, which is
        // undefined behavior. Unsigned arithmetic wraps modulo 2^32 and yields
        // the same result as before whenever the signed computation did not overflow.
        uint32_t hash=(uint32_t)type;
        hash=hash*37+(uint32_t)index;
        hash=hash*37+length;
        hash=hash*37+(uint32_t)(int32_t)value;
        return (int32_t)hash;
    }

private:
    friend class MessagePattern;

    // Largest values that fit into the 16-bit length and value fields below.
    static const int32_t MAX_LENGTH=0xffff;
    static const int32_t MAX_VALUE=0x7fff;

    // Some fields are modified during pattern parsing.
    // After pattern parsing, the parts are effectively immutable.
    UMessagePatternPartType type;
    int32_t index;
    uint16_t length;
    int16_t value;
    int32_t limitPartIndex;
};
private:
void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
void postParse();
int32_t parseMessage(int32_t index, int32_t msgStartLength,
int32_t nestingLevel, UMessagePatternArgType parentType,
UParseError *parseError, UErrorCode &errorCode);
int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
UParseError *parseError, UErrorCode &errorCode);
int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
UParseError *parseError, UErrorCode &errorCode);
int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
UParseError *parseError, UErrorCode &errorCode);
/**
* Validates and parses an argument name or argument number string.
* This internal method assumes that the input substring is a "pattern identifier".
* @return &gt;=0 if the name is a valid number,
* ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
* ARG_NAME_NOT_VALID (-2) if it is neither.
* @see #validateArgumentName(String)
*/
static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
int32_t parseArgNumber(int32_t start, int32_t limit) {
return parseArgNumber(msg, start, limit);
}
/**
* Parses a number from the specified message substring.
* @param start start index into the message string
* @param limit limit index into the message string, must be start<limit
* @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
*/
void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
UParseError *parseError, UErrorCode &errorCode);
// Java has package-private appendReducedApostrophes() here.
// In C++, this is declared in the MessageImpl class.
int32_t skipWhiteSpace(int32_t index);
int32_t skipIdentifier(int32_t index);
/**
* Skips a sequence of characters that could occur in a double value.
* Does not fully parse or validate the value.
*/
int32_t skipDouble(int32_t index);
static UBool isArgTypeChar(UChar32 c);
UBool isChoice(int32_t index);
UBool isPlural(int32_t index);
UBool isSelect(int32_t index);
/**
* @return TRUE if we are inside a MessageFormat (sub-)pattern,
* as opposed to inside a top-level choice/plural/select pattern.
*/
UBool inMessageFormatPattern(int32_t nestingLevel);
/**
* @return TRUE if we are in a MessageFormat sub-pattern
* of a top-level ChoiceFormat pattern.
*/
UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
int32_t value, UErrorCode &errorCode);
void addLimitPart(int32_t start,
UMessagePatternPartType type, int32_t index, int32_t length,
int32_t value, UErrorCode &errorCode);
void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
void setParseError(UParseError *parseError, int32_t index);
// No ICU "poor man's RTTI" for this class nor its subclasses.
virtual UClassID getDynamicClassID() const;
UBool init(UErrorCode &errorCode);
UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
UMessagePatternApostropheMode aposMode;
UnicodeString msg;
// ArrayList<Part> parts=new ArrayList<Part>();
MessagePatternPartsList *partsList;
Part *parts;
int32_t partsLength;
// ArrayList<Double> numericValues;
MessagePatternDoubleList *numericValuesList;
double *numericValues;
int32_t numericValuesLength;
UBool hasArgNames;
UBool hasArgNumbers;
UBool needsAutoQuoting;
};
U_NAMESPACE_END
#endif // !UCONFIG_NO_FORMATTING
#endif // __MESSAGEPATTERN_H__

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2002-2009, International Business Machines
* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: uconfig.h
@ -176,6 +176,17 @@
# define UCONFIG_NO_IDNA 0
#endif
/**
* \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
* Determines the default UMessagePatternApostropheMode.
* See the documentation for that enum.
*
* @draft ICU 4.8
*/
#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
#endif
/* i18n library switches ---------------------------------------------------- */
/**

View file

@ -1,6 +1,6 @@
/*
***************************************************************************
* Copyright (C) 1999-2010, International Business Machines Corporation
* Copyright (C) 1999-2011, International Business Machines Corporation
* and others. All Rights Reserved.
***************************************************************************
* Date Name Description
@ -575,8 +575,8 @@ public:
/**
* Modifies this set to represent the set specified by the given
* pattern, optionally ignoring white space. See the class
* description for the syntax of the pattern language.
* pattern, ignoring Unicode Pattern_White_Space characters.
* See the class description for the syntax of the pattern language.
* A frozen set will not be modified.
* @param pattern a string specifying what characters are in the set
* @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
@ -590,8 +590,8 @@ public:
/**
* Modifies this set to represent the set specified by the given
* pattern, optionally ignoring white space. See the class
* description for the syntax of the pattern language.
* pattern, optionally ignoring Unicode Pattern_White_Space characters.
* See the class description for the syntax of the pattern language.
* A frozen set will not be modified.
* @param pattern a string specifying what characters are in the set
* @param options bitmask for options to apply to the pattern.
@ -1540,8 +1540,8 @@ private:
* \\p{foo} \\P{foo} - white space not allowed within "\\p" or "\\P"
* \\N{name} - white space not allowed within "\\N"
*
* Other than the above restrictions, white space is ignored. Case
* is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading
* Other than the above restrictions, Unicode Pattern_White_Space characters are ignored.
* Case is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading
* and trailing space is deleted, and internal runs of whitespace
* are collapsed to a single space.
*

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 1999-2009, International Business Machines
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
@ -15,6 +15,7 @@
#include "ruleiter.h"
#include "cmemory.h"
#include "cstring.h"
#include "patternprops.h"
#include "uhash.h"
#include "util.h"
#include "uvector.h"
@ -1926,7 +1927,7 @@ escapeUnprintable) {
break;
default:
// Escape whitespace
if (uprv_isRuleWhiteSpace(c)) {
if (PatternProps::isWhiteSpace(c)) {
buf.append(BACKSLASH);
}
break;

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2010, International Business Machines
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -399,20 +399,6 @@ UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
// Public API
//----------------------------------------------------------------
/**
* Modifies this set to represent the set specified by the given
* pattern, optionally ignoring white space. See the class
* description for the syntax of the pattern language.
* @param pattern a string specifying what characters are in the set
* @param ignoreSpaces if <code>true</code>, all spaces in the
* pattern are ignored. Spaces are those characters for which
* <code>uprv_isRuleWhiteSpace()</code> is <code>true</code>.
* Characters preceded by '\\' are escaped, losing any special
* meaning they otherwise have. Spaces may be included by
* escaping them.
* @exception <code>IllegalArgumentException</code> if the pattern
* contains a syntax error.
*/
UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern,
UErrorCode& status) {
return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status);

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 1997-2009, International Business Machines Corporation and *
* Copyright (C) 1997-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
@ -32,7 +32,9 @@
#include "unicode/locid.h"
#include "cpputils.h"
#include "cstring.h"
#include "messageimpl.h"
#include "putilimp.h"
#include "uassert.h"
#include <stdio.h>
#include <float.h>
@ -54,6 +56,9 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ChoiceFormat)
#define VERTICAL_BAR ((UChar)0x007C) /*|*/
#define MINUS ((UChar)0x002D) /*-*/
static const UChar LEFT_CURLY_BRACE = 0x7B; /*{*/
static const UChar RIGHT_CURLY_BRACE = 0x7D; /*}*/
#ifdef INFINITY
#undef INFINITY
#endif
@ -69,10 +74,8 @@ static const UChar gNegativeInfinity[] = {MINUS, INFINITY, 0};
ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
UErrorCode& status)
: fChoiceLimits(0),
fClosures(0),
fChoiceFormats(0),
fCount(0)
: constructorErrorCode(status),
msgPattern(status)
{
applyPattern(newPattern, status);
}
@ -84,12 +87,10 @@ ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
ChoiceFormat::ChoiceFormat(const double* limits,
const UnicodeString* formats,
int32_t cnt )
: fChoiceLimits(0),
fClosures(0),
fChoiceFormats(0),
fCount(0)
: constructorErrorCode(U_ZERO_ERROR),
msgPattern(constructorErrorCode)
{
setChoices(limits, formats, cnt );
setChoices(limits, NULL, formats, cnt, constructorErrorCode);
}
// -------------------------------------
@ -98,12 +99,10 @@ ChoiceFormat::ChoiceFormat(const double* limits,
const UBool* closures,
const UnicodeString* formats,
int32_t cnt )
: fChoiceLimits(0),
fClosures(0),
fChoiceFormats(0),
fCount(0)
: constructorErrorCode(U_ZERO_ERROR),
msgPattern(constructorErrorCode)
{
setChoices(limits, closures, formats, cnt );
setChoices(limits, closures, formats, cnt, constructorErrorCode);
}
// -------------------------------------
@ -111,11 +110,9 @@ ChoiceFormat::ChoiceFormat(const double* limits,
ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
: NumberFormat(that),
fChoiceLimits(0),
fClosures(0),
fChoiceFormats(0)
constructorErrorCode(that.constructorErrorCode),
msgPattern(that.msgPattern)
{
*this = that;
}
// -------------------------------------
@ -126,10 +123,8 @@ ChoiceFormat::ChoiceFormat(const ChoiceFormat& that)
ChoiceFormat::ChoiceFormat(const UnicodeString& newPattern,
UParseError& parseError,
UErrorCode& status)
: fChoiceLimits(0),
fClosures(0),
fChoiceFormats(0),
fCount(0)
: constructorErrorCode(status),
msgPattern(status)
{
applyPattern(newPattern,parseError, status);
}
@ -141,16 +136,7 @@ ChoiceFormat::operator==(const Format& that) const
if (this == &that) return TRUE;
if (!NumberFormat::operator==(that)) return FALSE;
ChoiceFormat& thatAlias = (ChoiceFormat&)that;
if (fCount != thatAlias.fCount) return FALSE;
// Checks the limits, the corresponding format string and LE or LT flags.
// LE means less than and equal to, LT means less than.
for (int32_t i = 0; i < fCount; i++) {
if ((fChoiceLimits[i] != thatAlias.fChoiceLimits[i]) ||
(fClosures[i] != thatAlias.fClosures[i]) ||
(fChoiceFormats[i] != thatAlias.fChoiceFormats[i]))
return FALSE;
}
return TRUE;
return msgPattern == thatAlias.msgPattern;
}
// -------------------------------------
@ -161,37 +147,8 @@ ChoiceFormat::operator=(const ChoiceFormat& that)
{
if (this != &that) {
NumberFormat::operator=(that);
fCount = that.fCount;
uprv_free(fChoiceLimits);
fChoiceLimits = NULL;
uprv_free(fClosures);
fClosures = NULL;
delete [] fChoiceFormats;
fChoiceFormats = NULL;
fChoiceLimits = (double*) uprv_malloc( sizeof(double) * fCount);
fClosures = (UBool*) uprv_malloc( sizeof(UBool) * fCount);
fChoiceFormats = new UnicodeString[fCount];
// check for memory allocation error
if (!fChoiceLimits || !fClosures || !fChoiceFormats) {
if (fChoiceLimits) {
uprv_free(fChoiceLimits);
fChoiceLimits = NULL;
}
if (fClosures) {
uprv_free(fClosures);
fClosures = NULL;
}
if (fChoiceFormats) {
delete[] fChoiceFormats;
fChoiceFormats = NULL;
}
} else {
uprv_arrayCopy(that.fChoiceLimits, fChoiceLimits, fCount);
uprv_arrayCopy(that.fClosures, fClosures, fCount);
uprv_arrayCopy(that.fChoiceFormats, fChoiceFormats, fCount);
}
constructorErrorCode = that.constructorErrorCode;
msgPattern = that.msgPattern;
}
return *this;
}
@ -200,32 +157,12 @@ ChoiceFormat::operator=(const ChoiceFormat& that)
ChoiceFormat::~ChoiceFormat()
{
uprv_free(fChoiceLimits);
fChoiceLimits = NULL;
uprv_free(fClosures);
fClosures = NULL;
delete [] fChoiceFormats;
fChoiceFormats = NULL;
fCount = 0;
}
/**
* Convert a string to a double value
*/
double
ChoiceFormat::stod(const UnicodeString& string)
{
char source[256];
char* end;
string.extract(0, string.length(), source, (int32_t)sizeof(source), US_INV); /* invariant codepage */
return uprv_strtod(source,&end);
}
// -------------------------------------
/**
* Convert a double value to a string without the overhead of ICU.
* Convert a double value to a string without the overhead of NumberFormat.
*/
UnicodeString&
ChoiceFormat::dtos(double value,
@ -286,8 +223,8 @@ void
ChoiceFormat::applyPattern(const UnicodeString& pattern,
UErrorCode& status)
{
UParseError parseError;
applyPattern(pattern, parseError, status);
msgPattern.parseChoiceStyle(pattern, NULL, status);
constructorErrorCode = status;
}
// -------------------------------------
@ -298,217 +235,16 @@ ChoiceFormat::applyPattern(const UnicodeString& pattern,
UParseError& parseError,
UErrorCode& status)
{
if (U_FAILURE(status))
{
return;
}
// Clear error struct
parseError.offset = -1;
parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
// Perform 2 passes. The first computes the number of limits in
// this pattern (fCount), which is 1 more than the number of
// literal VERTICAL_BAR characters.
int32_t count = 1;
int32_t i;
for (i=0; i<pattern.length(); ++i) {
UChar c = pattern[i];
if (c == SINGLE_QUOTE) {
// Skip over the entire quote, including embedded
// contiguous pairs of SINGLE_QUOTE.
for (;;) {
do {
++i;
} while (i<pattern.length() &&
pattern[i] != SINGLE_QUOTE);
if ((i+1)<pattern.length() &&
pattern[i+1] == SINGLE_QUOTE) {
// SINGLE_QUOTE pair; skip over it
++i;
} else {
break;
}
}
} else if (c == VERTICAL_BAR) {
++count;
}
}
// Allocate the required storage.
double *newLimits = (double*) uprv_malloc( sizeof(double) * count);
/* test for NULL */
if (newLimits == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
UBool *newClosures = (UBool*) uprv_malloc( sizeof(UBool) * count);
/* test for NULL */
if (newClosures == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
uprv_free(newLimits);
return;
}
UnicodeString *newFormats = new UnicodeString[count];
/* test for NULL */
if (newFormats == 0) {
status = U_MEMORY_ALLOCATION_ERROR;
uprv_free(newLimits);
uprv_free(newClosures);
return;
}
// Perform the second pass
int32_t k = 0; // index into newXxx[] arrays
UnicodeString buf; // scratch buffer
UBool inQuote = FALSE;
UBool inNumber = TRUE; // TRUE before < or #, FALSE after
for (i=0; i<pattern.length(); ++i) {
UChar c = pattern[i];
if (c == SINGLE_QUOTE) {
// Check for SINGLE_QUOTE pair indicating a literal quote
if ((i+1) < pattern.length() &&
pattern[i+1] == SINGLE_QUOTE) {
buf += SINGLE_QUOTE;
++i;
} else {
inQuote = !inQuote;
}
} else if (inQuote) {
buf += c;
} else if (c == LESS_THAN || c == LESS_EQUAL || c == LESS_EQUAL2) {
if (!inNumber || buf.length() == 0) {
goto error;
}
inNumber = FALSE;
double limit;
buf.trim();
if (!buf.compare(gPositiveInfinity, POSITIVE_INF_STRLEN)) {
limit = uprv_getInfinity();
} else if (!buf.compare(gNegativeInfinity, NEGATIVE_INF_STRLEN)) {
limit = -uprv_getInfinity();
} else {
limit = stod(buf);
}
if (k == count) {
// This shouldn't happen. If it does, it means that
// the count determined in the first pass did not
// match the number of elements found in the second
// pass.
goto error;
}
newLimits[k] = limit;
newClosures[k] = (c == LESS_THAN);
if (k > 0 && limit <= newLimits[k-1]) {
// Each limit must be strictly > than the previous
// limit. One exception: Two subsequent limits may be
// == if the first closure is FALSE and the second
// closure is TRUE. This places the limit value in
// the second interval.
if (!(limit == newLimits[k-1] &&
!newClosures[k-1] &&
newClosures[k])) {
goto error;
}
}
buf.truncate(0);
} else if (c == VERTICAL_BAR) {
if (inNumber) {
goto error;
}
inNumber = TRUE;
newFormats[k] = buf;
++k;
buf.truncate(0);
} else {
buf += c;
}
}
if (k != (count-1) || inNumber || inQuote) {
goto error;
}
newFormats[k] = buf;
// Don't modify this object until the parse succeeds
uprv_free(fChoiceLimits);
uprv_free(fClosures);
delete[] fChoiceFormats;
fCount = count;
fChoiceLimits = newLimits;
fClosures = newClosures;
fChoiceFormats = newFormats;
return;
error:
status = U_ILLEGAL_ARGUMENT_ERROR;
syntaxError(pattern,i,parseError);
uprv_free(newLimits);
uprv_free(newClosures);
delete[] newFormats;
return;
msgPattern.parseChoiceStyle(pattern, &parseError, status);
constructorErrorCode = status;
}
// -------------------------------------
// Reconstruct the original input pattern.
// Returns the input pattern string.
UnicodeString&
ChoiceFormat::toPattern(UnicodeString& result) const
{
result.remove();
for (int32_t i = 0; i < fCount; ++i) {
if (i != 0) {
result += VERTICAL_BAR;
}
UnicodeString buf;
if (uprv_isPositiveInfinity(fChoiceLimits[i])) {
result += INFINITY;
} else if (uprv_isNegativeInfinity(fChoiceLimits[i])) {
result += MINUS;
result += INFINITY;
} else {
result += dtos(fChoiceLimits[i], buf);
}
if (fClosures[i]) {
result += LESS_THAN;
} else {
result += LESS_EQUAL;
}
// Append fChoiceFormats[i], using quotes if there are special
// characters. Single quotes themselves must be escaped in
// either case.
const UnicodeString& text = fChoiceFormats[i];
UBool needQuote = text.indexOf(LESS_THAN) >= 0
|| text.indexOf(LESS_EQUAL) >= 0
|| text.indexOf(LESS_EQUAL2) >= 0
|| text.indexOf(VERTICAL_BAR) >= 0;
if (needQuote) {
result += SINGLE_QUOTE;
}
if (text.indexOf(SINGLE_QUOTE) < 0) {
result += text;
}
else {
for (int32_t j = 0; j < text.length(); ++j) {
UChar c = text[j];
result += c;
if (c == SINGLE_QUOTE) {
result += c;
}
}
}
if (needQuote) {
result += SINGLE_QUOTE;
}
}
return result;
return result = msgPattern.getPatternString();
}
// -------------------------------------
@ -518,7 +254,8 @@ ChoiceFormat::setChoices( const double* limits,
const UnicodeString* formats,
int32_t cnt )
{
setChoices(limits, 0, formats, cnt);
UErrorCode errorCode = U_ZERO_ERROR;
setChoices(limits, NULL, formats, cnt, errorCode);
}
// -------------------------------------
@ -529,54 +266,76 @@ ChoiceFormat::setChoices( const double* limits,
const UnicodeString* formats,
int32_t cnt )
{
if(limits == 0 || formats == 0)
return;
UErrorCode errorCode = U_ZERO_ERROR;
setChoices(limits, closures, formats, cnt, errorCode);
}
if (fChoiceLimits) {
uprv_free(fChoiceLimits);
}
if (fClosures) {
uprv_free(fClosures);
}
if (fChoiceFormats) {
delete [] fChoiceFormats;
}
// Note that the old arrays are deleted and this owns
// the created array.
fCount = cnt;
fChoiceLimits = (double*) uprv_malloc( sizeof(double) * fCount);
fClosures = (UBool*) uprv_malloc( sizeof(UBool) * fCount);
fChoiceFormats = new UnicodeString[fCount];
//check for memory allocation error
if (!fChoiceLimits || !fClosures || !fChoiceFormats) {
if (fChoiceLimits) {
uprv_free(fChoiceLimits);
fChoiceLimits = NULL;
}
if (fClosures) {
uprv_free(fClosures);
fClosures = NULL;
}
if (fChoiceFormats) {
delete[] fChoiceFormats;
fChoiceFormats = NULL;
}
void
ChoiceFormat::setChoices(const double* limits,
const UBool* closures,
const UnicodeString* formats,
int32_t count,
UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) {
return;
}
uprv_arrayCopy(limits, fChoiceLimits, fCount);
uprv_arrayCopy(formats, fChoiceFormats, fCount);
if (closures != 0) {
uprv_arrayCopy(closures, fClosures, fCount);
} else {
int32_t i;
for (i=0; i<fCount; ++i) {
fClosures[i] = FALSE;
if (limits == NULL || formats == NULL) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// Reconstruct the original input pattern.
// Modified version of the pre-ICU 4.8 toPattern() implementation.
UnicodeString result;
for (int32_t i = 0; i < count; ++i) {
if (i != 0) {
result += VERTICAL_BAR;
}
UnicodeString buf;
if (uprv_isPositiveInfinity(limits[i])) {
result += INFINITY;
} else if (uprv_isNegativeInfinity(limits[i])) {
result += MINUS;
result += INFINITY;
} else {
result += dtos(limits[i], buf);
}
if (closures != NULL && closures[i]) {
result += LESS_THAN;
} else {
result += LESS_EQUAL;
}
// Append formats[i], using quotes if there are special
// characters. Single quotes themselves must be escaped in
// either case.
const UnicodeString& text = formats[i];
int32_t textLength = text.length();
int32_t nestingLevel = 0;
for (int32_t j = 0; j < textLength; ++j) {
UChar c = text[j];
if (c == SINGLE_QUOTE && nestingLevel == 0) {
// Double each top-level apostrophe.
result.append(c);
} else if (c == VERTICAL_BAR && nestingLevel == 0) {
// Surround each pipe symbol with apostrophes for quoting.
// If the next character is an apostrophe, then that will be doubled,
// and although the parser will see the apostrophe pairs beginning
// and ending one character earlier than our doubling, the result
// is as desired.
// | -> '|'
// |' -> '|'''
// |'' -> '|''''' etc.
result.append(SINGLE_QUOTE).append(c).append(SINGLE_QUOTE);
continue; // Skip the append(c) at the end of the loop body.
} else if (c == LEFT_CURLY_BRACE) {
++nestingLevel;
} else if (c == RIGHT_CURLY_BRACE && nestingLevel > 0) {
--nestingLevel;
}
result.append(c);
}
}
// Apply the reconstructed pattern.
applyPattern(result, errorCode);
}
// -------------------------------------
@ -585,8 +344,8 @@ ChoiceFormat::setChoices( const double* limits,
const double*
ChoiceFormat::getLimits(int32_t& cnt) const
{
cnt = fCount;
return fChoiceLimits;
cnt = 0;
return NULL;
}
// -------------------------------------
@ -595,8 +354,8 @@ ChoiceFormat::getLimits(int32_t& cnt) const
const UBool*
ChoiceFormat::getClosures(int32_t& cnt) const
{
cnt = fCount;
return fClosures;
cnt = 0;
return NULL;
}
// -------------------------------------
@ -605,8 +364,8 @@ ChoiceFormat::getClosures(int32_t& cnt) const
const UnicodeString*
ChoiceFormat::getFormats(int32_t& cnt) const
{
cnt = fCount;
return fChoiceFormats;
cnt = 0;
return NULL;
}
// -------------------------------------
@ -623,9 +382,8 @@ ChoiceFormat::format(int64_t number,
}
// -------------------------------------
// Formats a long number, it's actually formatted as
// a double. The returned format string may differ
// from the input number because of this.
// Formats an int32_t number, it's actually formatted as
// a double.
UnicodeString&
ChoiceFormat::format(int32_t number,
@ -643,26 +401,63 @@ ChoiceFormat::format(double number,
UnicodeString& appendTo,
FieldPosition& /*pos*/) const
{
// find the number
int32_t i;
for (i = 0; i < fCount; ++i) {
if (fClosures[i]) {
if (!(number > fChoiceLimits[i])) {
// same as number <= fChoiceLimits, except catches NaN
break;
}
} else if (!(number >= fChoiceLimits[i])) {
// same as number < fChoiceLimits, except catches NaN
if (msgPattern.countParts() == 0) {
// No pattern was applied, or it failed.
return appendTo;
}
// Get the appropriate sub-message.
int32_t msgStart = findSubMessage(msgPattern, 0, number);
if (!MessageImpl::jdkAposMode(msgPattern)) {
int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
appendTo.append(msgPattern.getPatternString(),
patternStart,
msgPattern.getPatternIndex(msgLimit) - patternStart);
return appendTo;
}
// JDK compatibility mode: Remove SKIP_SYNTAX.
return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
}
// Selects the sub-message of a choice-style pattern that applies to `number`.
//
// pattern:   the parsed pattern to search.
// partIndex: index of the first part of the choice style; the first numeric part
//            and its selector are skipped (partIndex += 2) before the loop starts.
// number:    the value compared against each boundary.
//
// Returns the part index at which the chosen sub-message starts (msgStart).
// If no boundary test stops the loop, the last sub-message seen is returned.
int32_t
ChoiceFormat::findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number) {
int32_t count = pattern.countParts();
int32_t msgStart;
// Iterate over (ARG_INT|DOUBLE, ARG_SELECTOR, message) tuples
// until ARG_LIMIT or end of choice-only pattern.
// Ignore the first number and selector and start the loop on the first message.
partIndex += 2;
for (;;) {
// Skip but remember the current sub-message.
msgStart = partIndex;
partIndex = pattern.getLimitPartIndex(partIndex);
if (++partIndex >= count) {
// Reached the end of the choice-only pattern.
// Return with the last sub-message.
break;
}
const MessagePattern::Part &part = pattern.getPart(partIndex++);
UMessagePatternPartType type = part.getType();
if (type == UMSGPAT_PART_TYPE_ARG_LIMIT) {
// Reached the end of the ChoiceFormat style.
// Return with the last sub-message.
break;
}
// part is an ARG_INT or ARG_DOUBLE
U_ASSERT(MessagePattern::Part::hasNumericValue(type));
double boundary = pattern.getNumericValue(part);
// Fetch the ARG_SELECTOR character.
int32_t selectorIndex = pattern.getPatternIndex(partIndex++);
UChar boundaryChar = pattern.getPatternString().charAt(selectorIndex);
// A LESS_THAN selector makes the boundary exclusive for the following message
// (number == boundary still selects the current one); any other selector
// makes it inclusive.
if (boundaryChar == LESS_THAN ? !(number > boundary) : !(number >= boundary)) {
// The number is in the interval between the previous boundary and the current one.
// Return with the sub-message between them.
// The !(a>b) and !(a>=b) comparisons are equivalent to
// (a<=b) and (a<b) except they "catch" NaN.
break;
}
}
// NOTE(review): the statements from here up to the first `return` use `i`,
// `fChoiceFormats` and `appendTo`, none of which are declared in this function.
// They look like leftovers from the previous array-based format() body and
// would make `return msgStart;` unreachable — confirm and drop them.
--i;
if (i < 0) {
i = 0;
}
// return either a formatted number, or a string
appendTo += fChoiceFormats[i];
return appendTo;
return msgStart;
}
// -------------------------------------
@ -680,13 +475,15 @@ ChoiceFormat::format(const Formattable* objs,
status = U_ILLEGAL_ARGUMENT_ERROR;
return appendTo;
}
if (msgPattern.countParts() == 0) {
status = U_INVALID_STATE_ERROR;
return appendTo;
}
UnicodeString buffer;
for (int32_t i = 0; i < cnt; i++) {
double objDouble = objs[i].getDouble(status);
if (U_SUCCESS(status)) {
buffer.remove();
appendTo += format(objDouble, buffer, pos);
format(objDouble, appendTo, pos);
}
}
@ -710,31 +507,68 @@ ChoiceFormat::format(const Formattable& obj,
void
ChoiceFormat::parse(const UnicodeString& text,
Formattable& result,
ParsePosition& status) const
ParsePosition& pos) const
{
result.setDouble(parseArgument(msgPattern, 0, text, pos));
}
double
ChoiceFormat::parseArgument(
const MessagePattern &pattern, int32_t partIndex,
const UnicodeString &source, ParsePosition &pos) {
// find the best number (defined as the one with the longest parse)
int32_t start = status.getIndex();
int32_t start = pos.getIndex();
int32_t furthest = start;
double bestNumber = uprv_getNaN();
double tempNumber = 0.0;
for (int i = 0; i < fCount; ++i) {
int32_t len = fChoiceFormats[i].length();
if (text.compare(start, len, fChoiceFormats[i]) == 0) {
status.setIndex(start + len);
tempNumber = fChoiceLimits[i];
if (status.getIndex() > furthest) {
furthest = status.getIndex();
int32_t count = pattern.countParts();
while (partIndex < count && pattern.getPartType(partIndex) != UMSGPAT_PART_TYPE_ARG_LIMIT) {
tempNumber = pattern.getNumericValue(pattern.getPart(partIndex));
partIndex += 2; // skip the numeric part and ignore the ARG_SELECTOR
int32_t msgLimit = pattern.getLimitPartIndex(partIndex);
int32_t len = matchStringUntilLimitPart(pattern, partIndex, msgLimit, source, start);
if (len >= 0) {
int32_t newIndex = start + len;
if (newIndex > furthest) {
furthest = newIndex;
bestNumber = tempNumber;
if (furthest == text.length())
if (furthest == source.length()) {
break;
}
}
}
partIndex = msgLimit + 1;
}
status.setIndex(furthest);
if (status.getIndex() == start) {
status.setErrorIndex(furthest);
if (furthest == start) {
pos.setErrorIndex(start);
} else {
pos.setIndex(furthest);
}
return bestNumber;
}
// Matches the literal text of one sub-message against `source`.
//
// pattern:        the parsed pattern that holds the sub-message.
// partIndex:      index of the part that opens the sub-message; matching starts
//                 at that part's limit in the pattern string.
// limitPartIndex: index of the part that closes the sub-message.
// source:         the text being parsed.
// sourceOffset:   position in `source` where the comparison is made.
//
// The pattern text is compared in segments that end at each SKIP_SYNTAX part and
// at the limit part; the text covered by a SKIP_SYNTAX part itself is not compared.
// Parts of any other type are stepped over without ending a segment, so the
// pattern text they cover is compared literally as part of the enclosing segment.
//
// Returns the summed length of the matched segments, or -1 on the first mismatch.
int32_t
ChoiceFormat::matchStringUntilLimitPart(
const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
const UnicodeString &source, int32_t sourceOffset) {
int32_t matchingSourceLength = 0;
const UnicodeString &msgString = pattern.getPatternString();
int32_t prevIndex = pattern.getPart(partIndex).getLimit();
for (;;) {
const MessagePattern::Part &part = pattern.getPart(++partIndex);
if (partIndex == limitPartIndex || part.getType() == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
int32_t index = part.getIndex();
int32_t length = index - prevIndex;
// NOTE(review): every segment is compared at `sourceOffset`; the running
// `matchingSourceLength` is never added to it. A segment that follows a
// SKIP_SYNTAX part is therefore matched against the same source position as
// the first segment instead of where the previous segment ended. This looks
// unintended (expected `sourceOffset + matchingSourceLength`) — confirm.
if (length != 0 && 0 != source.compare(sourceOffset, length, msgString, prevIndex, length)) {
return -1; // mismatch
}
matchingSourceLength += length;
if (partIndex == limitPartIndex) {
return matchingSourceLength;
}
prevIndex = part.getLimit(); // SKIP_SYNTAX
}
}
// NOTE(review): the loop above only exits via `return`, so the next statement is
// unreachable; it also uses `result` and `bestNumber`, which are not declared in
// this function. It looks like a leftover from the previous parse() body —
// confirm and drop it.
result.setDouble(bestNumber);
}
// -------------------------------------

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2009, International Business Machines Corporation and
* Copyright (C) 2009-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
@ -12,238 +12,161 @@
*******************************************************************************
*/
#include "unicode/utypes.h"
#include "unicode/messagepattern.h"
#include "unicode/plurfmt.h"
#include "unicode/plurrule.h"
#include "unicode/utypes.h"
#include "cmemory.h"
#include "messageimpl.h"
#include "plurrule_impl.h"
#include "uassert.h"
#include "uhash.h"
#if !UCONFIG_NO_FORMATTING
U_NAMESPACE_BEGIN
U_CDECL_BEGIN
static void U_CALLCONV
deleteHashStrings(void *obj) {
delete (UnicodeString *)obj;
}
U_CDECL_END
static const UChar OTHER_STRING[] = {
0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
};
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
#define MAX_KEYWORD_SIZE 30
PluralFormat::PluralFormat(UErrorCode& status) {
init(NULL, Locale::getDefault(), status);
PluralFormat::PluralFormat(UErrorCode& status)
: locale(Locale::getDefault()),
msgPattern(status),
numberFormat(NULL),
offset(0) {
init(NULL, status);
}
PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status) {
init(NULL, loc, status);
PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
: locale(loc),
msgPattern(status),
numberFormat(NULL),
offset(0) {
init(NULL, status);
}
PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status) {
init(&rules, Locale::getDefault(), status);
PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
: locale(Locale::getDefault()),
msgPattern(status),
numberFormat(NULL),
offset(0) {
init(&rules, status);
}
PluralFormat::PluralFormat(const Locale& loc, const PluralRules& rules, UErrorCode& status) {
init(&rules, loc, status);
PluralFormat::PluralFormat(const Locale& loc,
const PluralRules& rules,
UErrorCode& status)
: locale(loc),
msgPattern(status),
numberFormat(NULL),
offset(0) {
init(&rules, status);
}
PluralFormat::PluralFormat(const UnicodeString& pat, UErrorCode& status) {
init(NULL, Locale::getDefault(), status);
PluralFormat::PluralFormat(const UnicodeString& pat,
UErrorCode& status)
: locale(Locale::getDefault()),
msgPattern(status),
numberFormat(NULL),
offset(0) {
init(NULL, status);
applyPattern(pat, status);
}
PluralFormat::PluralFormat(const Locale& loc, const UnicodeString& pat, UErrorCode& status) {
init(NULL, loc, status);
PluralFormat::PluralFormat(const Locale& loc,
const UnicodeString& pat,
UErrorCode& status)
: locale(loc),
msgPattern(status),
numberFormat(NULL),
offset(0) {
init(NULL, status);
applyPattern(pat, status);
}
PluralFormat::PluralFormat(const PluralRules& rules, const UnicodeString& pat, UErrorCode& status) {
init(&rules, Locale::getDefault(), status);
PluralFormat::PluralFormat(const PluralRules& rules,
const UnicodeString& pat,
UErrorCode& status)
: locale(Locale::getDefault()),
msgPattern(status),
numberFormat(NULL),
offset(0) {
init(&rules, status);
applyPattern(pat, status);
}
PluralFormat::PluralFormat(const Locale& loc, const PluralRules& rules, const UnicodeString& pat, UErrorCode& status) {
init(&rules, loc, status);
PluralFormat::PluralFormat(const Locale& loc,
const PluralRules& rules,
const UnicodeString& pat,
UErrorCode& status)
: locale(loc),
msgPattern(status),
numberFormat(NULL),
offset(0) {
init(&rules, status);
applyPattern(pat, status);
}
PluralFormat::PluralFormat(const PluralFormat& other) : Format(other) {
PluralFormat::PluralFormat(const PluralFormat& other)
: Format(other),
locale(other.locale),
msgPattern(other.msgPattern),
numberFormat(NULL),
offset(other.offset) {
copyObjects(other);
}
void
PluralFormat::copyObjects(const PluralFormat& other) {
UErrorCode status = U_ZERO_ERROR;
locale = other.locale;
pluralRules = other.pluralRules->clone();
pattern = other.pattern;
copyHashtable(other.fParsedValuesHash, status);
if (U_FAILURE(status)) {
delete pluralRules;
pluralRules = NULL;
return;
if (other.numberFormat == NULL) {
numberFormat = NumberFormat::createInstance(locale, status);
} else {
numberFormat = (NumberFormat*)other.numberFormat->clone();
}
numberFormat=NumberFormat::createInstance(locale, status);
if (U_FAILURE(status)) {
delete pluralRules;
pluralRules = NULL;
delete fParsedValuesHash;
fParsedValuesHash = NULL;
return;
if (other.pluralRulesWrapper.pluralRules == NULL) {
pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
} else {
pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
}
replacedNumberFormat=other.replacedNumberFormat;
}
PluralFormat::~PluralFormat() {
delete pluralRules;
delete fParsedValuesHash;
delete numberFormat;
}
void
PluralFormat::init(const PluralRules* rules, const Locale& curLocale, UErrorCode& status) {
PluralFormat::init(const PluralRules* rules, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
locale = curLocale;
if ( rules==NULL) {
pluralRules = PluralRules::forLocale(locale, status);
if (U_FAILURE(status)) {
if (rules==NULL) {
pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
} else {
pluralRulesWrapper.pluralRules = rules->clone();
if (pluralRulesWrapper.pluralRules == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
}
else {
pluralRules = rules->clone();
}
fParsedValuesHash=NULL;
pattern.remove();
numberFormat= NumberFormat::createInstance(curLocale, status);
if (U_FAILURE(status)) {
delete pluralRules;
pluralRules = NULL;
return;
}
replacedNumberFormat=NULL;
numberFormat= NumberFormat::createInstance(locale, status);
}
void
PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
msgPattern.parsePluralStyle(newPattern, NULL, status);
if (U_FAILURE(status)) {
msgPattern.clear();
offset = 0;
return;
}
this->pattern = newPattern;
UnicodeString token;
int32_t braceCount=0;
fmtToken type;
UBool spaceIncluded=FALSE;
if (fParsedValuesHash==NULL) {
fParsedValuesHash = new Hashtable(TRUE, status);
if (U_FAILURE(status)) {
return;
}
fParsedValuesHash->setValueDeleter(deleteHashStrings);
}
UBool getKeyword=TRUE;
UnicodeString hashKeyword;
UnicodeString *hashPattern;
for (int32_t i=0; i<pattern.length(); ++i) {
UChar ch=pattern.charAt(i);
if ( !inRange(ch, type) ) {
if (getKeyword) {
status = U_ILLEGAL_CHARACTER;
return;
}
else {
token += ch;
continue;
}
}
switch (type) {
case tSpace:
if (token.length()==0) {
continue;
}
if (getKeyword) {
// space after keyword
spaceIncluded = TRUE;
}
else {
token += ch;
}
break;
case tLeftBrace:
if ( getKeyword ) {
if (fParsedValuesHash->get(token)!= NULL) {
status = U_DUPLICATE_KEYWORD;
return;
}
if (token.length()==0) {
status = U_PATTERN_SYNTAX_ERROR;
return;
}
if (!pluralRules->isKeyword(token)) {
status = U_UNDEFINED_KEYWORD;
return;
}
hashKeyword = token;
getKeyword = FALSE;
token.remove();
}
else {
if (braceCount==0) {
status = U_UNEXPECTED_TOKEN;
return;
}
else {
token += ch;
}
}
braceCount++;
spaceIncluded = FALSE;
break;
case tRightBrace:
if ( getKeyword ) {
status = U_UNEXPECTED_TOKEN;
return;
}
else {
hashPattern = new UnicodeString(token);
fParsedValuesHash->put(hashKeyword, hashPattern, status);
if (U_FAILURE(status)) {
return;
}
braceCount--;
if ( braceCount==0 ) {
getKeyword=TRUE;
hashKeyword.remove();
hashPattern=NULL;
token.remove();
}
else {
token += ch;
}
}
spaceIncluded = FALSE;
break;
case tLetter:
case tNumberSign:
if (spaceIncluded) {
status = U_PATTERN_SYNTAX_ERROR;
return;
}
default:
token+=ch;
break;
}
}
if ( checkSufficientDefinition() ) {
return;
}
else {
status = U_DEFAULT_KEYWORD_MISSING;
return;
}
offset = msgPattern.getPluralOffset(0);
}
UnicodeString&
@ -253,20 +176,10 @@ PluralFormat::format(const Formattable& obj,
UErrorCode& status) const
{
if (U_FAILURE(status)) return appendTo;
int32_t number;
switch (obj.getType())
{
case Formattable::kDouble:
return format((int32_t)obj.getDouble(), appendTo, pos, status);
break;
case Formattable::kLong:
number = (int32_t)obj.getLong();
return format(number, appendTo, pos, status);
break;
case Formattable::kInt64:
return format((int32_t)obj.getInt64(), appendTo, pos, status);
default:
if (obj.isNumeric()) {
return format(obj.getDouble(), appendTo, pos, status);
} else {
status = U_ILLEGAL_ARGUMENT_ERROR;
return appendTo;
}
@ -274,30 +187,22 @@ PluralFormat::format(const Formattable& obj,
UnicodeString
PluralFormat::format(int32_t number, UErrorCode& status) const {
if (U_FAILURE(status)) {
return UnicodeString();
}
FieldPosition fpos(0);
UnicodeString result;
return format(number, result, fpos, status);
}
UnicodeString
PluralFormat::format(double number, UErrorCode& status) const {
if (U_FAILURE(status)) {
return UnicodeString();
}
FieldPosition fpos(0);
UnicodeString result;
return format(number, result, fpos, status);
}
UnicodeString&
PluralFormat::format(int32_t number,
UnicodeString& appendTo,
UnicodeString& appendTo,
FieldPosition& pos,
UErrorCode& status) const {
return format((double)number, appendTo, pos, status);
@ -305,101 +210,82 @@ PluralFormat::format(int32_t number,
UnicodeString&
PluralFormat::format(double number,
UnicodeString& appendTo,
UnicodeString& appendTo,
FieldPosition& pos,
UErrorCode& /*status*/) const {
if (fParsedValuesHash==NULL) {
if ( replacedNumberFormat== NULL ) {
return numberFormat->format(number, appendTo, pos);
}
else {
replacedNumberFormat->format(number, appendTo, pos);
UErrorCode& status) const {
if (U_FAILURE(status)) {
return appendTo;
}
if (msgPattern.countParts() == 0) {
return numberFormat->format(number, appendTo, pos);
}
// Get the appropriate sub-message.
int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, number, status);
// Replace syntactic # signs in the top level of this sub-message
// (not in nested arguments) with the formatted number-offset.
const UnicodeString& pattern = msgPattern.getPatternString();
number -= offset;
int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
for (;;) {
const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
const UMessagePatternPartType type = part.getType();
int32_t index = part.getIndex();
if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
return appendTo.append(pattern, prevIndex, index - prevIndex);
} else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
(type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
appendTo.append(pattern, prevIndex, index - prevIndex);
if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
numberFormat->format(number, appendTo);
}
prevIndex = part.getLimit();
} else if (type == UMSGPAT_PART_TYPE_ARG_START) {
appendTo.append(pattern, prevIndex, index - prevIndex);
prevIndex = index;
partIndex = msgPattern.getLimitPartIndex(partIndex);
index = msgPattern.getPart(partIndex).getLimit();
MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
prevIndex = index;
}
}
UnicodeString selectedRule = pluralRules->select(number);
UnicodeString *selectedPattern = (UnicodeString *)fParsedValuesHash->get(selectedRule);
if (selectedPattern==NULL) {
selectedPattern = (UnicodeString *)fParsedValuesHash->get(pluralRules->getKeywordOther());
}
appendTo = insertFormattedNumber(number, *selectedPattern, appendTo, pos);
return appendTo;
}
UnicodeString&
PluralFormat::toPattern(UnicodeString& appendTo) {
appendTo+= pattern;
if (0 == msgPattern.countParts()) {
appendTo.setToBogus();
} else {
appendTo.append(msgPattern.getPatternString());
}
return appendTo;
}
UBool
PluralFormat::inRange(UChar ch, fmtToken& type) {
if ((ch>=CAP_A) && (ch<=CAP_Z)) {
// we assume all characters are in lower case already.
return FALSE;
}
if ((ch>=LOW_A) && (ch<=LOW_Z)) {
type = tLetter;
return TRUE;
}
switch (ch) {
case LEFTBRACE:
type = tLeftBrace;
return TRUE;
case SPACE:
type = tSpace;
return TRUE;
case RIGHTBRACE:
type = tRightBrace;
return TRUE;
case NUMBER_SIGN:
type = tNumberSign;
return TRUE;
default :
type = none;
return FALSE;
}
}
UBool
PluralFormat::checkSufficientDefinition() {
// Check that at least the default rule is defined.
if (fParsedValuesHash==NULL) return FALSE;
if (fParsedValuesHash->get(pluralRules->getKeywordOther()) == NULL) {
return FALSE;
}
else {
return TRUE;
}
}
void
PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
if (pluralRules!=NULL) {
delete pluralRules;
pluralRules=NULL;
}
if (fParsedValuesHash!= NULL) {
delete fParsedValuesHash;
fParsedValuesHash = NULL;
}
if (numberFormat!=NULL) {
delete numberFormat;
numberFormat = NULL;
replacedNumberFormat=NULL;
}
init(NULL, loc, status);
locale = loc;
msgPattern.clear();
delete numberFormat;
offset = 0;
numberFormat = NULL;
pluralRulesWrapper.reset();
init(NULL, status);
}
void
PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& /*status*/) {
// TODO: The copy constructor and assignment op of NumberFormat class are protected.
// create a pointer as the workaround.
replacedNumberFormat = (NumberFormat *)format;
PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
NumberFormat* nf = (NumberFormat*)format->clone();
if (nf != NULL) {
delete numberFormat;
numberFormat = nf;
} else {
status = U_MEMORY_ALLOCATION_ERROR;
}
}
Format*
@ -408,34 +294,14 @@ PluralFormat::clone() const
return new PluralFormat(*this);
}
PluralFormat&
PluralFormat::operator=(const PluralFormat& other) {
if (this != &other) {
UErrorCode status = U_ZERO_ERROR;
delete pluralRules;
delete fParsedValuesHash;
delete numberFormat;
locale = other.locale;
pluralRules = other.pluralRules->clone();
pattern = other.pattern;
copyHashtable(other.fParsedValuesHash, status);
if (U_FAILURE(status)) {
delete pluralRules;
pluralRules = NULL;
fParsedValuesHash = NULL;
numberFormat = NULL;
return *this;
}
numberFormat=NumberFormat::createInstance(locale, status);
if (U_FAILURE(status)) {
delete pluralRules;
delete fParsedValuesHash;
pluralRules = NULL;
fParsedValuesHash = NULL;
numberFormat = NULL;
return *this;
}
replacedNumberFormat=other.replacedNumberFormat;
msgPattern = other.msgPattern;
offset = other.offset;
copyObjects(other);
}
return *this;
@ -443,13 +309,21 @@ PluralFormat::operator=(const PluralFormat& other) {
UBool
PluralFormat::operator==(const Format& other) const {
// This protected comparison operator should only be called by subclasses
// which have confirmed that the other object being compared against is
// an instance of a sublcass of PluralFormat. THIS IS IMPORTANT.
// Format::operator== guarantees that this cast is safe
PluralFormat* fmt = (PluralFormat*)&other;
return ((*pluralRules == *(fmt->pluralRules)) &&
(*numberFormat == *(fmt->numberFormat)));
if (this == &other) {
return TRUE;
}
if (!Format::operator==(other)) {
return FALSE;
}
const PluralFormat& o = (const PluralFormat&)other;
return
locale == o.locale &&
msgPattern == o.msgPattern && // implies same offset
(numberFormat == NULL) == (o.numberFormat == NULL) &&
(numberFormat == NULL || *numberFormat == *o.numberFormat) &&
(pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
(pluralRulesWrapper.pluralRules == NULL ||
*pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
}
UBool
@ -460,72 +334,112 @@ PluralFormat::operator!=(const Format& other) const {
void
PluralFormat::parseObject(const UnicodeString& /*source*/,
Formattable& /*result*/,
ParsePosition& /*pos*/) const
ParsePosition& pos) const
{
// TODO: not yet supported in icu4j and icu4c
// Parsing not supported.
pos.setErrorIndex(pos.getIndex());
}
UnicodeString
PluralFormat::insertFormattedNumber(double number,
UnicodeString& message,
UnicodeString& appendTo,
FieldPosition& pos) const {
UnicodeString result;
int32_t braceStack=0;
int32_t startIndex=0;
if (message.length()==0) {
return result;
int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
const PluralSelector& selector, double number, UErrorCode& ec) {
if (U_FAILURE(ec)) {
return 0;
}
appendTo = numberFormat->format(number, appendTo, pos);
for(int32_t i=0; i<message.length(); ++i) {
switch(message.charAt(i)) {
case LEFTBRACE:
++braceStack;
int32_t count=pattern.countParts();
double offset;
const MessagePattern::Part* part=&pattern.getPart(partIndex);
if (MessagePattern::Part::hasNumericValue(part->getType())) {
offset=pattern.getNumericValue(*part);
++partIndex;
} else {
offset=0;
}
// The keyword is empty until we need to match against non-explicit, not-"other" value.
// Then we get the keyword from the selector.
// (In other words, we never call the selector if we match against an explicit value,
// or if the only non-explicit keyword is "other".)
UnicodeString keyword;
UnicodeString other(FALSE, OTHER_STRING, 5);
// When we find a match, we set msgStart>0 and also set this boolean to true
// to avoid matching the keyword again (duplicates are allowed)
// while we continue to look for an explicit-value match.
UBool haveKeywordMatch=FALSE;
// msgStart is 0 until we find any appropriate sub-message.
// We remember the first "other" sub-message if we have not seen any
// appropriate sub-message before.
// We remember the first matching-keyword sub-message if we have not seen
// one of those before.
// (The parser allows [does not check for] duplicate keywords.
// We just have to make sure to take the first one.)
// We avoid matching the keyword twice by also setting haveKeywordMatch=true
// at the first keyword match.
// We keep going until we find an explicit-value match or reach the end of the plural style.
int32_t msgStart=0;
// Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
// until ARG_LIMIT or end of plural-only pattern.
do {
part=&pattern.getPart(partIndex++);
const UMessagePatternPartType type = part->getType();
if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
break;
case RIGHTBRACE:
--braceStack;
break;
case NUMBER_SIGN:
if (braceStack==0) {
result += UnicodeString(message, startIndex, i);
result += appendTo;
startIndex = i + 1;
}
U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
// part is an ARG_SELECTOR followed by an optional explicit value, and then a message
if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
// explicit value like "=2"
part=&pattern.getPart(partIndex++);
if(number==pattern.getNumericValue(*part)) {
// matches explicit value
return partIndex;
}
} else if(!haveKeywordMatch) {
// plural keyword like "few" or "other"
// Compare "other" first and call the selector if this is not "other".
if(pattern.partSubstringMatches(*part, other)) {
if(msgStart==0) {
msgStart=partIndex;
if(0 == keyword.compare(other)) {
// This is the first "other" sub-message,
// and the selected keyword is also "other".
// Do not match "other" again.
haveKeywordMatch=TRUE;
}
}
} else {
if(keyword.isEmpty()) {
keyword=selector.select(number-offset, ec);
if(msgStart!=0 && (0 == keyword.compare(other))) {
// We have already seen an "other" sub-message.
// Do not match "other" again.
haveKeywordMatch=TRUE;
continue;
}
}
if(pattern.partSubstringMatches(*part, keyword)) {
// keyword matches
msgStart=partIndex;
// Do not match this keyword again.
haveKeywordMatch=TRUE;
}
}
break;
}
}
if ( startIndex < message.length() ) {
result += UnicodeString(message, startIndex, message.length()-startIndex);
}
appendTo = result;
return result;
partIndex=pattern.getLimitPartIndex(partIndex);
} while(++partIndex<count);
return msgStart;
}
void
PluralFormat::copyHashtable(Hashtable *other, UErrorCode& status) {
if (other == NULL || U_FAILURE(status)) {
fParsedValuesHash = NULL;
return;
}
fParsedValuesHash = new Hashtable(TRUE, status);
if(U_FAILURE(status)){
return;
}
fParsedValuesHash->setValueDeleter(deleteHashStrings);
int32_t pos = -1;
const UHashElement* elem = NULL;
// walk through the hash table and create a deep clone
while((elem = other->nextElement(pos))!= NULL){
const UHashTok otherKeyTok = elem->key;
UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
const UHashTok otherKeyToVal = elem->value;
UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
fParsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status);
if(U_FAILURE(status)){
return;
}
}
// Destroys the adapter and deletes the PluralRules object it holds in pluralRules.
// Deleting a NULL pointer is a no-op, so this is safe after reset().
PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
delete pluralRules;
}
// Returns the plural keyword for `number` by forwarding to pluralRules->select().
// The error code parameter is not used by this implementation.
// NOTE(review): pluralRules is dereferenced without a NULL check, and reset()
// leaves it NULL until it is assigned again — confirm that no caller can reach
// this with a NULL pluralRules.
UnicodeString PluralFormat::PluralSelectorAdapter::select(double number,
UErrorCode& /*ec*/) const {
return pluralRules->select(number);
}
// Deletes the held PluralRules object and clears the pointer.
// Setting pluralRules to NULL keeps a later delete of the same member
// (for example in the destructor) from freeing the object twice.
void PluralFormat::PluralSelectorAdapter::reset() {
delete pluralRules;
pluralRules = NULL;
}

View file

@ -13,7 +13,6 @@
*/
#include "unicode/uniset.h"
#include "unicode/utypes.h"
#include "unicode/ures.h"
#include "unicode/plurrule.h"
@ -21,6 +20,7 @@
#include "cstring.h"
#include "hash.h"
#include "mutex.h"
#include "patternprops.h"
#include "plurrule_impl.h"
#include "putilimp.h"
#include "ucln_in.h"
@ -1159,16 +1159,9 @@ RuleChain::isKeyword(const UnicodeString& keywordParam) const {
RuleParser::RuleParser() {
UErrorCode err=U_ZERO_ERROR;
const UnicodeString idStart=UNICODE_STRING_SIMPLE("[[a-z]]");
const UnicodeString idContinue=UNICODE_STRING_SIMPLE("[[a-z][A-Z][_][0-9]]");
idStartFilter = new UnicodeSet(idStart, err);
idContinueFilter = new UnicodeSet(idContinue, err);
}
RuleParser::~RuleParser() {
delete idStartFilter;
delete idContinueFilter;
}
void
@ -1413,21 +1406,7 @@ RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCod
UBool
RuleParser::isValidKeyword(const UnicodeString& token) {
if ( token.length()==0 ) {
return FALSE;
}
if ( idStartFilter->contains(token.charAt(0) )==TRUE ) {
int32_t i;
for (i=1; i< token.length(); i++) {
if (idContinueFilter->contains(token.charAt(i))== FALSE) {
return FALSE;
}
}
return TRUE;
}
else {
return FALSE;
}
return PatternProps::isIdentifier(token.getBuffer(), token.length());
}
PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status) :

View file

@ -13,10 +13,7 @@
#ifndef PLURRULE_IMPLE
#define PLURRULE_IMPLE
/**
* \file
* \brief C++ API: Defines rules for mapping positive long values onto a small set of keywords.
*/
// Internal definitions for the PluralRules implementation.
#if !UCONFIG_NO_FORMATTING
@ -89,8 +86,6 @@ U_NAMESPACE_BEGIN
#define PLURAL_RANGE_HIGH 0x7fffffff;
class UnicodeSet;
typedef enum PluralKey {
pZero,
pOne,
@ -138,9 +133,6 @@ public:
tokenType& type, UErrorCode &status);
void checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status);
private:
UnicodeSet *idStartFilter;
UnicodeSet *idContinueFilter;
void getKeyType(const UnicodeString& token, tokenType& type, UErrorCode &status);
UBool inRange(UChar ch, tokenType& type);
UBool isValidKeyword(const UnicodeString& token);

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
* Copyright (C) 2010 , Yahoo! Inc.
********************************************************************
@ -16,76 +16,41 @@
#include <typeinfo> // for 'typeid' to work
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/ucnv_err.h"
#include "unicode/uchar.h"
#include "unicode/umsg.h"
#include "unicode/messagepattern.h"
#include "unicode/rbnf.h"
#include "unicode/selfmt.h"
#include "unicode/uchar.h"
#include "unicode/ucnv_err.h"
#include "unicode/umsg.h"
#include "unicode/ustring.h"
#include "unicode/utypes.h"
#include "cmemory.h"
#include "util.h"
#include "messageimpl.h"
#include "patternprops.h"
#include "selfmtimpl.h"
#include "uassert.h"
#include "ustrfmt.h"
#include "util.h"
#include "uvector.h"
#include "unicode/selfmt.h"
#include "selfmtimpl.h"
#if !UCONFIG_NO_FORMATTING
U_NAMESPACE_BEGIN
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat)
#define MAX_KEYWORD_SIZE 30
static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0};
SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) : parsedValuesHash(NULL) {
if (U_FAILURE(status)) {
return;
}
initHashTable(status);
SelectFormat::SelectFormat(const UnicodeString& pat,
UErrorCode& status) : msgPattern(status) {
applyPattern(pat, status);
}
SelectFormat::SelectFormat(const SelectFormat& other) : Format(other), parsedValuesHash(NULL) {
UErrorCode status = U_ZERO_ERROR;
pattern = other.pattern;
copyHashtable(other.parsedValuesHash, status);
SelectFormat::SelectFormat(const SelectFormat& other) : Format(other),
msgPattern(other.msgPattern) {
}
SelectFormat::~SelectFormat() {
cleanHashTable();
}
void SelectFormat::initHashTable(UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
// has inited
if (parsedValuesHash != NULL) {
return;
}
parsedValuesHash = new Hashtable(TRUE, status);
if (U_FAILURE(status)) {
cleanHashTable();
return;
} else {
if (parsedValuesHash == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
}
// to use hashtable->equals(), must set Value Compartor.
parsedValuesHash->setValueComparator(uhash_compareCaselessUnicodeString);
}
void SelectFormat::cleanHashTable() {
if (parsedValuesHash != NULL) {
delete parsedValuesHash;
parsedValuesHash = NULL;
}
}
void
@ -94,164 +59,10 @@ SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status)
return;
}
pattern = newPattern;
enum State{ startState, keywordState, pastKeywordState, phraseState};
//Initialization
UnicodeString keyword ;
UnicodeString phrase ;
UnicodeString* ptrPhrase ;
int32_t braceCount = 0;
if (parsedValuesHash == NULL) {
initHashTable(status);
if (U_FAILURE(status)) {
return;
}
msgPattern.parseSelectStyle(newPattern, NULL, status);
if (U_FAILURE(status)) {
msgPattern.clear();
}
parsedValuesHash->removeAll();
parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
//Process the state machine
State state = startState;
for (int32_t i = 0; i < pattern.length(); ++i) {
//Get the character and check its type
UChar ch = pattern.charAt(i);
CharacterClass type = classifyCharacter(ch);
//Allow any character in phrase but nowhere else
if ( type == tOther ) {
if ( state == phraseState ){
phrase += ch;
continue;
}else {
status = U_PATTERN_SYNTAX_ERROR;
cleanHashTable();
return;
}
}
//Process the state machine
switch (state) {
//At the start of pattern
case startState:
switch (type) {
case tSpace:
break;
case tStartKeyword:
state = keywordState;
keyword += ch;
break;
//If anything else is encountered, it's a syntax error
default:
status = U_PATTERN_SYNTAX_ERROR;
cleanHashTable();
return;
}//end of switch(type)
break;
//Handle the keyword state
case keywordState:
switch (type) {
case tSpace:
state = pastKeywordState;
break;
case tStartKeyword:
case tContinueKeyword:
keyword += ch;
break;
case tLeftBrace:
state = phraseState;
break;
//If anything else is encountered, it's a syntax error
default:
status = U_PATTERN_SYNTAX_ERROR;
cleanHashTable();
return;
}//end of switch(type)
break;
//Handle the pastkeyword state
case pastKeywordState:
switch (type) {
case tSpace:
break;
case tLeftBrace:
state = phraseState;
break;
//If anything else is encountered, it's a syntax error
default:
status = U_PATTERN_SYNTAX_ERROR;
cleanHashTable();
return;
}//end of switch(type)
break;
//Handle the phrase state
case phraseState:
switch (type) {
case tLeftBrace:
braceCount++;
phrase += ch;
break;
case tRightBrace:
//Matching keyword, phrase pair found
if (braceCount == 0){
//Check validity of keyword
if (parsedValuesHash->get(keyword) != NULL) {
status = U_DUPLICATE_KEYWORD;
cleanHashTable();
return;
}
if (keyword.length() == 0) {
status = U_PATTERN_SYNTAX_ERROR;
cleanHashTable();
return;
}
//Store the keyword, phrase pair in hashTable
ptrPhrase = new UnicodeString(phrase);
parsedValuesHash->put( keyword, ptrPhrase, status);
//Reinitialize
keyword.remove();
phrase.remove();
ptrPhrase = NULL;
state = startState;
}
if (braceCount > 0){
braceCount-- ;
phrase += ch;
}
break;
default:
phrase += ch;
}//end of switch(type)
break;
//Handle the default case of switch(state)
default:
status = U_PATTERN_SYNTAX_ERROR;
cleanHashTable();
return;
}//end of switch(state)
}
//Check if the state machine is back to startState
if ( state != startState){
status = U_PATTERN_SYNTAX_ERROR;
cleanHashTable();
return;
}
//Check if "other" keyword is present
if ( !checkSufficientDefinition() ) {
status = U_DEFAULT_KEYWORD_MISSING;
cleanHashTable();
}
return;
}
UnicodeString&
@ -260,14 +71,13 @@ SelectFormat::format(const Formattable& obj,
FieldPosition& pos,
UErrorCode& status) const
{
switch (obj.getType())
{
case Formattable::kString:
return format(obj.getString(), appendTo, pos, status);
default:
if( U_SUCCESS(status) ){
status = U_ILLEGAL_ARGUMENT_ERROR;
}
if (U_FAILURE(status)) {
return appendTo;
}
if (obj.getType() == Formattable::kString) {
return format(obj.getString(status), appendTo, pos, status);
} else {
status = U_ILLEGAL_ARGUMENT_ERROR;
return appendTo;
}
}
@ -277,85 +87,66 @@ SelectFormat::format(const UnicodeString& keyword,
UnicodeString& appendTo,
FieldPosition& /*pos */,
UErrorCode& status) const {
if (U_FAILURE(status)) return appendTo;
if (parsedValuesHash == NULL) {
status = U_INVALID_FORMAT_ERROR;
if (U_FAILURE(status)) {
return appendTo;
}
//Check for the validity of the keyword
if ( !checkValidKeyword(keyword) ){
status = U_ILLEGAL_ARGUMENT_ERROR;
// Check for the validity of the keyword
if (!PatternProps::isIdentifier(keyword.getBuffer(), keyword.length())) {
status = U_ILLEGAL_ARGUMENT_ERROR; // Invalid formatting argument.
}
if (msgPattern.countParts() == 0) {
status = U_INVALID_STATE_ERROR;
return appendTo;
}
UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(keyword);
if (selectedPattern == NULL) {
selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER);
int32_t msgStart = findSubMessage(msgPattern, 0, keyword, status);
if (!MessageImpl::jdkAposMode(msgPattern)) {
int32_t patternStart = msgPattern.getPart(msgStart).getLimit();
int32_t msgLimit = msgPattern.getLimitPartIndex(msgStart);
appendTo.append(msgPattern.getPatternString(),
patternStart,
msgPattern.getPatternIndex(msgLimit) - patternStart);
return appendTo;
}
return appendTo += *selectedPattern;
// JDK compatibility mode: Remove SKIP_SYNTAX.
return MessageImpl::appendSubMessageWithoutSkipSyntax(msgPattern, msgStart, appendTo);
}
UnicodeString&
SelectFormat::toPattern(UnicodeString& appendTo) {
return appendTo += pattern;
if (0 == msgPattern.countParts()) {
appendTo.setToBogus();
} else {
appendTo.append(msgPattern.getPatternString());
}
return appendTo;
}
/**
 * Maps one pattern character to the character class consumed by the
 * applyPattern() state machine.
 *
 * @param ch the character to classify
 * @return tStartKeyword for CAP_A..CAP_Z and LOW_A..LOW_Z;
 *         tContinueKeyword for U_ZERO..U_NINE, HYPHEN and LOWLINE;
 *         tSpace when uprv_isRuleWhiteSpace() accepts ch;
 *         tLeftBrace / tRightBrace for LEFTBRACE / RIGHTBRACE;
 *         tOther for anything else.
 */
SelectFormat::CharacterClass
SelectFormat::classifyCharacter(UChar ch) const {
    // Letters are the only characters that may begin a keyword.
    if ((CAP_A <= ch && ch <= CAP_Z) || (LOW_A <= ch && ch <= LOW_Z)) {
        return tStartKeyword;
    }
    // Digits may appear in a keyword, but not as its first character.
    if (U_ZERO <= ch && ch <= U_NINE) {
        return tContinueKeyword;
    }
    if (uprv_isRuleWhiteSpace(ch)) {
        return tSpace;
    }
    if (ch == LEFTBRACE) {
        return tLeftBrace;
    }
    if (ch == RIGHTBRACE) {
        return tRightBrace;
    }
    // Hyphen and underscore behave like digits inside a keyword.
    if (ch == HYPHEN || ch == LOWLINE) {
        return tContinueKeyword;
    }
    return tOther;
}
/**
 * Checks that the parsed pattern defines at least the default rule.
 *
 * @return TRUE if the hash table exists and contains an entry for
 *         SELECT_KEYWORD_OTHER, FALSE otherwise.
 */
UBool
SelectFormat::checkSufficientDefinition() {
    // Without a table there cannot be a default rule.
    if (parsedValuesHash == NULL) {
        return FALSE;
    }
    return parsedValuesHash->get(SELECT_KEYWORD_OTHER) != NULL;
}
UBool
SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{
int32_t len = argKeyword.length();
if (len < 1){
return FALSE;
int32_t SelectFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
const UnicodeString& keyword, UErrorCode& ec) {
if (U_FAILURE(ec)) {
return 0;
}
CharacterClass type = classifyCharacter(argKeyword.charAt(0));
if( type != tStartKeyword ){
return FALSE;
}
for (int32_t i = 0; i < argKeyword.length(); ++i) {
type = classifyCharacter(argKeyword.charAt(i));
if( type != tStartKeyword && type != tContinueKeyword ){
return FALSE;
UnicodeString other(FALSE, SELECT_KEYWORD_OTHER, 5);
int32_t count = pattern.countParts();
int32_t msgStart=0;
// Iterate over (ARG_SELECTOR, message) pairs until ARG_LIMIT or end of select-only pattern.
do {
const MessagePattern::Part& part=pattern.getPart(partIndex++);
const UMessagePatternPartType type=part.getType();
if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
break;
}
}
return TRUE;
// part is an ARG_SELECTOR followed by a message
if(pattern.partSubstringMatches(part, keyword)) {
// keyword matches
return partIndex;
} else if(msgStart==0 && pattern.partSubstringMatches(part, other)) {
msgStart=partIndex;
}
partIndex=pattern.getLimitPartIndex(partIndex);
} while(++partIndex<count);
return msgStart;
}
Format* SelectFormat::clone() const
@ -366,28 +157,21 @@ Format* SelectFormat::clone() const
SelectFormat&
SelectFormat::operator=(const SelectFormat& other) {
if (this != &other) {
UErrorCode status = U_ZERO_ERROR;
pattern = other.pattern;
copyHashtable(other.parsedValuesHash, status);
msgPattern = other.msgPattern;
}
return *this;
}
UBool
SelectFormat::operator==(const Format& other) const {
if( this == &other){
if (this == &other) {
return TRUE;
}
if (typeid(*this) != typeid(other)) {
return FALSE;
}
SelectFormat* fmt = (SelectFormat*)&other;
Hashtable* hashOther = fmt->parsedValuesHash;
if ( parsedValuesHash == NULL && hashOther == NULL)
return TRUE;
if ( parsedValuesHash == NULL || hashOther == NULL)
if (!Format::operator==(other)) {
return FALSE;
return parsedValuesHash->equals(*hashOther);
}
const SelectFormat& o = (const SelectFormat&)other;
return msgPattern == o.msgPattern;
}
UBool
@ -400,46 +184,10 @@ SelectFormat::parseObject(const UnicodeString& /*source*/,
Formattable& /*result*/,
ParsePosition& pos) const
{
// TODO: not yet supported in icu4j and icu4c
// Parsing not supported.
pos.setErrorIndex(pos.getIndex());
}
/**
 * Replaces this object's parsed-values hash table with a deep copy of
 * another table: every value is duplicated into a newly allocated
 * UnicodeString.
 *
 * @param other  The table to copy. If NULL, this object's table is
 *               deleted via cleanHashTable() and nothing else is done.
 * @param status In/out error code. Nothing is done if it already indicates
 *               a failure on entry. Set to U_MEMORY_ALLOCATION_ERROR if a
 *               value copy cannot be allocated; failures reported by
 *               initHashTable() or put() are passed through. If copying
 *               fails part-way, the partially filled table is deleted.
 */
void
SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    if (other == NULL) {
        cleanHashTable();
        return;
    }
    if (parsedValuesHash == NULL) {
        initHashTable(status);
        if (U_FAILURE(status)) {
            return;
        }
    }

    // Start from an empty table whose values are deleted as UnicodeStrings.
    parsedValuesHash->removeAll();
    parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);

    int32_t pos = -1;
    const UHashElement* elem = NULL;

    // Walk through the source table and create a deep clone.
    while ((elem = other->nextElement(pos)) != NULL) {
        const UHashTok otherKeyTok = elem->key;
        UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
        const UHashTok otherKeyToVal = elem->value;
        UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;

        // Check the allocation before handing it to the table, as is done
        // for other "new" results in this code base, so that an allocation
        // failure is reported instead of storing a NULL value.
        UnicodeString* valueCopy = new UnicodeString(*otherValue);
        if (valueCopy == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            cleanHashTable();
            return;
        }

        parsedValuesHash->put(*otherKey, valueCopy, status);
        if (U_FAILURE(status)) {
            cleanHashTable();
            return;
        }
    }
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -1,10 +1,10 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
* Copyright (C) 2010 , Yahoo! Inc.
********************************************************************
* File SELECTFMT_IMPL.H
* File selectfmtimpl.h
*
* Date Name Description
* 11/11/09 kirtig Finished first cut of implementation.
@ -14,11 +14,6 @@
#ifndef SELFMTIMPL
#define SELFMTIMPL
/**
* \file
* \brief C++ API: Defines rules for mapping positive long values onto a small set of keywords.
*/
#if !UCONFIG_NO_FORMATTING
#include "unicode/format.h"

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2006, International Business Machines
* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -32,6 +32,27 @@
#include "uassert.h"
#include "ustr_imp.h"
U_NAMESPACE_BEGIN
/**
 * Isolates this file's access to private internal members of
 * MessageFormat. The class has only static members and is never
 * instantiated; it exists solely for C++ access management.
 */
class MessageFormatAdapter {
public:
    /**
     * Forwards to MessageFormat::getArgTypeList().
     *
     * @param m     the MessageFormat to query
     * @param count passed through to MessageFormat::getArgTypeList()
     * @return whatever MessageFormat::getArgTypeList() returns
     */
    static const Formattable::Type* getArgTypeList(const MessageFormat& m,
                                                   int32_t& count) {
        return m.getArgTypeList(count);
    }

    /**
     * Reads the hasArgTypeConflicts member of the given MessageFormat.
     *
     * @param m the MessageFormat to query
     * @return the value of m.hasArgTypeConflicts
     */
    static UBool hasArgTypeConflicts(const MessageFormat& m) {
        return m.hasArgTypeConflicts;
    }
};
U_NAMESPACE_END
U_NAMESPACE_USE
U_CAPI int32_t
@ -217,25 +238,23 @@ umsg_open( const UChar *pattern,
}
UParseError tErr;
if(parseError==NULL)
{
parseError = &tErr;
}
UMessageFormat* retVal = 0;
int32_t len = (patternLength == -1 ? u_strlen(pattern) : patternLength);
UnicodeString patString((patternLength == -1 ? TRUE:FALSE), pattern,len);
UnicodeString patString(patternLength == -1, pattern, len);
retVal = (UMessageFormat*) new MessageFormat(patString,Locale(locale),*parseError,*status);
if(retVal == 0) {
MessageFormat* retVal = new MessageFormat(patString,Locale(locale),*parseError,*status);
if(retVal == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return 0;
return NULL;
}
return retVal;
if (U_SUCCESS(*status) && MessageFormatAdapter::hasArgTypeConflicts(*retVal)) {
*status = U_ARGUMENT_TYPE_MISMATCH;
}
return (UMessageFormat*)retVal;
}
U_CAPI void U_EXPORT2
@ -366,24 +385,6 @@ umsg_format( const UMessageFormat *fmt,
return actLen;
}
U_NAMESPACE_BEGIN
/**
* This class isolates our access to private internal methods of
* MessageFormat. It is never instantiated; it exists only for C++
* access management.
*/
class MessageFormatAdapter {
public:
static const Formattable::Type* getArgTypeList(const MessageFormat& m,
int32_t& count);
};
const Formattable::Type*
MessageFormatAdapter::getArgTypeList(const MessageFormat& m,
int32_t& count) {
return m.getArgTypeList(count);
}
U_NAMESPACE_END
U_CAPI int32_t U_EXPORT2
umsg_vformat( const UMessageFormat *fmt,
UChar *result,
@ -456,11 +457,13 @@ umsg_vformat( const UMessageFormat *fmt,
break;
case Formattable::kObject:
default:
// This will never happen because MessageFormat doesn't
// support kObject. When MessageFormat is changed to
// understand MeasureFormats, modify this code to do the
// right thing. [alan]
U_ASSERT(FALSE);
*status=U_ILLEGAL_ARGUMENT_ERROR;
break;
}
}

View file

@ -1,6 +1,6 @@
/*
********************************************************************************
* Copyright (C) 1997-2010, International Business Machines
* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************************
*
@ -31,147 +31,91 @@
#if !UCONFIG_NO_FORMATTING
#include "unicode/unistr.h"
#include "unicode/numfmt.h"
#include "unicode/fieldpos.h"
#include "unicode/format.h"
#include "unicode/messagepattern.h"
#include "unicode/numfmt.h"
#include "unicode/unistr.h"
U_NAMESPACE_BEGIN
class MessageFormat;
/**
* ChoiceFormat converts between ranges of numeric values
* and string names for those ranges. A <code>ChoiceFormat</code> splits
* the real number line <code>-Inf</code> to <code>+Inf</code> into two
* ChoiceFormat converts between ranges of numeric values and strings for those ranges.
* The strings must conform to the MessageFormat pattern syntax.
*
* <p><em><code>ChoiceFormat</code> is probably not what you need.
* Please use <code>MessageFormat</code>
* with <code>plural</code> arguments for proper plural selection,
* and <code>select</code> arguments for simple selection among a fixed set of choices!</em></p>
*
* <p>A <code>ChoiceFormat</code> splits
* the real number line \htmlonly<code>-&#x221E;</code> to
* <code>+&#x221E;</code>\endhtmlonly into two
* or more contiguous ranges. Each range is mapped to a
* string. <code>ChoiceFormat</code> is generally used in a
* <code>MessageFormat</code> for displaying grammatically correct
* plurals such as &quot;There are 2 files.&quot;</p>
* string.</p>
*
* <p><code>ChoiceFormat</code> was originally intended
* for displaying grammatically correct
* plurals such as &quot;There is one file.&quot; vs. &quot;There are 2 files.&quot;
* <em>However,</em> plural rules for many languages
* are too complex for the capabilities of ChoiceFormat,
* and its requirement of specifying the precise rules for each message
* is unmanageable for translators.</p>
*
* <p>There are two methods of defining a <code>ChoiceFormat</code>; both
* are equivalent. The first is by using a string pattern. This is the
* preferred method in most cases. The second method is through direct
* specification of the arrays that make up the
* specification of the arrays that logically make up the
* <code>ChoiceFormat</code>.</p>
*
* <p><strong>Patterns</strong></p>
* <p>Note: Typically, choice formatting is done (if done at all) via <code>MessageFormat</code>
* with a <code>choice</code> argument type,
* rather than using a stand-alone <code>ChoiceFormat</code>.</p>
*
* <p>In most cases, the preferred way to define a
* <code>ChoiceFormat</code> is with a pattern. Here is an example of a
* <code>ChoiceFormat</code> pattern:</p>
* <h5>Patterns and Their Interpretation</h5>
*
* \htmlonly<pre> 0&#x2264;are no files|1&#x2264;is one file|1&lt;are many files</pre>\endhtmlonly
* <p>The pattern string defines the range boundaries and the strings for each number range.
* Syntax:
* <pre>
* choiceStyle = number separator message ('|' number separator message)*
* number = normal_number | ['-'] \htmlonly&#x221E;\endhtmlonly (U+221E, infinity)
* normal_number = double value (unlocalized ASCII string)
* separator = less_than | less_than_or_equal
* less_than = '<'
* less_than_or_equal = '#' | \htmlonly&#x2264;\endhtmlonly (U+2264)
* message: see {@link MessageFormat}
* </pre>
* Pattern_White_Space between syntax elements is ignored, except
* around each range's sub-message.</p>
*
* <p>or equivalently,</p>
* <p>Each numeric sub-range extends from the current range's number
* to the next range's number.
* The number itself is included in its range if a <code>less_than_or_equal</code> sign is used,
* and excluded from its range (and instead included in the previous range)
* if a <code>less_than</code> sign is used.</p>
*
* \htmlonly<pre> 0#are no files|1#is one file|1&lt;are many files</pre>\endhtmlonly
* <p>When a <code>ChoiceFormat</code> is constructed from
* arrays of numbers, closure flags and strings,
* they are interpreted just like
* the sequence of <code>(number separator string)</code> in an equivalent pattern string.
* <code>closure[i]==TRUE</code> corresponds to a <code>less_than</code> separator sign.
* The equivalent pattern string will be constructed automatically.</p>
*
* <p>The pattern consists of a number or <em>range specifiers</em>
* separated by vertical bars '|' (U+007C). There is no
* vertical bar after the last range. Each range specifier is of the
* form:</p>
* <p>During formatting, a number is mapped to the first range
* where the number is not greater than the range's upper limit.
* That range's message string is returned. A NaN maps to the very first range.</p>
*
* \htmlonly<blockquote><em>Number Separator String</em></blockquote>\endhtmlonly
* <p>During parsing, a range is selected for the longest match of
* any range's message. That range's number is returned, ignoring the separator/closure.
* Only a simple string match is performed, without parsing of arguments that
* might be specified in the message strings.</p>
*
* <p><em>Number</em> is a floating point number that can be parsed by a
* default <code>NumberFormat</code> for the US locale. It gives the
* lower limit of this range. The lower limit is either inclusive or
* exclusive, depending on the <em>separator</em>. The upper limit is
* given by the lower limit of the next range. The Unicode infinity
* sign \htmlonly&#x221E \endhtmlonly (U+221E) is recognized for positive infinity. It may be preceded by
* '-' (U+002D) to indicate negative infinity.</p>
* <p>Note that the first range's number is ignored in formatting
* but may be returned from parsing.</p>
*
* <p><em>String</em> is the format string for this range, with special
* characters enclosed in single quotes (<code>'The #
* sign'</code>). Single quotes themselves are indicated by two single
* quotes in a row (<code>'o''clock'</code>).</p>
*
* <p><em>Separator</em> is one of the following single characters:
*
* <ul>
* <li>\htmlonly'&#x2264;' \endhtmlonly (U+2264) or '#' (U+0023)
* indicates that the lower limit given by <em>Number</em> is
* inclusive. (The two characters are equivalent to ChoiceFormat.)
* This means that the limit value <em>Number</em> belongs to this
* range. Another way of saying this is that the corresponding
* closure is <code>FALSE</code>.</li>
*
* <li>'<' (U+003C) indicates that the lower limit given by
* <em>Number</em> is exclusive. This means that the value
* <em>Number</em> belongs to the prior range.</li> Another way of
* saying this is that the corresponding closure is
* <code>TRUE</code>.
* </ul>
*
* <p>See below for more information about closures.</p>
*
* <p><strong>Arrays</strong></p>
*
* <p>A <code>ChoiceFormat</code> defining <code>n</code> intervals
* (<code>n</code> &gt;= 2) is specified by three arrays of
* <code>n</code> items:
*
* <ul>
* <li><code>double limits[]</code> gives the start of each
* interval. This must be a non-decreasing list of values, none of
* which may be <code>NaN</code>.</li>
* <li><code>UBool closures[]</code> determines whether each limit
* value is contained in the interval below it or in the interval
* above it. If <code>closures[i]</code> is <code>FALSE</code>, then
* <code>limits[i]</code> is a member of interval
* <code>i</code>. Otherwise it is a member of interval
* <code>i+1</code>. If no closures array is specified, this is
* equivalent to having all closures be <code>FALSE</code>. Closures
* allow one to specify half-open, open, or closed intervals.</li>
* <li><code>UnicodeString formats[]</code> gives the string label
* associated with each interval.</li>
* </ul>
*
* <p><strong>Formatting and Parsing</strong></p>
*
* <p>During formatting, a number is converted to a
* string. <code>ChoiceFormat</code> accomplishes this by mapping the
* number to an interval using the following rule. Given a number
* <code>X</code> and and index value <code>j</code> in the range
* <code>0..n-1</code>, where <code>n</code> is the number of ranges:</p>
*
* \htmlonly<blockquote>\endhtmlonly<code>X</code> matches <code>j</code> if and only if
* <code>limit[j] &lt;= X &lt; limit[j+1]</code>
* \htmlonly</blockquote>\endhtmlonly
*
* <p>(This assumes that all closures are <code>FALSE</code>. If some
* closures are <code>TRUE</code> then the relations must be changed to
* <code>&lt;=</code> or <code>&lt;</code> as appropriate.) If there is
* no match, then either the first or last index is used, depending on
* whether the number is too low or too high. Once a number is mapped to
* an interval <code>j</code>, the string <code>formats[j]</code> is
* output.</p>
*
* <p>During parsing, a string is converted to a
* number. <code>ChoiceFormat</code> finds the element
* <code>formats[j]</code> equal to the string, and returns
* <code>limits[j]</code> as the parsed value.</p>
*
* <p><strong>Notes</strong></p>
*
* <p>The first limit value does not define a range boundary. For
* example, in the pattern \htmlonly&quot;<code>1.0#a|2.0#b</code>&quot;\endhtmlonly, the
* intervals are [-Inf, 2.0) and [2.0, +Inf]. It appears that the first
* interval should be [1.0, 2.0). However, since all values that are too
* small are mapped to range zero, the first interval is effectively
* [-Inf, 2.0). However, the first limit value <em>is</em> used during
* formatting. In this example, <code>parse(&quot;a&quot;)</code> returns
* 1.0.</p>
*
* <p>There are no gaps between intervals and the entire number line is
* covered. A <code>ChoiceFormat</code> maps <em>all</em> possible
* double values to a finite set of intervals.</p>
*
* <p>The non-number <code>NaN</code> is mapped to interval zero during
* formatting.</p>
*
* <p><strong>Examples</strong></p>
* <h5>Examples</h5>
*
* <p>Here is an example of two arrays that map the number
* <code>1..7</code> to the English day of the week abbreviations
@ -183,13 +127,15 @@ class MessageFormat;
*
* <p>Here is an example that maps the ranges [-Inf, 1), [1, 1], and (1,
* +Inf] to three strings. That is, the number line is split into three
* ranges: x &lt; 1.0, x = 1.0, and x &gt; 1.0.</p>
* ranges: x &lt; 1.0, x = 1.0, and x &gt; 1.0.
* (The round parentheses in the notation above indicate an exclusive boundary,
* like the turned bracket in European notation: [-Inf, 1) == [-Inf, 1[ )</p>
*
* <pre> {0, 1, 1},
* {FALSE, FALSE, TRUE},
* {&quot;no files&quot;, &quot;one file&quot;, &quot;many files&quot;}</pre>
*
* <p>Here is a simple example that shows formatting and parsing: </p>
* <p>Here is an example that shows formatting and parsing: </p>
*
* \code
* #include <unicode/choicfmt.h>
@ -215,43 +161,6 @@ class MessageFormat;
* }
* \endcode
*
* <p>Here is a more complex example using a <code>ChoiceFormat</code>
* constructed from a pattern together with a
* <code>MessageFormat</code>.</p>
*
* \code
* #include <unicode/choicfmt.h>
* #include <unicode/msgfmt.h>
* #include <unicode/unistr.h>
* #include <iostream.h>
*
* int main(int argc, char *argv[]) {
* UErrorCode status = U_ZERO_ERROR;
* double filelimits[] = {0,1,2};
* UnicodeString filepart[] =
* {"are no files","is one file","are {0} files"};
* ChoiceFormat* fileform = new ChoiceFormat(filelimits, filepart, 3 );
* Format* testFormats[] =
* {fileform, NULL, NumberFormat::createInstance(status)};
* MessageFormat pattform("There {0} on {1}", status );
* pattform.adoptFormats(testFormats, 3);
* Formattable testArgs[] = {0L, "Disk A"};
* FieldPosition fp(0);
* UnicodeString str;
* char buf[256];
* for (int32_t i = 0; i < 4; ++i) {
* Formattable fInt(i);
* testArgs[0] = fInt;
* pattform.format(testArgs, 2, str, fp, status );
* str.extract(0, str.length(), buf, "");
* str.truncate(0);
* cout << "Output for i=" << i << " : " << buf << endl;
* }
* cout << endl;
* return 0;
* }
* \endcode
*
* <p><em>User subclasses are not supported.</em> While clients may write
* subclasses, such code will not necessarily work and will not be
* guaranteed to work stably from release to release.
@ -259,8 +168,7 @@ class MessageFormat;
class U_I18N_API ChoiceFormat: public NumberFormat {
public:
/**
* Construct a new ChoiceFormat with the limits and the corresponding formats
* based on the pattern.
* Constructs a new ChoiceFormat from the pattern string.
*
* @param pattern Pattern used to construct object.
* @param status Output param to receive success code. If the
@ -272,32 +180,31 @@ public:
/**
* Construct a new ChoiceFormat with the given limits and formats. Copy
* the limits and formats instead of adopting them.
* Constructs a new ChoiceFormat with the given limits and message strings.
* All closure flags default to <code>FALSE</code>,
* equivalent to <code>less_than_or_equal</code> separators.
*
* Copies the limits and formats instead of adopting them.
*
* @param limits Array of limit values.
* @param formats Array of formats.
* @param count Size of 'limits' and 'formats' arrays.
* @stable ICU 2.0
*/
ChoiceFormat(const double* limits,
const UnicodeString* formats,
int32_t count );
/**
* Construct a new ChoiceFormat with the given limits and formats.
* Copy the limits and formats (instead of adopting them). By
* default, each limit in the array specifies the inclusive lower
* bound of its range, and the exclusive upper bound of the previous
* range. However, if the isLimitOpen element corresponding to a
* limit is TRUE, then the limit is the exclusive lower bound of its
* range, and the inclusive upper bound of the previous range.
* Constructs a new ChoiceFormat with the given limits, closure flags and message strings.
*
* Copies the limits and formats instead of adopting them.
*
* @param limits Array of limit values
* @param closures Array of booleans specifying whether each
* element of 'limits' is open or closed. If FALSE, then the
* corresponding limit is a member of the range above it. If TRUE,
* then the limit belongs to the range below it.
* corresponding limit number is a member of its range.
* If TRUE, then the limit number belongs to the previous range.
* @param formats Array of formats
* @param count Size of 'limits', 'closures', and 'formats' arrays
* @stable ICU 2.4
@ -330,8 +237,8 @@ public:
virtual ~ChoiceFormat();
/**
* Clone this Format object polymorphically. The caller owns the
* result and should delete it when done.
* Clones this Format object. The caller owns the
* result and must delete it when done.
*
* @return a copy of this object
* @stable ICU 2.0
@ -339,7 +246,7 @@ public:
virtual Format* clone(void) const;
/**
* Return true if the given Format objects are semantically equal.
* Returns true if the given Format objects are semantically equal.
* Objects of different subclasses are considered unequal.
*
* @param other ChoiceFormat object to be compared
@ -362,7 +269,7 @@ public:
/**
* Sets the pattern.
* @param pattern The pattern to be applied.
* @param parseError Struct to recieve information on position
* @param parseError Struct to receive information on position
* of error if an error is encountered
* @param status Output param set to success/failure code on
* exit. If the pattern is invalid, this will be
@ -375,7 +282,7 @@ public:
/**
* Gets the pattern.
*
* @param pattern Output param which will recieve the pattern
* @param pattern Output param which will receive the pattern
* Previous contents are deleted.
* @return A reference to 'pattern'
* @stable ICU 2.0
@ -383,7 +290,8 @@ public:
virtual UnicodeString& toPattern(UnicodeString &pattern) const;
/**
* Set the choices to be used in formatting.
* Sets the choices to be used in formatting.
* For details see the constructor with the same parameter list.
*
* @param limitsToCopy Contains the top value that you want
* parsed with that format, and should be in
@ -399,8 +307,9 @@ public:
int32_t count );
/**
* Set the choices to be used in formatting. See class description
* for documenatation of the limits, closures, and formats arrays.
* Sets the choices to be used in formatting.
* For details see the constructor with the same parameter list.
*
* @param limits Array of limits
* @param closures Array of limit booleans
* @param formats Array of format string
@ -413,30 +322,32 @@ public:
int32_t count);
/**
* Get the limits passed in the constructor.
* Returns NULL and 0.
* Before ICU 4.8, this used to return the choice limits array.
*
* @param count The size of the limits arrays
* @return the limits.
* @stable ICU 2.0
* @param count Will be set to 0.
* @return NULL
* @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern.
*/
virtual const double* getLimits(int32_t& count) const;
/**
* Get the limit booleans passed in the constructor. The caller
* must not delete the result.
* Returns NULL and 0.
* Before ICU 4.8, this used to return the limit booleans array.
*
* @param count The size of the arrays
* @return the closures
* @stable ICU 2.4
* @param count Will be set to 0.
* @return NULL
* @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern.
*/
virtual const UBool* getClosures(int32_t& count) const;
/**
* Get the formats passed in the constructor.
* Returns NULL and 0.
* Before ICU 4.8, this used to return the array of choice strings.
*
* @param count The size of the arrays
* @return the formats.
* @stable ICU 2.0
* @param count Will be set to 0.
* @return NULL
* @deprecated ICU 4.8 Use the MessagePattern class to analyze a ChoiceFormat pattern.
*/
virtual const UnicodeString* getFormats(int32_t& count) const;
@ -444,7 +355,7 @@ public:
using NumberFormat::format;
/**
* Format a double or long number using this object's choices.
* Formats a double number using this object's choices.
*
* @param number The value to be formatted.
* @param appendTo Output parameter to receive result.
@ -458,7 +369,7 @@ public:
UnicodeString& appendTo,
FieldPosition& pos) const;
/**
* Format a int_32t number using this object's choices.
* Formats an int32_t number using this object's choices.
*
* @param number The value to be formatted.
* @param appendTo Output parameter to receive result.
@ -473,7 +384,7 @@ public:
FieldPosition& pos) const;
/**
* Format an int64_t number using this object's choices.
* Formats an int64_t number using this object's choices.
*
* @param number The value to be formatted.
* @param appendTo Output parameter to receive result.
@ -488,7 +399,7 @@ public:
FieldPosition& pos) const;
/**
* Format an array of objects using this object's choices.
* Formats an array of objects using this object's choices.
*
* @param objs The array of objects to be formatted.
* @param cnt The size of objs.
@ -507,7 +418,7 @@ public:
FieldPosition& pos,
UErrorCode& success) const;
/**
* Format an object using this object's choices.
* Formats an object using this object's choices.
*
*
* @param obj The object to be formatted.
@ -542,7 +453,7 @@ public:
/**
* Redeclared NumberFormat method.
* Format a double number. These methods call the NumberFormat
* Formats a double number. These methods call the NumberFormat
* pure virtual format() methods with the default FieldPosition.
*
* @param number The value to be formatted.
@ -556,7 +467,7 @@ public:
/**
* Redeclared NumberFormat method.
* Format a long number. These methods call the NumberFormat
* Formats an int32_t number. These methods call the NumberFormat
* pure virtual format() methods with the default FieldPosition.
*
* @param number The value to be formatted.
@ -569,13 +480,10 @@ public:
UnicodeString& appendTo) const;
/**
* Return a long if possible (e.g. within range LONG_MAX,
* LONG_MAX], and with no decimals), otherwise a double. If
* IntegerOnly is set, will stop at a decimal point (or equivalent;
* e.g. for rational numbers "1 2/3", will stop after the 1).
* <P>
* If no object can be parsed, parsePosition is unchanged, and NULL is
* returned.
* Looks for the longest match of any message string on the input text and,
* if there is a match, sets the result object to the corresponding range's number.
*
* If no string matches, then the parsePosition is unchanged.
*
* @param text The text to be parsed.
* @param result Formattable to be set to the parse result.
@ -583,7 +491,6 @@ public:
* @param parsePosition The position to start parsing at on input.
* On output, moved to after the last successfully
* parsed character. On parse failure, does not change.
* @see NumberFormat::isParseIntegerOnly
* @stable ICU 2.0
*/
virtual void parse(const UnicodeString& text,
@ -591,32 +498,23 @@ public:
ParsePosition& parsePosition) const;
/**
* Return a long if possible (e.g. within range LONG_MAX,
* LONG_MAX], and with no decimals), otherwise a double. If
* IntegerOnly is set, will stop at a decimal point (or equivalent;
* e.g. for rational numbers "1 2/3", will stop after the 1).
* <P>
* If no object can be parsed, parsePosition is unchanged, and NULL is
* returned.
*
* @param text The text to be parsed.
* @param result Formattable to be set to the parse result.
* If parse fails, return contents are undefined.
* @param status Output param with the formatted string.
* @see NumberFormat::isParseIntegerOnly
* @stable ICU 2.0
*/
* Looks for the longest match of any message string on the input text and,
* if there is a match, sets the result object to the corresponding range's number.
*
* If no string matches, then the UErrorCode is set to U_INVALID_FORMAT_ERROR.
*
* @param text The text to be parsed.
* @param result Formattable to be set to the parse result.
* If parse fails, return contents are undefined.
* @param status Output param set to success/failure code.
* @stable ICU 2.0
*/
virtual void parse(const UnicodeString& text,
Formattable& result,
UErrorCode& status) const;
public:
/**
* Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
* This method is to implement a simple version of RTTI, since not all
* C++ compilers support genuine RTTI. Polymorphic operator==() and
* clone() methods call this method.
* Returns a unique class ID POLYMORPHICALLY. Part of ICU's "poor man's RTTI".
*
* @return The class ID for this object. All objects of a
* given class have the same class ID. Objects of
@ -626,7 +524,7 @@ public:
virtual UClassID getDynamicClassID(void) const;
/**
* Return the class ID for this class. This is useful only for
* Returns the class ID for this class. This is useful only for
* comparing to a return value from getDynamicClassID(). For example:
* <pre>
* . Base* polymorphic_pointer = createPolymorphicObject();
@ -639,22 +537,9 @@ public:
static UClassID U_EXPORT2 getStaticClassID(void);
private:
// static cache management (thread-safe)
// static NumberFormat* getNumberFormat(UErrorCode &status); // call this function to 'check out' a numberformat from the cache.
// static void releaseNumberFormat(NumberFormat *adopt); // call this function to 'return' the number format to the cache.
/**
* Converts a string to a double value using a default NumberFormat object
* which is static (shared by all ChoiceFormat instances).
* @param string the string to be converted with.
* @return the converted double number.
*/
static double stod(const UnicodeString& string);
/**
* Converts a double value to a string using a default NumberFormat object
* which is static (shared by all ChoiceFormat instances).
* @param value the double number to be converted with.
* Converts a double value to a string.
* @param value the double number to be converted.
* @param string the result string.
* @return the converted string.
*/
@ -667,7 +552,7 @@ private:
* based on the pattern.
*
* @param newPattern Pattern used to construct object.
* @param parseError Struct to recieve information on position
* @param parseError Struct to receive information on position
* of error if an error is encountered.
* @param status Output param to receive success code. If the
* pattern cannot be parsed, set to failure code.
@ -678,7 +563,59 @@ private:
UErrorCode& status);
friend class MessageFormat;
virtual void setChoices(const double* limits,
const UBool* closures,
const UnicodeString* formats,
int32_t count,
UErrorCode &errorCode);
/**
* Finds the ChoiceFormat sub-message for the given number.
* @param pattern A MessagePattern.
* @param partIndex the index of the first ChoiceFormat argument style part.
* @param number a number to be mapped to one of the ChoiceFormat argument's intervals
* @return the sub-message start part index.
*/
static int32_t findSubMessage(const MessagePattern &pattern, int32_t partIndex, double number);
static double parseArgument(
const MessagePattern &pattern, int32_t partIndex,
const UnicodeString &source, ParsePosition &pos);
/**
* Matches the pattern string from the end of the partIndex to
* the beginning of the limitPartIndex,
* including all syntax except SKIP_SYNTAX,
* against the source string starting at sourceOffset.
* If they match, returns the length of the source string match.
* Otherwise returns -1.
*/
static int32_t matchStringUntilLimitPart(
const MessagePattern &pattern, int32_t partIndex, int32_t limitPartIndex,
const UnicodeString &source, int32_t sourceOffset);
/**
* Some of the ChoiceFormat constructors do not have a UErrorCode parameter.
* We need _some_ way to provide one for the MessagePattern constructor.
* Alternatively, the MessagePattern could be a pointer field, but that is
* not nice either.
*/
UErrorCode constructorErrorCode;
/**
* The MessagePattern which contains the parsed structure of the pattern string.
*
* Starting with ICU 4.8, the MessagePattern contains a sequence of
* numeric/selector/message parts corresponding to the parsed pattern.
* For details see the MessagePattern class API docs.
*/
MessagePattern msgPattern;
/**
* Docs & fields from before ICU 4.8, before MessagePattern was used.
* Commented out, and left only for explanation of semantics.
* --------
* Each ChoiceFormat divides the range -Inf..+Inf into fCount
* intervals. The intervals are:
*
@ -713,12 +650,11 @@ private:
*
* Because of the nature of interval 0, fClosures[0] has no
* effect.
*/
double* fChoiceLimits;
UBool* fClosures;
UnicodeString* fChoiceFormats;
int32_t fCount;
// double* fChoiceLimits;
// UBool* fClosures;
// UnicodeString* fChoiceFormats;
// int32_t fCount;
};
inline UnicodeString&

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2007-2010, International Business Machines Corporation and
* Copyright (C) 2007-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************************
*
@ -28,106 +28,213 @@
#include "unicode/format.h"
#include "unicode/locid.h"
#include "unicode/messagepattern.h"
#include "unicode/parseerr.h"
#include "unicode/uchar.h"
#include "unicode/plurfmt.h"
#include "unicode/plurrule.h"
U_CDECL_BEGIN
// Forward declaration.
struct UHashtable;
typedef struct UHashtable UHashtable;
U_CDECL_END
U_NAMESPACE_BEGIN
class NumberFormat;
class AppendableWrapper;
class DateFormat;
class NumberFormat;
/**
* <p>MessageFormat prepares strings for display to users,
* with optional arguments (variables/placeholders).
* The arguments can occur in any order, which is necessary for translation
* into languages with different grammars.
*
* MessageFormat produces concatenated messages in a language-neutral
* way. Use this whenever concatenating strings that are displayed to
* end users.
* <p>A MessageFormat is constructed from a <em>pattern</em> string
* with arguments in {curly braces} which will be replaced by formatted values.
*
* <P>A MessageFormat contains an array of <EM>subformats</EM> arranged
* within a <EM>template string</EM>. Together, the subformats and
* template string determine how the MessageFormat will operate during
* formatting and parsing.
* <p><code>MessageFormat</code> differs from the other <code>Format</code>
* classes in that you create a <code>MessageFormat</code> object with one
* of its constructors (not with a <code>createInstance</code> style factory
* method). Factory methods aren't necessary because <code>MessageFormat</code>
* itself doesn't implement locale-specific behavior. Any locale-specific
* behavior is defined by the pattern that you provide and the
* subformats used for inserted arguments.
*
* <P>Typically, both the subformats and the template string are
* specified at once in a <EM>pattern</EM>. By using different
* patterns for different locales, messages may be localized.
* <p>Arguments can be named (using identifiers) or numbered (using small ASCII-digit integers).
* Some of the API methods work only with argument numbers and set a failing UErrorCode
* if the pattern has named arguments (see {@link #usesNamedArguments()}).
*
* <P>When formatting, MessageFormat takes an array of arguments
* and produces a user-readable string. Each argument is a
* Formattable object; they may be passed in in an array, or as a
* single Formattable object which itself contains an array. Each
* argument is matched up with its corresponding subformat, which then
* formats it into a string. The resulting strings are then assembled
* within the string template of the MessageFormat to produce the
* final output string.
* <p>An argument might not specify any format type. In this case,
* a Number value is formatted with a default (for the locale) NumberFormat,
* a Date value is formatted with a default (for the locale) DateFormat,
* and for any other value its toString() value is used.
*
* <p><strong>Note:</strong>
* In ICU 4.0 MessageFormat supports named arguments. If a named argument
* is used, all arguments must be named. Names start with a character in
* <code>UCHAR_ID_START</code> and continue with characters in
* <code>UCHARID_CONTINUE</code>, in particular they do not start with a digit.
* If named arguments are used, {@link #usesNamedArguments()} will return true.
* <p>An argument might specify a "simple" type for which the specified
* Format object is created, cached and used.
*
* <p>The other new methods supporting named arguments are
* {@link #getFormatNames(UErrorCode& status)},
* {@link #getFormat(const UnicodeString& formatName, UErrorCode& status)}
* {@link #setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status)},
* {@link #adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status)},
* {@link #format(const UnicodeString* argumentNames, const Formattable* arguments,
* int32_t count, UnicodeString& appendTo,UErrorCode& status)}.
* These methods are all compatible with patterns that do not used named arguments--
* in these cases the keys in the input or output use <code>UnicodeString</code>s
* that name the argument indices, e.g. "0", "1", "2"... etc.
* <p>An argument might have a "complex" type with nested MessageFormat sub-patterns.
* During formatting, one of these sub-messages is selected according to the argument value
* and recursively formatted.
*
* <p>If this format uses named arguments, certain methods that take or
* return arrays do not perform any action, since it is not possible to
* identify positions in an array using a name. Of these methods,
* UErrorCode is set to U_ILLEGAL_ARGUMENT_ERROR by format, and to
* U_ARGUMENT_TYPE_MISMATCH by parse.
* These methods are
* {@link #adoptFormats(Format** formatsToAdopt, int32_t count)},
* {@link #setFormats(const Format** newFormats,int32_t count)},
* {@link #adoptFormat(int32_t n, Format *newFormat)},
* {@link #setFormat(int32_t n, Format& newFormat)},
* {@link #format(const Formattable* source, int32_t count, UnicodeString& appendTo, FieldPosition& ignore, UErrorCode& success)},
* {@link #format(const UnicodeString& pattern,const Formattable* arguments,int32_t cnt,UnicodeString& appendTo,UErrorCode& success)},
* {@link #format(const Formattable& source, UnicodeString& appendTo, FieldPosition& ignore, UErrorCode& success)},
* {@link #format(const Formattable* arguments, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection,UErrorCode& success)},
* {@link #parse(const UnicodeString& source, ParsePosition& pos, int32_t& count)},
* {@link #parse(const UnicodeString& source, int32_t& cnt, UErrorCode& status)}
* <p>After construction, a custom Format object can be set for
* a top-level argument, overriding the default formatting and parsing behavior
* for that argument.
* However, custom formatting can be achieved more simply by writing
* a typeless argument in the pattern string
* and supplying it with a preformatted string value.
*
* <P>
* During parsing, an input string is matched against the string
* template of the MessageFormat to produce an array of Formattable
* objects. Plain text of the template string is matched directly
* against input text. At each position in the template string where
* a subformat is located, the subformat is called to parse the
* corresponding segment of input text to produce an output argument.
* In this way, an array of arguments is created which together
* constitute the parse result.
* <P>
* Parsing may fail or produce unexpected results in a number of
* circumstances.
* <UL>
* <LI>If one of the arguments does not occur in the pattern, it
* will be returned as a default Formattable.
* <LI>If the format of an argument loses information, such as with
* a choice format where a large number formats to "many", then the
* parse may not correspond to the originally formatted argument.
* <LI>MessageFormat does not handle ChoiceFormat recursion during
* parsing; such parses will fail.
* <LI>Parsing will not always find a match (or the correct match) if
* some part of the parse is ambiguous. For example, if the pattern
* "{1},{2}" is used with the string arguments {"a,b", "c"}, it will
* format as "a,b,c". When the result is parsed, it will return {"a",
* "b,c"}.
* <LI>If a single argument is formatted more than once in the string,
* then the rightmost subformat in the pattern string will produce the
* parse result; prior subformats with the same argument index will
* have no effect.
* </UL>
* Here are some examples of usage:
* <P>
* <p>When formatting, MessageFormat takes a collection of argument values
* and writes an output string.
* The argument values may be passed as an array
* (when the pattern contains only numbered arguments)
* or as an array of names and an array of arguments (which works for both named
* and numbered arguments).
*
* <p>Each argument is matched with one of the input values by array index or argument name
* and formatted according to its pattern specification
* (or using a custom Format object if one was set).
* A numbered pattern argument is matched with an argument name that contains that number
* as an ASCII-decimal-digit string (without leading zero).
*
* <h4><a name="patterns">Patterns and Their Interpretation</a></h4>
*
* <code>MessageFormat</code> uses patterns of the following form:
* <pre>
* message = messageText (argument messageText)*
* argument = noneArg | simpleArg | complexArg
* complexArg = choiceArg | pluralArg | selectArg
*
* noneArg = '{' argNameOrNumber '}'
* simpleArg = '{' argNameOrNumber ',' argType [',' argStyle] '}'
* choiceArg = '{' argNameOrNumber ',' "choice" ',' choiceStyle '}'
* pluralArg = '{' argNameOrNumber ',' "plural" ',' pluralStyle '}'
* selectArg = '{' argNameOrNumber ',' "select" ',' selectStyle '}'
*
* choiceStyle: see {@link ChoiceFormat}
* pluralStyle: see {@link PluralFormat}
* selectStyle: see {@link SelectFormat}
*
* argNameOrNumber = argName | argNumber
* argName = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
* argNumber = '0' | ('1'..'9' ('0'..'9')*)
*
* argType = "number" | "date" | "time" | "spellout" | "ordinal" | "duration"
* argStyle = "short" | "medium" | "long" | "full" | "integer" | "currency" | "percent" | argStyleText
* </pre>
*
* <ul>
* <li>messageText can contain quoted literal strings including syntax characters.
* A quoted literal string begins with an ASCII apostrophe and a syntax character
* (usually a {curly brace}) and continues until the next single apostrophe.
* A double ASCII apostrophe inside or outside of a quoted string represents
* one literal apostrophe.
* <li>Quotable syntax characters are the {curly braces} in all messageText parts,
* plus the '#' sign in a messageText immediately inside a pluralStyle,
* and the '|' symbol in a messageText immediately inside a choiceStyle.
* <li>See also {@link UMessagePatternApostropheMode}
* <li>In argStyleText, every single ASCII apostrophe begins and ends quoted literal text,
* and unquoted {curly braces} must occur in matched pairs.
* </ul>
*
* <p>Recommendation: Use the real apostrophe (single quote) character
* \htmlonly&#x2019;\endhtmlonly (U+2019) for
* human-readable text, and use the ASCII apostrophe ' (U+0027)
* only in program syntax, like quoting in MessageFormat.
* See the annotations for U+0027 Apostrophe in The Unicode Standard.
*
* <p>The <code>argType</code> and <code>argStyle</code> values are used to create
* a <code>Format</code> instance for the format element. The following
* table shows how the values map to Format instances. Combinations not
* shown in the table are illegal. Any <code>argStyleText</code> must
* be a valid pattern string for the Format subclass used.
*
* <p><table border=1>
* <tr>
* <th>argType
* <th>argStyle
* <th>resulting Format object
* <tr>
* <td colspan=2><i>(none)</i>
* <td><code>null</code>
* <tr>
* <td rowspan=5><code>number</code>
* <td><i>(none)</i>
* <td><code>NumberFormat.createInstance(getLocale(), status)</code>
* <tr>
* <td><code>integer</code>
* <td><code>NumberFormat.createInstance(getLocale(), kNumberStyle, status)</code>
* <tr>
* <td><code>currency</code>
* <td><code>NumberFormat.createCurrencyInstance(getLocale(), status)</code>
* <tr>
* <td><code>percent</code>
* <td><code>NumberFormat.createPercentInstance(getLocale(), status)</code>
* <tr>
* <td><i>argStyleText</i>
* <td><code>new DecimalFormat(argStyleText, new DecimalFormatSymbols(getLocale(), status), status)</code>
* <tr>
* <td rowspan=6><code>date</code>
* <td><i>(none)</i>
* <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code>
* <tr>
* <td><code>short</code>
* <td><code>DateFormat.createDateInstance(kShort, getLocale(), status)</code>
* <tr>
* <td><code>medium</code>
* <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code>
* <tr>
* <td><code>long</code>
* <td><code>DateFormat.createDateInstance(kLong, getLocale(), status)</code>
* <tr>
* <td><code>full</code>
* <td><code>DateFormat.createDateInstance(kFull, getLocale(), status)</code>
* <tr>
* <td><i>argStyleText</i>
* <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code>
* <tr>
* <td rowspan=6><code>time</code>
* <td><i>(none)</i>
* <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code>
* <tr>
* <td><code>short</code>
* <td><code>DateFormat.createTimeInstance(kShort, getLocale(), status)</code>
* <tr>
* <td><code>medium</code>
* <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code>
* <tr>
* <td><code>long</code>
* <td><code>DateFormat.createTimeInstance(kLong, getLocale(), status)</code>
* <tr>
* <td><code>full</code>
* <td><code>DateFormat.createTimeInstance(kFull, getLocale(), status)</code>
* <tr>
* <td><i>argStyleText</i>
* <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code>
* <tr>
* <td><code>spellout</code>
* <td><i>argStyleText (optional)</i>
* <td><code>new RuleBasedNumberFormat(URBNF_SPELLOUT, getLocale(), status)
* <br/>&nbsp;&nbsp;&nbsp;&nbsp;.setDefaultRuleset(argStyleText, status);</code>
* <tr>
* <td><code>ordinal</code>
* <td><i>argStyleText (optional)</i>
* <td><code>new RuleBasedNumberFormat(URBNF_ORDINAL, getLocale(), status)
* <br/>&nbsp;&nbsp;&nbsp;&nbsp;.setDefaultRuleset(argStyleText, status);</code>
* <tr>
* <td><code>duration</code>
* <td><i>argStyleText (optional)</i>
* <td><code>new RuleBasedNumberFormat(URBNF_DURATION, getLocale(), status)
* <br/>&nbsp;&nbsp;&nbsp;&nbsp;.setDefaultRuleset(argStyleText, status);</code>
* </table>
* <p>
*
* <h4>Usage Information</h4>
*
* <p>Here are some examples of usage:
* Example 1:
*
* <pre>
* \code
* UErrorCode success = U_ZERO_ERROR;
@ -148,10 +255,12 @@ class DateFormat;
* // in the Force on planet 7.
* \endcode
* </pre>
*
* Typically, the message format will come from resources, and the
* arguments will be dynamically set at runtime.
* <P>
* Example 2:
*
* <p>Example 2:
*
* <pre>
* \code
* success = U_ZERO_ERROR;
@ -171,122 +280,40 @@ class DateFormat;
* \endcode
* </pre>
*
* The pattern is of the following form. Legend:
* <pre>
* \code
* {optional item}
* (group that may be repeated)*
* \endcode
* </pre>
* Do not confuse optional items with items inside quoted braces, such
* as this: "{". Quoted braces are literals.
* <pre>
* \code
* messageFormatPattern := string ( "{" messageFormatElement "}" string )*
*
* messageFormatElement := argumentIndex | argumentName { "," elementFormat }
*
* elementFormat := "time" { "," datetimeStyle }
* | "date" { "," datetimeStyle }
* | "number" { "," numberStyle }
* | "choice" "," choiceStyle
* | "spellout" { "," spelloutStyle }
* | "ordinal" { "," spelloutStyle }
* | "duration" { "," spelloutStyle }
* | "plural" "," pluralStyle
* | "select" "," selectStyle
*
* datetimeStyle := "short"
* | "medium"
* | "long"
* | "full"
* | dateFormatPattern
*
* numberStyle := "currency"
* | "percent"
* | "integer"
* | numberFormatPattern
*
* choiceStyle := choiceFormatPattern
*
* pluralStyle := pluralFormatPattern
*
* selectStyle := selectFormatPattern
*
* spelloutStyle := ruleSetName
* \endcode
* </pre>
* If there is no elementFormat, then the argument must be a string,
* which is substituted. If there is no dateTimeStyle or numberStyle,
* then the default format is used (e.g. NumberFormat::createInstance(),
* DateFormat::createTimeInstance(DateFormat::kDefault, ...) or
* DateFormat::createDateInstance(DateFormat::kDefault, ...). For
* a RuleBasedNumberFormat, if there is no ruleSetName, the default
* rule set is used. For a ChoiceFormat or PluralFormat or SelectFormat, the pattern
* must always be specified, since there is no default.
* <P>
* In strings, single quotes can be used to quote syntax characters.
* A literal single quote is represented by '', both within and outside
* of single-quoted segments. Inside a
* messageFormatElement, quotes are <EM>not</EM> removed. For example,
* {1,number,$'#',##} will produce a number format with the pound-sign
* quoted, with a result such as: "$#31,45".
* <P>
* If a pattern is used, then unquoted braces in the pattern, if any,
* must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab
* {0'}' de" and "ab } de" are not.
* <p>
* <dl><dt><b>Warning:</b><dd>The rules for using quotes within message
* format patterns unfortunately have shown to be somewhat confusing.
* In particular, it isn't always obvious to localizers whether single
* quotes need to be doubled or not. Make sure to inform localizers about
* the rules, and tell them (for example, by using comments in resource
* bundle source files) which strings will be processed by MessageFormat.
* Note that localizers may need to use single quotes in translated
* strings where the original version doesn't have them.
* <br>Note also that the simplest way to avoid the problem is to
* use the real apostrophe (single quote) character U+2019 (') for
* human-readable text, and to use the ASCII apostrophe (U+0027 ' )
* only in program syntax, like quoting in MessageFormat.
* See the annotations for U+0027 Apostrophe in The Unicode Standard.</p>
* </dl>
* <P>
* The argumentIndex is a non-negative integer, which corresponds to the
* index of the arguments presented in an array to be formatted. The
* first argument has argumentIndex 0.
* <P>
* It is acceptable to have unused arguments in the array. With missing
* arguments, or arguments that are not of the right class for the
* specified format, a failing UErrorCode result is set.
* <P>
* <strong>Creating internationalized messages that include plural forms, you
* can use a PluralFormat:</strong>
* <p>For messages that include plural forms, you can use a plural argument:
* <pre>
* \code
* UErrorCode err = U_ZERO_ERROR;
* UnicodeString t1("{0, plural, one{C''est # fichier} other{Ce sont # fichiers}} dans la liste.");
* MessageFormat* msgFmt = new MessageFormat(t1, Locale("fr"), err);
* if (U_FAILURE(err)) {
* return err;
* }
*
* Formattable args1[] = {(int32_t)0};
* Formattable args2[] = {(int32_t)3};
* FieldPosition ignore(FieldPosition::DONT_CARE);
* success = U_ZERO_ERROR;
* MessageFormat msgFmt(
* "{num_files, plural, "
* "=0{There are no files on disk \"{disk_name}\".}"
* "=1{There is one file on disk \"{disk_name}\".}"
* "other{There are # files on disk \"{disk_name}\".}}",
* Locale("en"),
* success);
* FieldPosition fpos = 0;
* Formattable testArgs[] = {0L, "MyDisk"};
* UnicodeString testArgsNames[] = {"num_files", "disk_name"};
* UnicodeString result;
* msgFmt->format(args1, 1, result, ignore, status);
* cout << result << endl;
* result.remove();
* msgFmt->format(args2, 1, result, ignore, status);
* cout << result << endl;
*
* // output, with different args
* // output: C'est 0,0 fichier dans la liste.
* // output: Ce sont 3 fichiers dans la liste."
* cout << msgFmt.format(testArgs, testArgsNames, 2, result, fpos, 0, success);
* testArgs[0] = 3L;
* cout << msgFmt.format(testArgs, testArgsNames, 2, result, fpos, 0, success);
* \endcode
* <em>output</em>:
* There are no files on disk "MyDisk".
* There are 3 files on disk "MyDisk".
* </pre>
* Please check PluralFormat and PluralRules for details.
* </P>
* See {@link PluralFormat} and {@link PluralRules} for details.
*
* <h4><a name="synchronization">Synchronization</a></h4>
*
* <p>MessageFormats are not synchronized.
* It is recommended to create separate format instances for each thread.
* If multiple threads access a format concurrently, it must be synchronized
* externally.
*
* @stable ICU 2.0
*/
class U_I18N_API MessageFormat : public Format {
public:
@ -331,8 +358,8 @@ public:
* Constructs a new MessageFormat using the given pattern and locale.
* @param pattern Pattern used to construct object.
* @param newLocale The locale to use for formatting dates and numbers.
* @param parseError Struct to recieve information on position
* of error within the pattern.
* @param parseError Struct to receive information on the position
* of an error within the pattern.
* @param status Input/output error code. If the
* pattern cannot be parsed, set to failure code.
* @stable ICU 2.0
@ -376,15 +403,14 @@ public:
virtual UBool operator==(const Format& other) const;
/**
* Sets the locale. This locale is used for fetching default number or date
* format information.
* Sets the locale to be used for creating argument Format objects.
* @param theLocale the new locale value to be set.
* @stable ICU 2.0
*/
virtual void setLocale(const Locale& theLocale);
/**
* Gets the locale. This locale is used for fetching default number or date
* Gets the locale used for creating argument Format objects.
* @return the locale of the object.
* @stable ICU 2.0
@ -405,8 +431,8 @@ public:
* Applies the given pattern string to this message format.
*
* @param pattern The pattern to be applied.
* @param parseError Struct to recieve information on position
* of error within pattern.
* @param parseError Struct to receive information on the position
* of an error within the pattern.
* @param status Input/output error code. If the
* pattern cannot be parsed, set to failure code.
* @stable ICU 2.0
@ -415,6 +441,37 @@ public:
UParseError& parseError,
UErrorCode& status);
/**
* Sets the UMessagePatternApostropheMode and the pattern used by this message format.
* Parses the pattern and caches Format objects for simple argument types.
* Patterns and their interpretation are specified in the
* <a href="#patterns">class description</a>.
* <p>
* This method is best used only once on a given object to avoid confusion about the mode,
* and after constructing the object with an empty pattern string to minimize overhead.
*
* @param pattern The pattern to be applied.
* @param aposMode The new apostrophe mode.
* @param parseError Struct to receive information on the position
* of an error within the pattern.
* Can be NULL.
* @param status Input/output error code. If the
* pattern cannot be parsed, set to failure code.
* @draft ICU 4.8
*/
virtual void applyPattern(const UnicodeString& pattern,
UMessagePatternApostropheMode aposMode,
UParseError* parseError,
UErrorCode& status);
/**
* Returns the apostrophe mode currently in effect for this message format.
* The value is read directly from the internal MessagePattern (msgPattern)
* and is not stored separately in this object.
*
* @return this instance's UMessagePatternApostropheMode.
* @draft ICU 4.8
*/
UMessagePatternApostropheMode getApostropheMode() const {
return msgPattern.getApostropheMode();
}
/**
* Returns a pattern that can be used to recreate this object.
*
@ -490,7 +547,7 @@ public:
/**
* Gets format names. This function returns formatNames in StringEnumerations
* which can be used with getFormat() and setFormat() to export formattable
* array from current MessageFormat to another. It is caller's resposibility
* array from current MessageFormat to another. It is the caller's responsibility
* to delete the returned formatNames.
* @param status output param set to success/failure code.
* @stable ICU 4.0
@ -747,6 +804,7 @@ public:
static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern,
UErrorCode& status);
/**
* Returns true if this MessageFormat uses named arguments,
* and false otherwise. See class description.
@ -795,33 +853,44 @@ public:
*/
static UClassID U_EXPORT2 getStaticClassID(void);
/**
* Compares two Format objects. This is used for constructing the hash
* tables.
*
* @param left pointer to a Format object. Must not be NULL.
* @param right pointer to a Format object. Must not be NULL.
*
* @return whether the two objects are the same
* @internal
*/
static UBool equalFormats(const void* left, const void* right);
private:
Locale fLocale;
UnicodeString fPattern;
MessagePattern msgPattern;
Format** formatAliases; // see getFormats
int32_t formatAliasesCapacity;
UProperty idStart;
UProperty idContinue;
MessageFormat(); // default constructor not implemented
/*
* A structure representing one subformat of this MessageFormat.
* Each subformat has a Format object, an offset into the plain
* pattern text fPattern, and an argument number. The argument
* number corresponds to the array of arguments to be formatted.
* @internal
*/
class Subformat;
/**
* This provider helps defer instantiation of a PluralRules object
* until we actually need to select a keyword.
* For example, if the number matches an explicit-value selector like "=1"
* we do not need any PluralRules.
*/
class PluralSelectorProvider : public PluralFormat::PluralSelector {
public:
PluralSelectorProvider(const Locale* loc);
virtual ~PluralSelectorProvider();
virtual UnicodeString select(double number, UErrorCode& ec) const;
/**
* A MessageFormat contains an array of subformats. This array
* needs to grow dynamically if the MessageFormat is modified.
*/
Subformat* subformats;
int32_t subformatCount;
int32_t subformatCapacity;
void reset(const Locale* loc);
private:
const Locale* locale;
PluralRules* rules;
};
/**
* A MessageFormat formats an array of arguments. Each argument
@ -836,14 +905,14 @@ private:
int32_t argTypeCapacity;
/**
* Is true iff all argument names are non-negative numbers.
*
*/
UBool isArgNumeric;
* TRUE if there are different argTypes for the same argument.
* This only matters when the MessageFormat is used in the plain C (umsg_xxx) API
* where the pattern argTypes determine how the va_arg list is read.
*/
UBool hasArgTypeConflicts;
// Variable-size array management
UBool allocateSubformats(int32_t capacity);
UBool allocateArgTypes(int32_t capacity);
UBool allocateArgTypes(int32_t capacity, UErrorCode& status);
/**
* Default Format objects used when no format is specified and a
@ -855,6 +924,11 @@ private:
NumberFormat* defaultNumberFormat;
DateFormat* defaultDateFormat;
UHashtable* cachedFormatters;
UHashtable* customFormatArgStarts;
PluralSelectorProvider pluralProvider;
/**
* Method to retrieve default formats (or NULL on failure).
* These are semantically const, but may modify *this.
@ -872,57 +946,93 @@ private:
const UChar * const *list);
/**
* Formats the array of arguments and copies the result into the
* result buffer, updates the field position.
*
* @param arguments The formattable objects array.
* @param cnt The array count.
* @param appendTo Output parameter to receive result.
* Result is appended to existing contents.
* @param status Field position status.
* @param recursionProtection
* Initially zero. Bits 0..9 are used to indicate
* that a parameter has already been seen, to
* avoid recursion. Currently unused.
* @param success The error code status.
* @return Reference to 'appendTo' parameter.
* Thin wrapper around the format(... AppendableWrapper ...) variant.
* Wraps the destination UnicodeString into an AppendableWrapper and
* supplies default values for some other parameters.
*/
UnicodeString& format( const Formattable* arguments,
int32_t cnt,
UnicodeString& appendTo,
FieldPosition& status,
int32_t recursionProtection,
UErrorCode& success) const;
UnicodeString& format(const Formattable* arguments,
const UnicodeString *argumentNames,
int32_t cnt,
UnicodeString& appendTo,
FieldPosition* pos,
UErrorCode& status) const;
UnicodeString& format( const Formattable* arguments,
const UnicodeString *argumentNames,
int32_t cnt,
UnicodeString& appendTo,
FieldPosition& status,
int32_t recursionProtection,
UErrorCode& success) const;
/**
* Formats the arguments and writes the result into the
* AppendableWrapper, updates the field position.
*
* @param msgStart Index to msgPattern part to start formatting from.
* @param pluralNumber Zero except when formatting a plural argument sub-message
* where a '#' is replaced by the format string for this number.
* @param arguments The formattable objects array. (Must not be NULL.)
* @param argumentNames NULL if numbered values are used. Otherwise the same
* length as "arguments", and each entry is the name of the
* corresponding argument in "arguments".
* @param cnt The length of arguments (and of argumentNames if that is not NULL).
* @param appendTo Output parameter to receive the result.
* The result string is appended to existing contents.
* @param pos Field position status.
* @param success The error code status.
*/
void format(int32_t msgStart,
double pluralNumber,
const Formattable* arguments,
const UnicodeString *argumentNames,
int32_t cnt,
AppendableWrapper& appendTo,
FieldPosition* pos,
UErrorCode& success) const;
void makeFormat(int32_t offsetNumber,
UnicodeString* segments,
UParseError& parseError,
UErrorCode& success);
UnicodeString getArgName(int32_t partIndex);
void setArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status);
void setCustomArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status);
int32_t nextTopLevelArgStart(int32_t partIndex) const;
bool argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber);
void cacheExplicitFormats(UErrorCode& status);
Format* createAppropriateFormat(UnicodeString& type,
UnicodeString& style,
Formattable::Type& formattableType,
UParseError& parseError,
UErrorCode& ec);
const Formattable* getArgFromListByName(const Formattable* arguments,
const UnicodeString *argumentNames,
int32_t cnt, UnicodeString& name) const;
Formattable* parse(int32_t msgStart,
const UnicodeString& source,
ParsePosition& pos,
int32_t& count,
UErrorCode& ec) const;
FieldPosition* updateMetaData(AppendableWrapper& dest, int32_t prevLength,
FieldPosition* fp, const Formattable* argId) const;
Format* getCachedFormatter(int32_t argumentNumber) const;
UnicodeString getLiteralStringUntilNextArgument(int32_t from) const;
void copyObjects(const MessageFormat& that, UErrorCode& ec);
void formatComplexSubMessage(int32_t msgStart,
double pluralNumber,
const Formattable* arguments,
const UnicodeString *argumentNames,
int32_t cnt,
AppendableWrapper& appendTo,
UErrorCode& success) const;
/**
* Convenience method that ought to be in NumberFormat
*/
NumberFormat* createIntegerFormat(const Locale& locale, UErrorCode& status) const;
/**
* Checks the range of the source text to quote the special
* characters, { and ' and copy to target buffer.
* @param source
* @param start the text offset to start the process of in the source string
* @param end the text offset to end the process of in the source string
* @param appendTo Output parameter to receive result.
* Result is appended to existing contents.
*/
static void copyAndFixQuotes(const UnicodeString& appendTo, int32_t start, int32_t end, UnicodeString& target);
/**
* Returns array of argument types in the parsed pattern
* for use in C API. Only for the use of umsg_vformat(). Not
@ -937,11 +1047,25 @@ private:
}
/**
* Returns FALSE if the argument name is not legal.
* @param argName argument name.
* @return TRUE if the argument name is legal, otherwise return FALSE.
* Resets the internal MessagePattern, and other associated caches.
*/
UBool isLegalArgName(const UnicodeString& argName) const;
void resetPattern();
// DummyFormat is a placeholder Format subclass that we use solely to store
// a NULL value. UHash does not support storing NULL values, so an instance of
// this class stands in for "no formatter".
// The members below are the Format overrides; their
// implementations are not part of this header.
class U_I18N_API DummyFormat : public Format {
public:
// Equality comparison against another Format.
virtual UBool operator==(const Format&) const;
// Returns a copy of this object.
virtual Format* clone() const;
// Format override. NOTE(review): presumably leaves appendTo unchanged or
// reports an error through status -- confirm in the implementation.
virtual UnicodeString& format(const Formattable&,
UnicodeString& appendTo,
FieldPosition&,
UErrorCode& status) const;
// Format override for parsing; all parameters are unnamed here.
virtual void parseObject(const UnicodeString&,
Formattable&,
ParsePosition&) const;
// Returns the class ID of this object.
virtual UClassID getDynamicClassID() const;
};
friend class MessageFormatAdapter; // getFormatTypeList() access
};
@ -953,6 +1077,7 @@ MessageFormat::format(const Formattable& obj,
return Format::format(obj, appendTo, status);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2007-2010, International Business Machines Corporation and
* Copyright (C) 2007-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
@ -25,6 +25,7 @@
#if !UCONFIG_NO_FORMATTING
#include "unicode/messagepattern.h"
#include "unicode/numfmt.h"
#include "unicode/plurrule.h"
@ -37,7 +38,7 @@ class Hashtable;
* <code>PluralFormat</code> supports the creation of internationalized
* messages with plural inflection. It is based on <i>plural
* selection</i>, i.e. the caller specifies messages for each
* plural case that can appear in the users language and the
* plural case that can appear in the user's language and the
* <code>PluralFormat</code> selects the appropriate message based on
* the number.
* </p>
@ -51,7 +52,7 @@ class Hashtable;
* each message and selects the message whose interval contains a
* given number. This can only handle a finite number of
* intervals. But in some languages, like Polish, one plural case
* applies to infinitely many intervals (e.g., paucal applies to
* applies to infinitely many intervals (e.g., the plural case applies to
* numbers ending with 2, 3, or 4 except those ending with 12, 13, or
* 14). Thus <code>ChoiceFormat</code> is not adequate.
* </p><p>
@ -62,17 +63,20 @@ class Hashtable;
* conditions for a plural case than just a single interval. These plural
* rules define both what plural cases exist in a language, and to
* which numbers these cases apply.
* <li>It provides predefined plural rules for many locales. Thus, the programmer
* need not worry about the plural cases of a language. On the flip side,
* the localizer does not have to specify the plural cases; he can simply
* <li>It provides predefined plural rules for many languages. Thus, the programmer
* need not worry about the plural cases of a language and
* does not have to define the plural cases; they can simply
* use the predefined keywords. The whole plural formatting of messages can
* be done using localized patterns from resource bundles. For predefined plural
* rules, see CLDR <i>Language Plural Rules</i> page at
* rules, see the CLDR <i>Language Plural Rules</i> page at
* http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
* </ul>
* </p>
* <h4>Usage of <code>PluralFormat</code></h4>
* <p>
* <p>Note: Typically, plural formatting is done via <code>MessageFormat</code>
* with a <code>plural</code> argument type,
* rather than using a stand-alone <code>PluralFormat</code>.
* </p><p>
* This discussion assumes that you use <code>PluralFormat</code> with
* a predefined set of plural rules. You can create one using one of
* the constructors that takes a <code>locale</code> object. To
@ -85,82 +89,46 @@ class Hashtable;
* <h5>Patterns and Their Interpretation</h5>
* <p>
* The pattern text defines the message output for each plural case of the
* used locale. The pattern is a sequence of
* <code><i>caseKeyword</i>{<i>message</i>}</code> clauses, separated by white
* space characters. Each clause assigns the message <code><i>message</i></code>
* to the plural case identified by <code><i>caseKeyword</i></code>.
* </p><p>
* There are 6 predefined casekeyword in ICU - 'zero', 'one', 'two', 'few', 'many' and
* 'other'. You always have to define a message text for the default plural case
* "<code>other</code>" which is contained in every rule set. If the plural
* rules of the <code>PluralFormat</code> object do not contain a plural case
* identified by <code><i>caseKeyword</i></code>, U_DEFAULT_KEYWORD_MISSING
* will be set to status.
* If you do not specify a message text for a particular plural case, the
* message text of the plural case "<code>other</code>" gets assigned to this
* plural case. If you specify more than one message for the same plural case,
* U_DUPLICATE_KEYWORD will be set to status.
* <br>
* Spaces between <code><i>caseKeyword</i></code> and
* <code><i>message</i></code> will be ignored; spaces within
* <code><i>message</i></code> will be preserved.
* </p><p>
* The message text for a particular plural case may contain other message
* format patterns. <code>PluralFormat</code> preserves these so that you
* can use the strings produced by <code>PluralFormat</code> with other
* formatters. If you are using <code>PluralFormat</code> inside a
* <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
* automatically evaluate the resulting format pattern.<br>
* Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
* in message texts to define a nested format pattern.<br>
* The pound sign (<code>#</code>) will be interpreted as the number placeholder
* in the message text, if it is not contained in curly braces (to preserve
* <code>NumberFormat</code> patterns). <code>PluralFormat</code> will
* replace each of those pound signs by the number passed to the
* <code>format()</code> method. It will be formatted using a
* <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
* need special number formatting, you have to explicitly specify a
* <code>NumberFormat</code> for the <code>PluralFormat</code> to use.
* </p>
* Example
* specified locale. Syntax:
* <pre>
* \code
* UErrorCode status = U_ZERO_ERROR;
* MessageFormat* msgFmt = new MessageFormat(UnicodeString("{0, plural,
* one{{0, number, C''est #,##0.0# fichier}} other {Ce sont # fichiers}} dans la liste."),
* Locale("fr"), status);
* if (U_FAILURE(status)) {
* return;
* }
* Formattable args1[] = {(int32_t)0};
* Formattable args2[] = {(int32_t)3};
* FieldPosition ignore(FieldPosition::DONT_CARE);
* UnicodeString result;
* msgFmt->format(args1, 1, result, ignore, status);
* cout << result << endl;
* result.remove();
* msgFmt->format(args2, 1, result, ignore, status);
* cout << result << endl;
* \endcode
* pluralStyle = [offsetValue] (selector '{' message '}')+
* offsetValue = "offset:" number
* selector = explicitValue | keyword
* explicitValue = '=' number // adjacent, no white space in between
* keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
* message: see {@link MessageFormat}
* </pre>
* Produces the output:<br>
* <code>C'est 0,0 fichier dans la liste.</code><br>
* <code>Ce sont 3 fichiers dans la liste.</code>
* <p>
* <strong>Note:</strong><br>
* Currently <code>PluralFormat</code>
* does not make use of quotes like <code>MessageFormat</code>.
* If you use plural format strings with <code>MessageFormat</code> and want
* to use a quote sign <code>'</code>, you have to write <code>''</code>.
* <code>MessageFormat</code> unquotes this pattern and passes the unquoted
* pattern to <code>PluralFormat</code>. It's a bit trickier if you use
* nested formats that do quoting. In the example above, we wanted to insert
* <code>'</code> in the number format pattern. Since
* <code>NumberFormat</code> supports quotes, we had to insert
* <code>''</code>. But since <code>MessageFormat</code> unquotes the
* pattern before it gets passed to <code>PluralFormat</code>, we have to
* double these quotes, i.e. write <code>''''</code>.
* Pattern_White_Space between syntax elements is ignored, except
* between the {curly braces} and their sub-message,
* and between the '=' and the number of an explicitValue.
*
* </p><p>
* There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and
* 'other'. You always have to define a message text for the default plural case
* <code>other</code> which is contained in every rule set.
* If you do not specify a message text for a particular plural case, the
* message text of the plural case <code>other</code> gets assigned to this
* plural case.
* </p><p>
* When formatting, the input number is first matched against the explicitValue clauses.
* If there is no exact-number match, then a keyword is selected by calling
* the <code>PluralRules</code> with the input number <em>minus the offset</em>.
* (The offset defaults to 0 if it is omitted from the pattern string.)
* If there is no clause with that keyword, then the "other" clause is returned.
* </p><p>
* An unquoted pound sign (<code>#</code>) in the selected sub-message
* itself (i.e., outside of arguments nested in the sub-message)
* is replaced by the input number minus the offset.
* The number-minus-offset value is formatted using a
* <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
* need special number formatting, you have to use a <code>MessageFormat</code>
* and explicitly specify a <code>NumberFormat</code> argument.
* <strong>Note:</strong> That argument is formatted without subtracting the offset!
* If you need a custom format and have a non-zero offset, then you need to pass the
* number-minus-offset value as a separate parameter.
* </p>
* For a usage example, see the {@link MessageFormat} class documentation.
*
* <h4>Defining Custom Plural Rules</h4>
* <p>If you need to use <code>PluralFormat</code> with custom rules, you can
* create a <code>PluralRules</code> object and pass it to
@ -511,34 +479,63 @@ public:
*/
virtual UClassID getDynamicClassID() const;
private:
typedef enum fmtToken {
none,
tLetter,
tNumber,
tSpace,
tNumberSign,
tLeftBrace,
tRightBrace
}fmtToken;
private:
/**
 * Abstract interface that maps a number to a PluralFormat keyword.
 * Implementations are declared as subclasses (for example
 * PluralSelectorAdapter below).
 */
class PluralSelector {
  public:
    /**
     * Virtual destructor: this class is a polymorphic base whose subclasses
     * declare their own virtual destructors, so destroying an object through
     * a PluralSelector pointer must dispatch to the subclass destructor.
     */
    virtual ~PluralSelector() {}
    /**
     * Given a number, returns the appropriate PluralFormat keyword.
     *
     * @param number The number to be plural-formatted.
     * @param ec Error code.
     * @return The selected PluralFormat keyword.
     */
    virtual UnicodeString select(double number, UErrorCode& ec) const = 0;
};
// PluralSelector implementation that carries a PluralRules pointer.
// NOTE(review): select() presumably delegates to pluralRules -- the
// implementation is not part of this header; confirm there.
class PluralSelectorAdapter : public PluralSelector {
public:
// Starts out with no rules attached (pluralRules == NULL).
PluralSelectorAdapter() : pluralRules(NULL) {
}
virtual ~PluralSelectorAdapter();
// PluralSelector override: returns the keyword for the given number.
// The error code parameter is unnamed in this declaration.
virtual UnicodeString select(double number, UErrorCode& /*ec*/) const;
// NOTE(review): presumably releases/clears pluralRules -- confirm in the implementation.
void reset();
// Public so the enclosing class can set it directly; ownership is not visible here.
PluralRules* pluralRules;
};
Locale locale;
PluralRules* pluralRules;
UnicodeString pattern;
Hashtable *fParsedValuesHash;
MessagePattern msgPattern;
NumberFormat* numberFormat;
NumberFormat* replacedNumberFormat;
double offset;
PluralSelectorAdapter pluralRulesWrapper;
PluralFormat(); // default constructor not implemented
void init(const PluralRules* rules, const Locale& curlocale, UErrorCode& status);
UBool inRange(UChar ch, fmtToken& type);
UBool checkSufficientDefinition();
void parsingFailure();
UnicodeString insertFormattedNumber(double number,
UnicodeString& message,
UnicodeString& appendTo,
FieldPosition& pos) const;
void copyHashtable(Hashtable *other, UErrorCode& status);
void init(const PluralRules* rules, UErrorCode& status);
/**
* Copies dynamically allocated values (pointer fields).
* Others are copied using their copy constructors and assignment operators.
*/
void copyObjects(const PluralFormat& other);
/**
* Finds the PluralFormat sub-message for the given number, or the "other" sub-message.
* @param pattern A MessagePattern.
* @param partIndex the index of the first PluralFormat argument style part.
* @param selector the PluralSelector for mapping the number (minus offset) to a keyword.
* @param number a number to be matched to one of the PluralFormat argument's explicit values,
* or mapped via the PluralSelector.
* @param ec ICU error code.
* @return the sub-message start part index.
*/
static int32_t findSubMessage(
const MessagePattern& pattern, int32_t partIndex,
const PluralSelector& selector, double number, UErrorCode& ec);
friend class MessageFormat;
};
U_NAMESPACE_END

View file

@ -84,15 +84,18 @@ class PluralKeywordEnumeration;
* \endcode
* </pre></p>
* <p>
* The difference between 'in' and 'within' is that 'in' only includes
* integers in the specified range, while 'within' includes all values.</p>
* <p>
* Keywords
* could be defined by users or from ICU locale data. There are 6
* predefined values in ICU - 'zero', 'one', 'two', 'few', 'many' and
* 'other'. Callers need to check the value of keyword returned by
* {@link #select} method.
* </p>
* An "identifier" is a sequence of characters that do not have the
* Unicode Pattern_Syntax or Pattern_White_Space properties.
* <p>
* The difference between 'in' and 'within' is that 'in' only includes
* integers in the specified range, while 'within' includes all values.</p>
* <p>
* Keywords
* could be defined by users or from ICU locale data. There are 6
* predefined values in ICU - 'zero', 'one', 'two', 'few', 'many' and
* 'other'. Callers need to check the value of keyword returned by
* {@link #select} method.
* </p>
*
* Examples:<pre>
* UnicodeString keyword = pl->select(number);

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
* Copyright (C) 2010 , Yahoo! Inc.
********************************************************************
@ -16,8 +16,9 @@
#ifndef SELFMT
#define SELFMT
#include "unicode/utypes.h"
#include "unicode/messagepattern.h"
#include "unicode/numfmt.h"
#include "unicode/utypes.h"
/**
* \file
@ -28,7 +29,7 @@
U_NAMESPACE_BEGIN
class Hashtable;
class MessageFormat;
/**
* <p><code>SelectFormat</code> supports the creation of internationalized
@ -40,6 +41,10 @@ class Hashtable;
*
* <h4>Using <code>SelectFormat</code> for Gender Agreement</h4>
*
* <p>Note: Typically, select formatting is done via <code>MessageFormat</code>
* with a <code>select</code> argument type,
* rather than using a stand-alone <code>SelectFormat</code>.</p>
*
* <p>The main use case for the select format is gender based inflection.
* When names or nouns are inserted into sentences, their gender can affect pronouns,
* verb forms, articles, and adjectives. Special care needs to be
@ -73,6 +78,9 @@ class Hashtable;
* but similar in grammatical use.
* Some African languages have around 20 noun classes.</p>
*
* <p><b>Note:</b> For the gender of a <i>person</i> in a given sentence,
* we usually need to distinguish only between female, male and other/unknown.</p>
*
* <p>To enable localizers to create sentence patterns that take their
* language's gender dependencies into consideration, software has to provide
* information about the gender associated with a noun or name to
@ -81,8 +89,8 @@ class Hashtable;
*
* <ul>
* <li>For people, natural gender information should be maintained for each person.
* The keywords "male", "female", "mixed" (for groups of people)
* and "unknown" are used.
* Keywords like "male", "female", "mixed" (for groups of people)
* and "unknown" could be used.
*
* <li>For nouns, grammatical gender information should be maintained for
* each noun and per language, e.g., in resource bundles.
@ -100,6 +108,11 @@ class Hashtable;
*
* <pre>{0} went to {2}.</pre>
*
* <p><b>Note:</b> The entire sentence should be included (and partially repeated)
* inside each phrase. Otherwise translators would have to be trained on how to
* move bits of the sentence in and out of the select argument of a message.
* (The examples below do not follow this recommendation!)</p>
*
* <p>The sentence pattern for French, where the gender of the person affects
* the form of the participle, uses a select format based on argument 1:</p>
*
@ -121,39 +134,24 @@ class Hashtable;
*
* <h4>Patterns and Their Interpretation</h4>
*
* <p>The <code>SelectFormat</code> pattern text defines the phrase output
* <p>The <code>SelectFormat</code> pattern string defines the phrase output
* for each user-defined keyword.
* The pattern is a sequence of <code><i>keyword</i>{<i>phrase</i>}</code>
* clauses.
* Each clause assigns the phrase <code><i>phrase</i></code>
* to the user-defined <code><i>keyword</i></code>.</p>
* The pattern is a sequence of (keyword, message) pairs.
* A keyword is a "pattern identifier": [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+</p>
*
* <p>Keywords must match the pattern [a-zA-Z][a-zA-Z0-9_-]*; keywords
* that don't match this pattern result in the error code
* <code>U_ILLEGAL_CHARACTER</code>.
* You always have to define a phrase for the default keyword
* <p>Each message is a MessageFormat pattern string enclosed in {curly braces}.</p>
*
* <p>You always have to define a phrase for the default keyword
* <code>other</code>; this phrase is returned when the keyword
* provided to
* the <code>format</code> method matches no other keyword.
* If a pattern does not provide a phrase for <code>other</code>, the method
* it's provided to returns the error <code>U_DEFAULT_KEYWORD_MISSING</code>.
* If a pattern provides more than one phrase for the same keyword, the
* error <code>U_DUPLICATE_KEYWORD</code> is returned.
* <br>
* Spaces between <code><i>keyword</i></code> and
* <code>{<i>phrase</i>}</code> will be ignored; spaces within
* <code>{<i>phrase</i>}</code> will be preserved.<p>
* Pattern_White_Space between keywords and messages is ignored.
* Pattern_White_Space within a message is preserved and output.</p>
*
* <p>The phrase for a particular select case may contain other message
* format patterns. <code>SelectFormat</code> preserves these so that you
* can use the strings produced by <code>SelectFormat</code> with other
* formatters. If you are using <code>SelectFormat</code> inside a
* <code>MessageFormat</code> pattern, <code>MessageFormat</code> will
* automatically evaluate the resulting format pattern.
* Thus, curly braces (<code>{</code>, <code>}</code>) are <i>only</i> allowed
* in phrases to define a nested format pattern.</p>
*
* <p>Example:
* <p><pre>Example:
* \htmlonly
*
* UErrorCode status = U_ZERO_ERROR;
@ -342,31 +340,22 @@ public:
virtual UClassID getDynamicClassID() const;
private:
typedef enum classesForSelectFormat{
tStartKeyword,
tContinueKeyword,
tLeftBrace,
tRightBrace,
tSpace,
tOther
}CharacterClass;
UnicodeString pattern;
//Hash to store the keyword, phrase pairs.
Hashtable *parsedValuesHash;
friend class MessageFormat;
SelectFormat(); // default constructor not implemented.
void initHashTable(UErrorCode &status);
void cleanHashTable();
//For the applyPattern , classifies char.s in one of the characterClass.
CharacterClass classifyCharacter(UChar ch) const;
//Checks if the "other" keyword is present in pattern.
UBool checkSufficientDefinition();
//Checks if the keyword passed is valid.
UBool checkValidKeyword(const UnicodeString& argKeyword) const;
void parsingFailure();
void copyHashtable(Hashtable *other, UErrorCode& status);
/**
* Finds the SelectFormat sub-message for the given keyword, or the "other" sub-message.
* @param pattern A MessagePattern.
* @param partIndex the index of the first SelectFormat argument style part.
* @param keyword a keyword to be matched to one of the SelectFormat argument's keywords.
* @param ec Error code.
* @return the sub-message start part index.
*/
static int32_t findSubMessage(const MessagePattern& pattern, int32_t partIndex,
const UnicodeString& keyword, UErrorCode& ec);
MessagePattern msgPattern;
};
U_NAMESPACE_END

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
* Copyright (C) 2010 , Yahoo! Inc.
********************************************************************
@ -13,7 +13,6 @@
* Change history:
*
* 08/5/2001 Ram Added C wrappers for C++ API.
*
********************************************************************/
#ifndef UMSG_H
@ -27,19 +26,30 @@
#include "unicode/uloc.h"
#include "unicode/parseerr.h"
#include <stdarg.h>
/**
* \file
* \brief C API: MessageFormat
*
* <h2>Message Format C API </h2>
* <h2>MessageFormat C API </h2>
*
* Provides means to produce concatenated messages in language-neutral way.
* Use this for all concatenations that show up to end users.
* <P>
* Takes a set of objects, formats them, then inserts the formatted
* strings into the pattern at the appropriate places.
* <P>
* Here are some examples of usage:
* <p>MessageFormat prepares strings for display to users,
* with optional arguments (variables/placeholders).
* The arguments can occur in any order, which is necessary for translation
* into languages with different grammars.
*
* <p>The opaque UMessageFormat type is a thin C wrapper around
* a C++ MessageFormat. It is constructed from a <em>pattern</em> string
* with arguments in {curly braces} which will be replaced by formatted values.
*
* <p>Currently, the C API supports only numbered arguments.
*
* <p>For details about the pattern syntax and behavior,
* especially about the ASCII apostrophe vs. the
* real apostrophe (single quote) character \htmlonly&#x2019;\endhtmlonly (U+2019),
* see the C++ MessageFormat class documentation.
*
* <p>Here are some examples of C API usage:
* Example 1:
* <pre>
* \code
@ -143,102 +153,6 @@
* }
* \endcode
* </pre>
*
* The pattern is of the following form. Legend:
* <pre>
* \code
* {optional item}
* (group that may be repeated)*
* \endcode
* </pre>
* Do not confuse optional items with items inside quotes braces, such
* as this: "{". Quoted braces are literals.
* <pre>
* \code
* messageFormatPattern := string ( "{" messageFormatElement "}" string )*
*
* messageFormatElement := argument { "," elementFormat }
*
* elementFormat := "time" { "," datetimeStyle }
* | "date" { "," datetimeStyle }
* | "number" { "," numberStyle }
* | "choice" "," choiceStyle
* | "select" "," selectStyle
*
* datetimeStyle := "short"
* | "medium"
* | "long"
* | "full"
* | dateFormatPattern
*
* numberStyle := "currency"
* | "percent"
* | "integer"
* | numberFormatPattern
*
* choiceStyle := choiceFormatPattern
*
* selectStyle := selectFormatPattern
* \endcode
* </pre>
* If there is no elementFormat, then the argument must be a string,
* which is substituted. If there is no dateTimeStyle or numberStyle,
* then the default format is used (e.g. NumberFormat.getInstance(),
* DateFormat.getDefaultTime() or DateFormat.getDefaultDate(). For
* a ChoiceFormat, the pattern must always be specified, since there
* is no default.
* <P>
* In strings, single quotes can be used to quote the "{" sign if
* necessary. A real single quote is represented by ''. Inside a
* messageFormatElement, quotes are [not] removed. For example,
* {1,number,$'#',##} will produce a number format with the pound-sign
* quoted, with a result such as: "$#31,45".
* <P>
* If a pattern is used, then unquoted braces in the pattern, if any,
* must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab
* {0'}' de" and "ab } de" are not.
* <p>
* <dl><dt><b>Warning:</b><dd>The rules for using quotes within message
* format patterns unfortunately have shown to be somewhat confusing.
* In particular, it isn't always obvious to localizers whether single
* quotes need to be doubled or not. Make sure to inform localizers about
* the rules, and tell them (for example, by using comments in resource
* bundle source files) which strings will be processed by MessageFormat.
* Note that localizers may need to use single quotes in translated
* strings where the original version doesn't have them.
* <br>Note also that the simplest way to avoid the problem is to
* use the real apostrophe (single quote) character U+2019 (') for
* human-readable text, and to use the ASCII apostrophe (U+0027 ' )
* only in program syntax, like quoting in MessageFormat.
* See the annotations for U+0027 Apostrophe in The Unicode Standard.</p>
* </dl>
* <P>
* The argument is a number from 0 to 9, which corresponds to the
* arguments presented in an array to be formatted.
* <P>
* It is ok to have unused arguments in the array. With missing
* arguments or arguments that are not of the right class for the
* specified format, a failing UErrorCode result is set.
* <P>
* <P>
* [Note:] As we see above, the string produced by a choice Format in
* MessageFormat is treated specially; occurrences of '{' are used to
* indicate subformats.
* <P>
* [Note:] Formats are numbered by order of variable in the string.
* This is [not] the same as the argument numbering!
* <pre>
* \code
* For example: with "abc{2}def{3}ghi{0}...",
*
* format0 affects the first variable {2}
* format1 affects the second variable {3}
* format2 affects the third variable {0}
* \endcode
* </pre>
* and so on.
*/
/**

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
*
@ -736,8 +736,14 @@ static void TestMsgFormatChoice(void)
str=(UChar*)malloc(sizeof(UChar) * 25);
u_uastrcpy(str, "MyDisk");
log_verbose("Testing message format with choice test #6\n:");
/*There {0,choice,0#are no files|1#is one file|1<are {0,number,integer} files}.*/
u_uastrcpy(pattern, "The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number,integer} files}");
/*
* Before ICU 4.8, umsg_xxx() did not detect conflicting argument types,
* and this pattern had {0,number,integer} as the inner argument.
* The choice argument has kDouble type while {0,number,integer} has kLong (int32_t).
* ICU 4.8 and above detect this as an error.
* We changed this pattern to work as intended.
*/
u_uastrcpy(pattern, "The disk {1} contains {0,choice,0#no files|1#one file|1<{0,number} files}");
u_uastrcpy(expected, "The disk MyDisk contains 100 files");
resultlength=0;
resultLengthOut=u_formatMessage( "en_US", pattern, u_strlen(pattern), NULL, resultlength, &status, 100., str);

View file

@ -1,6 +1,6 @@
/***********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2009, International Business Machines Corporation
* Copyright (c) 1997-2011, International Business Machines Corporation
* and others. All Rights Reserved.
***********************************************************************/
@ -25,36 +25,33 @@
#define CASE(id,test) case id: name = #test; if (exec) { logln(#test "---"); logln((UnicodeString)""); test(); } break;
// Test dispatcher for MessageFormatRegressionTest: selects the test case that
// corresponds to `index`, stores its name in `name`, and runs it when `exec`
// is true (see the CASE macro defined just above for the per-case behavior).
// An unknown index yields an empty name.
// NOTE(review): the TESTCASE_AUTO* macros are defined outside this file;
// presumably they assign the case numbers automatically in listing order --
// confirm in the test framework header.
void
void
MessageFormatRegressionTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
{
// if (exec) logln((UnicodeString)"TestSuite MessageFormatRegressionTest");
switch (index) {
CASE(0,Test4074764)
CASE(1,Test4058973)
CASE(2,Test4031438)
CASE(3,Test4052223)
CASE(4,Test4104976)
CASE(5,Test4106659)
CASE(6,Test4106660)
CASE(7,Test4111739)
CASE(8,Test4114743)
CASE(9,Test4116444)
CASE(10,Test4114739)
CASE(11,Test4113018)
CASE(12,Test4106661)
CASE(13,Test4094906)
CASE(14,Test4118592)
CASE(15,Test4118594)
CASE(16,Test4105380)
CASE(17,Test4120552)
CASE(18,Test4142938)
CASE(19,TestChoicePatternQuote)
CASE(20,Test4112104)
CASE(21,TestAPI)
default: name = ""; break;
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(Test4074764)
// Test4058973 stays in this list only as a comment so the omission is visible.
//TESTCASE_AUTO(Test4058973) -- disabled/obsolete in ICU 4.8
TESTCASE_AUTO(Test4031438)
TESTCASE_AUTO(Test4052223)
TESTCASE_AUTO(Test4104976)
TESTCASE_AUTO(Test4106659)
TESTCASE_AUTO(Test4106660)
TESTCASE_AUTO(Test4111739)
TESTCASE_AUTO(Test4114743)
TESTCASE_AUTO(Test4116444)
TESTCASE_AUTO(Test4114739)
TESTCASE_AUTO(Test4113018)
TESTCASE_AUTO(Test4106661)
TESTCASE_AUTO(Test4094906)
TESTCASE_AUTO(Test4118592)
TESTCASE_AUTO(Test4118594)
TESTCASE_AUTO(Test4105380)
TESTCASE_AUTO(Test4120552)
TESTCASE_AUTO(Test4142938)
TESTCASE_AUTO(TestChoicePatternQuote)
TESTCASE_AUTO(Test4112104)
TESTCASE_AUTO(TestAPI)
TESTCASE_AUTO_END;
}
UBool
@ -149,8 +146,13 @@ void MessageFormatRegressionTest::Test4074764() {
/* @bug 4058973
* MessageFormat.toPattern has weird rounding behavior.
*
* ICU 4.8: This test is commented out because toPattern() has been changed to return
* the original pattern string, rather than reconstituting a new (equivalent) one.
* This trivially eliminates issues with rounding or any other pattern string differences.
*/
void MessageFormatRegressionTest::Test4058973()
/*
void MessageFormatRegressionTest::Test4058973()
{
UErrorCode status = U_ZERO_ERROR;
MessageFormat *fmt = new MessageFormat("{0,choice,0#no files|1#one file|1< {0,number,integer} files}", status);
@ -166,7 +168,7 @@ void MessageFormatRegressionTest::Test4058973()
}
delete fmt;
}
}*/
/* @bug 4031438
* More robust message formats.
*/
@ -258,7 +260,7 @@ void MessageFormatRegressionTest::Test4031438()
failure(status, "messageFormatter->applyPattern", possibleDataError);
tempBuffer.remove();
tempBuffer = messageFormatter->format(params, 1, tempBuffer, pos, status);
if (tempBuffer != "Double ' Quotes 7 test and quoted {1} test plus other {2} stuff.")
if (tempBuffer != "Double ' Quotes 7 test and quoted {1} test plus 'other {2} stuff'.")
dataerrln("quote format test (w/ params) failed. - %s", u_errorName(status));
logln("Formatted with params : " + tempBuffer);
@ -911,12 +913,21 @@ void MessageFormatRegressionTest::Test4142938()
*/
void MessageFormatRegressionTest::TestChoicePatternQuote()
{
// ICU 4.8 ChoiceFormat (like PluralFormat & SelectFormat)
// returns the chosen string unmodified, so that it is usable in a MessageFormat.
// We modified the test strings accordingly.
// Note: Without further formatting/trimming/etc., it is not possible
// to get a single apostrophe as the last character of a non-final choice sub-message
// because the single apostrophe before the pipe '|' would start quoted text.
// Normally, ChoiceFormat is used inside a MessageFormat, where a double apostrophe
// can be used in that case and will be formatted as a single one.
// (Better: Use a "real" apostrophe, U+2019.)
UnicodeString DATA [] = {
// Pattern 0 value 1 value
// {sfb} hacked - changed \u2264 to = (copied from Character Map)
(UnicodeString)"0#can''t|1#can", (UnicodeString)"can't", (UnicodeString)"can",
(UnicodeString)"0#'pound(#)=''#'''|1#xyz", (UnicodeString)"pound(#)='#'", (UnicodeString)"xyz",
(UnicodeString)"0#'1<2 | 1=1'|1#''", (UnicodeString)"1<2 | 1=1", (UnicodeString)"'",
"0#can't|1#can", "can't", "can",
"0#pound(#)='#''|1#xyz", "pound(#)='#''", "xyz",
"0#1<2 '| 1=1'|1#'", "1<2 '| 1=1'", "'",
};
for (int i=0; i<9; i+=3) {
//try {
@ -929,7 +940,7 @@ void MessageFormatRegressionTest::TestChoicePatternQuote()
out = cf->format((double)j, out, pos);
if (out != DATA[i+1+j])
errln("Fail: Pattern \"" + DATA[i] + "\" x "+j+" -> " +
out + "; want \"" + DATA[i+1+j] + '"');
out + "; want \"" + DATA[i+1+j] + "\"");
}
UnicodeString pat;
pat = cf->toPattern(pat);
@ -937,9 +948,9 @@ void MessageFormatRegressionTest::TestChoicePatternQuote()
ChoiceFormat *cf2 = new ChoiceFormat(pat, status);
pat2 = cf2->toPattern(pat2);
if (pat != pat2)
errln("Fail: Pattern \"" + DATA[i] + "\" x toPattern -> \"" + pat + '"');
errln("Fail: Pattern \"" + DATA[i] + "\" x toPattern -> \"" + pat + "\"");
else
logln("Ok: Pattern \"" + DATA[i] + "\" x toPattern -> \"" + pat + '"');
logln("Ok: Pattern \"" + DATA[i] + "\" x toPattern -> \"" + pat + "\"");
/*}
catch (IllegalArgumentException e) {
errln("Fail: Pattern \"" + DATA[i] + "\" -> " + e);
@ -980,12 +991,12 @@ void MessageFormatRegressionTest::TestAPI() {
// Test adoptFormat
MessageFormat *fmt = new MessageFormat("",status);
format->adoptFormat("",fmt,status);
format->adoptFormat("some_name",fmt,status); // Must at least pass a valid identifier.
failure(status, "adoptFormat");
// Test getFormat
format->setFormat((int32_t)0,*fmt);
format->getFormat("",status);
format->getFormat("some_other_name",status); // Must at least pass a valid identifier.
failure(status, "getFormat");
delete format;
}

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2007-2010, International Business Machines Corporation and
* Copyright (c) 2007-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -35,6 +35,8 @@ void PluralFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
TESTCASE(0, pluralFormatBasicTest);
TESTCASE(1, pluralFormatUnitTest);
TESTCASE(2, pluralFormatLocaleTest);
TESTCASE(3, pluralFormatExtendedTest);
TESTCASE(4, pluralFormatExtendedParseTest);
default: name = "";
break;
}
@ -159,26 +161,28 @@ void PluralFormatTest::pluralFormatUnitTest(/*char *par*/)
UNICODE_STRING_SIMPLE("odd {# is odd.} other{# is even.}"),
UNICODE_STRING_SIMPLE("other{# is odd or even.}"),
UNICODE_STRING_SIMPLE("odd{The number {0, number, #.#0} is odd.}other{The number {0, number, #.#0} is even.}"),
UNICODE_STRING_SIMPLE("odd{The number {#} is odd.}other{The number {#} is even.}"),
UNICODE_STRING_SIMPLE("odd{The number {1, number, #} is odd.}other{The number {2, number, #} is even.}"),
};
UnicodeString patternOddTestResult[PLURAL_PATTERN_DATA] = {
UNICODE_STRING_SIMPLE(" is odd."),
UNICODE_STRING_SIMPLE(" is odd or even."),
UNICODE_STRING_SIMPLE("The number {0, number, #.#0} is odd."),
UNICODE_STRING_SIMPLE("The number {#} is odd."),
UNICODE_STRING_SIMPLE("The number {1, number, #} is odd."),
};
UnicodeString patternEvenTestResult[PLURAL_PATTERN_DATA] = {
UNICODE_STRING_SIMPLE(" is even."),
UNICODE_STRING_SIMPLE(" is odd or even."),
UNICODE_STRING_SIMPLE("The number {0, number, #.#0} is even."),
UNICODE_STRING_SIMPLE("The number {#} is even."),
UNICODE_STRING_SIMPLE("The number {2, number, #} is even."),
};
UnicodeString checkSyntaxtData[PLURAL_SYNTAX_DATA] = {
UNICODE_STRING_SIMPLE("odd{foo} odd{bar} other{foobar}"),
UNICODE_STRING_SIMPLE("odd{foo} other{bar} other{foobar}"),
// ICU 4.8 does not check for duplicate keywords any more.
//UNICODE_STRING_SIMPLE("odd{foo} odd{bar} other{foobar}"),
//UNICODE_STRING_SIMPLE("odd{foo} other{bar} other{foobar}"),
UNICODE_STRING_SIMPLE("odd{foo}"),
UNICODE_STRING_SIMPLE("otto{foo} other{bar}"),
UNICODE_STRING_SIMPLE("1odd{foo} other{bar}"),
// ICU 4.8 does not check for unknown keywords any more.
//UNICODE_STRING_SIMPLE("otto{foo} other{bar}"),
UNICODE_STRING_SIMPLE("*odd{foo} other{bar}"),
UNICODE_STRING_SIMPLE("odd{foo},other{bar}"),
UNICODE_STRING_SIMPLE("od d{foo} other{bar}"),
UNICODE_STRING_SIMPLE("odd{foo}{foobar}other{foo}"),
@ -264,7 +268,7 @@ void PluralFormatTest::pluralFormatUnitTest(/*char *par*/)
}
numberFormatTest(&pluralFmt, numFmt, 5, 5, NULL, NULL, FALSE, &message);
pluralFmt.applyPattern(UNICODE_STRING_SIMPLE("odd__{odd} other{even}"), status);
if (U_SUCCESS(status)) {
if (pluralFmt.format(1, status) != UNICODE_STRING_SIMPLE("even")) {
errln("SetLocale should reset rules but did not.");
}
status = U_ZERO_ERROR;
@ -491,6 +495,73 @@ PluralFormatTest::pluralFormatLocaleTest(/*char *par*/)
}
}
void
PluralFormatTest::pluralFormatExtendedTest(void) {
const char *targets[] = {
"There are no widgets.",
"There is one widget.",
"There is a bling widget and one other widget.",
"There is a bling widget and 2 other widgets.",
"There is a bling widget and 3 other widgets.",
"Widgets, five (5-1=4) there be.",
"There is a bling widget and 5 other widgets.",
"There is a bling widget and 6 other widgets.",
};
const char* fmt =
"offset:1.0 "
"=0 {There are no widgets.} "
"=1.0 {There is one widget.} "
"=5 {Widgets, five (5-1=#) there be.} "
"one {There is a bling widget and one other widget.} "
"other {There is a bling widget and # other widgets.}";
UErrorCode status = U_ZERO_ERROR;
UnicodeString fmtString(fmt, -1, US_INV);
PluralFormat pf(fmtString, status);
if (U_FAILURE(status)) {
errln("Failed to apply pattern - %s\n", u_errorName(status));
return;
}
for (int i = 0; i < 7; ++i) {
UnicodeString result = pf.format(i, status);
if (U_FAILURE(status)) {
errln("Failed to format - %s\n", u_errorName(status));
}
UnicodeString expected(targets[i], -1, US_INV);
if (expected != result) {
UnicodeString message("Expected '", -1, US_INV);
message.append(expected);
message.append(UnicodeString("' but got '", -1, US_INV));
message.append(result);
message.append("'", -1, US_INV);
errln(message);
return;
}
}
}
void
PluralFormatTest::pluralFormatExtendedParseTest(void) {
const char *failures[] = {
"offset:1..0 =0 {Foo}",
"offset:1.0 {Foo}",
"=0= {Foo}",
"=0 {Foo} =0.0 {Bar}",
" = {Foo}",
};
int len = sizeof(failures)/sizeof(failures[0]);
for (int i = 0; i < len; ++i) {
UErrorCode status = U_ZERO_ERROR;
UnicodeString fmt(failures[i], -1, US_INV);
PluralFormat pf(fmt, status);
if (U_SUCCESS(status)) {
errln("expected failure when parsing '" + fmt + "'");
}
}
}
void
PluralFormatTest::numberFormatTest(PluralFormat* plFmt,
NumberFormat *numFmt,

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2001, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -29,6 +29,8 @@ private:
void pluralFormatBasicTest(/* char* par */);
void pluralFormatUnitTest(/* char* par */);
void pluralFormatLocaleTest(/* char* par */);
void pluralFormatExtendedTest();
void pluralFormatExtendedParseTest();
void numberFormatTest(PluralFormat* plFmt,
NumberFormat *numFmt,
int32_t start,

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
* Copyright (C) 2010 , Yahoo! Inc.
********************************************************************/
@ -12,7 +12,8 @@
#include "selfmts.h"
#include "cmemory.h"
#include "unicode/selfmt.h"
#include "stdio.h"
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
#define SIMPLE_PATTERN_STRING "feminine {feminineVerbValue} other{otherVerbValue}"
@ -81,10 +82,8 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/)
};
UnicodeString checkSyntaxData[SELECT_SYNTAX_DATA] = {
UNICODE_STRING_SIMPLE("odd{foo} odd{bar} other{foobar}"),
UNICODE_STRING_SIMPLE("odd{foo} other{bar} other{foobar}"),
UNICODE_STRING_SIMPLE("odd{foo}"),
UNICODE_STRING_SIMPLE("1odd{foo} other{bar}"),
UNICODE_STRING_SIMPLE("*odd{foo} other{bar}"),
UNICODE_STRING_SIMPLE("odd{foo},other{bar}"),
UNICODE_STRING_SIMPLE("od d{foo} other{bar}"),
UNICODE_STRING_SIMPLE("odd{foo}{foobar}other{foo}"),
@ -93,19 +92,6 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/)
UNICODE_STRING_SIMPLE("odd{fo{o1}other{foo2}}")
};
UErrorCode expErrorCodes[SELECT_SYNTAX_DATA]={
U_DUPLICATE_KEYWORD,
U_DUPLICATE_KEYWORD,
U_DEFAULT_KEYWORD_MISSING,
U_PATTERN_SYNTAX_ERROR,
U_PATTERN_SYNTAX_ERROR,
U_PATTERN_SYNTAX_ERROR,
U_PATTERN_SYNTAX_ERROR,
U_PATTERN_SYNTAX_ERROR,
U_PATTERN_SYNTAX_ERROR,
U_DEFAULT_KEYWORD_MISSING
};
UErrorCode status = U_ZERO_ERROR;
VERBOSE_USTRING(SIMPLE_PATTERN);
SelectFormat* selFmt = new SelectFormat( SIMPLE_PATTERN , status);
@ -113,7 +99,7 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/)
dataerrln("ERROR: SelectFormat Unit Test constructor failed in unit tests.- exitting");
return;
}
// ======= Test SelectFormat pattern syntax.
logln("SelectFormat Unit Test : Testing SelectFormat pattern syntax.");
for (int32_t i=0; i<SELECT_SYNTAX_DATA; ++i) {
@ -121,11 +107,23 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/)
VERBOSE_INT(i);
VERBOSE_USTRING(checkSyntaxData[i]);
selFmt->applyPattern(checkSyntaxData[i], status);
if( status!= expErrorCodes[i] ){
errln("\nERROR: Unexpected result - SelectFormat Unit Test failed to detect syntax error with pattern: "+checkSyntaxData[i]+" and expected status="+ u_errorName(expErrorCodes[i]) + " and resulted status="+u_errorName(status));
if (U_SUCCESS(status)){
errln("\nERROR: Unexpected result - SelectFormat Unit Test failed to detect syntax error with pattern: "+checkSyntaxData[i]);
}
}
// ICU 4.8 does not check for duplicate keywords any more.
status = U_ZERO_ERROR;
selFmt->applyPattern("odd{foo} odd{bar} other{foobar}", status);
FieldPosition format_ignore(FieldPosition::DONT_CARE);
UnicodeString format_result;
selFmt->format(UnicodeString("odd"), format_result, format_ignore, status);
assertEquals("should use first occurrence of the 'odd' keyword", "foo", format_result);
format_result.remove();
selFmt->applyPattern("odd{foo} other{bar} other{foobar}", status);
selFmt->format(UnicodeString("other"), format_result, format_ignore, status);
assertEquals("should use first occurrence of the 'other' keyword", "bar", format_result);
delete selFmt;
selFmt = NULL;
@ -166,27 +164,31 @@ void SelectFormatTest::selectFormatUnitTest(/*char *par*/)
}
//Test with an invalid keyword
// one which contains Pattern_Syntax or Pattern_White_Space.
logln("SelectFormat Unit test: Testing format() with keyword method and with invalid keywords...");
status = U_ZERO_ERROR;
result.remove();
UnicodeString keywords[] = {
"9Keyword-_", //Starts with a digit
"-Keyword-_", //Starts with a hyphen
"_Keyword-_", //Starts with a underscore
"\\u00E9Keyword-_", //Starts with non-ASCII character
"Key*word-_", //Contains a sepial character not allowed
"*Keyword-_" //Starts with a sepial character not allowed
"9Keyword-_",
"-Keyword-_",
"_Keyword-_",
"\\u00E9Keyword-_",
"Key word-_",
" Keyword-_",
"Key*word-_",
"*Keyword-_"
};
delete selFmt;
selFmt = NULL;
selFmt = new SelectFormat( SIMPLE_PATTERN , status);
for (int32_t i = 0; i< 6; i++ ){
for (int32_t i = 0; i < LENGTHOF(keywords); i++ ){
status = U_ZERO_ERROR;
selFmt->format( keywords[i], result , ignore , status);
if (!U_FAILURE(status)) {
errln("ERROR: SelectFormat Unit test failed in format() with keyWord and with an invalid keyword as : "+ keywords[i]);
errln("ERROR: SelectFormat Unit test failed in format() with keyWord and with an invalid keyword as : "+
keywords[i]+" ("+u_errorName(status)+")");
}
}

View file

@ -1,7 +1,7 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2009, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -80,11 +80,23 @@ TestChoiceFormat::TestSimpleExample( void )
}
delete formequal;
delete formnew;
//Testing getLimits()
double *gotLimits=0;
int32_t count=0;
gotLimits=(double*)form->getLimits(count);
const double *gotLimits=form->getLimits(count);
#if 1 // ICU 4.8 deprecates and disables the ChoiceFormat getters.
if(count != 0 || gotLimits != NULL) {
errln("getLimits() returns something, should be disabled");
}
const UnicodeString *gotFormats=form->getFormats(count);
if(count != 0 || gotFormats != NULL) {
errln("getFormats() returns something, should be disabled");
}
const UBool *gotClosures=form->getClosures(count);
if(count != 0 || gotClosures != NULL) {
errln("getClosures() returns something, should be disabled");
}
#else
if(count != 7){
errln("getLimits didn't update the count correctly\n");
}
@ -93,10 +105,9 @@ TestChoiceFormat::TestSimpleExample( void )
errln((UnicodeString)"getLimits didn't get the limits correctly. Expected " + limits[ix] + " Got " + gotLimits[ix]);
}
}
//Testing getFormat()
//Testing getFormats()
count=0;
UnicodeString *gotFormats=0;
gotFormats=(UnicodeString*)form->getFormats(count);
const UnicodeString *gotFormats=form->getFormats(count);
if(count != 7){
errln("getFormats didn't update the count correctly\n");
}
@ -105,10 +116,9 @@ TestChoiceFormat::TestSimpleExample( void )
errln((UnicodeString)"getFormats didn't get the Formats correctly. Expected " + monthNames[ix] + " Got " + gotFormats[ix]);
}
}
#endif
delete form;
}
void
@ -216,6 +226,7 @@ TestChoiceFormat::TestComplexExample( void )
it_logln("------ additional testing in complex test ------");
it_logln();
//
#if 0 // ICU 4.8 deprecates and disables the ChoiceFormat getters.
int32_t retCount;
const double* retLimits = fileform->getLimits( retCount );
if ((retCount == 4) && (retLimits)
@ -238,6 +249,7 @@ TestChoiceFormat::TestComplexExample( void )
}else{
it_errln("*** getFormats unexpected result!");
}
#endif
UnicodeString checkstr2[] = {
"There is no folder on Disk_A",
@ -486,6 +498,7 @@ void TestChoiceFormat::TestClosures(void) {
errln("FAIL: fmt1 != fmt2");
}
#if 0 // ICU 4.8 deprecates and disables the ChoiceFormat getters.
int32_t i;
int32_t count2 = 0;
const double *limits2 = fmt2.getLimits(count2);
@ -507,6 +520,7 @@ void TestChoiceFormat::TestClosures(void) {
}
}
}
#endif
// Now test both format objects
UnicodeString exp[] = {
@ -596,6 +610,7 @@ void TestChoiceFormat::TestPatterns(void) {
1.0, "b",
1.0 + 1e-9, "c");
#if 0 // ICU 4.8 only checks the pattern syntax, not whether the ranges make sense.
// Try an invalid pattern that isolates a single value.
// [-Inf,1.0) [1.0,1.0) [1.0,+Inf]
_testPattern("0.0#a|1.0#b|1.0#c", FALSE,
@ -614,6 +629,7 @@ void TestChoiceFormat::TestPatterns(void) {
// [-Inf,2.0) [2.0,1.0) [1.0,+Inf]
_testPattern("0.0#a|2.0#b|1.0#c", FALSE,
0, 0, 0, 0, 0, 0);
#endif
}
void TestChoiceFormat::TestChoiceFormatToPatternOverflow()

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************
* File TMSGFMT.CPP
@ -25,43 +25,46 @@
#include "unicode/msgfmt.h"
#include "unicode/numfmt.h"
#include "unicode/choicfmt.h"
#include "unicode/messagepattern.h"
#include "unicode/selfmt.h"
#include "unicode/gregocal.h"
#include <stdio.h>
#define E_WITH_ACUTE ((char)0x00E9)
static const char E_ACCENTED[]={E_WITH_ACUTE,0};
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
void
TestMessageFormat::runIndexedTest(int32_t index, UBool exec,
const char* &name, char* /*par*/) {
switch (index) {
TESTCASE(0,testBug1);
TESTCASE(1,testBug2);
TESTCASE(2,sample);
TESTCASE(3,PatternTest);
TESTCASE(4,testStaticFormat);
TESTCASE(5,testSimpleFormat);
TESTCASE(6,testMsgFormatChoice);
TESTCASE(7,testCopyConstructor);
TESTCASE(8,testAssignment);
TESTCASE(9,testClone);
TESTCASE(10,testEquals);
TESTCASE(11,testNotEquals);
TESTCASE(12,testSetLocale);
TESTCASE(13,testFormat);
TESTCASE(14,testParse);
TESTCASE(15,testAdopt);
TESTCASE(16,testCopyConstructor2);
TESTCASE(17,TestUnlimitedArgsAndSubformats);
TESTCASE(18,TestRBNF);
TESTCASE(19,TestTurkishCasing);
TESTCASE(20,testAutoQuoteApostrophe);
TESTCASE(21,testMsgFormatPlural);
TESTCASE(22,testCoverage);
TESTCASE(23,testMsgFormatSelect);
default: name = ""; break;
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testBug1);
TESTCASE_AUTO(testBug2);
TESTCASE_AUTO(sample);
TESTCASE_AUTO(PatternTest);
TESTCASE_AUTO(testStaticFormat);
TESTCASE_AUTO(testSimpleFormat);
TESTCASE_AUTO(testMsgFormatChoice);
TESTCASE_AUTO(testCopyConstructor);
TESTCASE_AUTO(testAssignment);
TESTCASE_AUTO(testClone);
TESTCASE_AUTO(testEquals);
TESTCASE_AUTO(testNotEquals);
TESTCASE_AUTO(testSetLocale);
TESTCASE_AUTO(testFormat);
TESTCASE_AUTO(testParse);
TESTCASE_AUTO(testAdopt);
TESTCASE_AUTO(testCopyConstructor2);
TESTCASE_AUTO(TestUnlimitedArgsAndSubformats);
TESTCASE_AUTO(TestRBNF);
TESTCASE_AUTO(TestTurkishCasing);
TESTCASE_AUTO(testAutoQuoteApostrophe);
TESTCASE_AUTO(testMsgFormatPlural);
TESTCASE_AUTO(testMsgFormatSelect);
TESTCASE_AUTO(testApostropheInPluralAndSelect);
TESTCASE_AUTO(TestApostropheMode);
TESTCASE_AUTO(TestCompatibleApostrophe);
TESTCASE_AUTO(testCoverage);
TESTCASE_AUTO(TestTrimArgumentName);
TESTCASE_AUTO_END;
}
void TestMessageFormat::testBug3()
@ -261,7 +264,9 @@ void TestMessageFormat::PatternTest()
"'{1,number,#,##}' {1,number,#,##}",
};
UnicodeString testResultPatterns[] = {
// ICU 4.8 returns the original pattern (testCases),
// rather than toPattern() reconstituting a new, equivalent pattern string (testResultPatterns).
/*UnicodeString testResultPatterns[] = {
"Quotes '', '{', a {0} '{'0}",
"Quotes '', '{', a {0,number} '{'0}",
"'{'1,number,#,##} {1,number,'#'#,##}",
@ -271,12 +276,12 @@ void TestMessageFormat::PatternTest()
"'{'1,date,full}, {1,date,full},",
"'{'3,date,full}, {3,date,full},",
"'{'1,number,#,##} {1,number,#,##}"
};
};*/
UnicodeString testResultStrings[] = {
"Quotes ', {, a 1 {0}",
"Quotes ', {, a 1 {0}",
"{1,number,#,##} #34,56",
"Quotes ', {, 'a' 1 {0}",
"Quotes ', {, 'a' 1 {0}",
"{1,number,'#',##} #34,56",
"There are 3,456 files on Disk at 1/12/70 5:46 AM.",
"On Disk, there are 3,456 files, with $1.00.",
"{1,number,percent}, 345,600%,",
@ -298,11 +303,17 @@ void TestMessageFormat::PatternTest()
logln(((UnicodeString)"MessageFormat for ") + testCases[i] + " creation failed.\n");
continue;
}
if (form->toPattern(buffer) != testResultPatterns[i]) {
// ICU 4.8 returns the original pattern (testCases),
// rather than toPattern() reconstituting a new, equivalent pattern string (testResultPatterns).
if (form->toPattern(buffer) != testCases[i]) {
// Note: An alternative test would be to build MessagePattern objects for
// both the input and output patterns and compare them, taking SKIP_SYNTAX etc.
// into account.
// (Too much trouble...)
errln(UnicodeString("TestMessageFormat::PatternTest failed test #2, i = ") + i);
//form->toPattern(buffer);
errln(((UnicodeString)" Orig: ") + testCases[i]);
errln(((UnicodeString)" Exp: ") + testResultPatterns[i]);
errln(((UnicodeString)" Exp: ") + testCases[i]);
errln(((UnicodeString)" Got: ") + buffer);
}
@ -322,7 +333,7 @@ void TestMessageFormat::PatternTest()
logln(UnicodeString(" Result: ") + result );
logln(UnicodeString(" Expected: ") + testResultStrings[i] );
}
//it_out << "Result: " << result);
#if 0
@ -534,7 +545,7 @@ void TestMessageFormat::testMsgFormatPlural(/* char* par */)
UnicodeString t2("{argument, plural, one{C''est # fichier} other {Ce sont # fichiers}} dans la liste.");
UnicodeString t3("There {0, plural, one{is # zavod}few{are {0, number,###.0} zavoda} other{are # zavodov}} in the directory.");
UnicodeString t4("There {argument, plural, one{is # zavod}few{are {argument, number,###.0} zavoda} other{are #zavodov}} in the directory.");
UnicodeString t5("{0, plural, one {{0, number,C''''est #,##0.0# fichier}} other {Ce sont # fichiers}} dans la liste.");
UnicodeString t5("{0, plural, one {{0, number,C''est #,##0.0# fichier}} other {Ce sont # fichiers}} dans la liste.");
MessageFormat* mfNum = new MessageFormat(t1, Locale("fr"), err);
if (U_FAILURE(err)) {
dataerrln("TestMessageFormat::testMsgFormatPlural #1 - argumentIndex - %s", u_errorName(err));
@ -611,15 +622,32 @@ void TestMessageFormat::testMsgFormatPlural(/* char* par */)
errln("TestMessageFormat::test nested PluralFormat with argumentName");
}
if ( argNameResult!= UnicodeString("C'est 0,0 fichier dans la liste.")) {
errln(UnicodeString("TestMessageFormat::test nested named PluralFormat."));
errln(UnicodeString("TestMessageFormat::test nested named PluralFormat: ") + argNameResult);
logln(UnicodeString("The unexpected nested named PluralFormat."));
}
delete msgFmt;
}
void TestMessageFormat::testApostropheInPluralAndSelect() {
UErrorCode errorCode = U_ZERO_ERROR;
MessageFormat msgFmt(UNICODE_STRING_SIMPLE(
"abc_{0,plural,other{#'#'#'{'#''}}_def_{1,select,other{sel'}'ect''}}_xyz"),
Locale::getEnglish(),
errorCode);
if (U_FAILURE(errorCode)) {
errln("MessageFormat constructor failed - %s\n", u_errorName(errorCode));
return;
}
UnicodeString expected = UNICODE_STRING_SIMPLE("abc_3#3{3'_def_sel}ect'_xyz");
Formattable args[] = { 3, UNICODE_STRING_SIMPLE("x") };
internalFormat(
&msgFmt, args, 2, expected,
"MessageFormat with apostrophes in plural/select arguments failed:\n");
}
void TestMessageFormat::internalFormat(MessageFormat* msgFmt ,
Formattable* args , int32_t numOfArgs ,
UnicodeString expected ,char* errMsg)
UnicodeString expected, const char* errMsg)
{
UnicodeString result;
FieldPosition ignore(FieldPosition::DONT_CARE);
@ -1236,7 +1264,12 @@ void TestMessageFormat::testAdopt()
}
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern(patCmp.remove()));
assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
// assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
msg.toPattern(patCmp.remove());
if (!patCmp.isBogus()) {
errln("msg.setFormat().toPattern() succeeds.");
}
for (i = 0; i < countAct; i++) {
a = formatsAct[i];
@ -1279,7 +1312,8 @@ void TestMessageFormat::testAdopt()
delete[] formatsToAdopt;
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern(patCmp.remove()));
assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
// assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
formatsAct = msg.getFormats(countAct);
if (!formatsAct || (countAct <=0) || (countAct != countCmp)) {
@ -1330,7 +1364,8 @@ void TestMessageFormat::testAdopt()
delete[] formatsToAdopt; // array itself not needed in this case;
assertEquals("msgCmp.toPattern()", formatStr, msgCmp.toPattern(patCmp.remove()));
assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
// ICU 4.8 does not support toPattern() when there are custom formats (from setFormat() etc.).
// assertEquals("msg.toPattern()", formatStr, msg.toPattern(patAct.remove()));
formatsAct = msg.getFormats(countAct);
if (!formatsAct || (countAct <=0) || (countAct != countCmp)) {
@ -1519,6 +1554,116 @@ void TestMessageFormat::TestRBNF(void) {
delete numFmt;
}
/**
 * Returns a copy of the pattern string with the text of every SKIP_SYNTAX
 * part removed.
 *
 * @param pattern an already-parsed MessagePattern
 * @return the pattern string minus all SKIP_SYNTAX ranges
 */
UnicodeString TestMessageFormat::GetPatternAndSkipSyntax(const MessagePattern& pattern) {
    UnicodeString stripped(pattern.getPatternString());
    // Walk the parts from last to first so that removing text never shifts
    // the indexes of parts that are still to be visited.
    for (int partIndex = pattern.countParts() - 1; partIndex >= 0; --partIndex) {
        const MessagePattern::Part& part = pattern.getPart(partIndex);
        if (part.getType() != UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
            continue;
        }
        stripped.remove(part.getIndex(), part.getLimit() - part.getIndex());
    }
    return stripped;
}
void TestMessageFormat::TestApostropheMode() {
UErrorCode status = U_ZERO_ERROR;
MessagePattern *ado_mp = new MessagePattern(UMSGPAT_APOS_DOUBLE_OPTIONAL, status);
MessagePattern *adr_mp = new MessagePattern(UMSGPAT_APOS_DOUBLE_REQUIRED, status);
if (ado_mp->getApostropheMode() != UMSGPAT_APOS_DOUBLE_OPTIONAL) {
errln("wrong value from ado_mp->getApostropheMode().");
}
if (adr_mp->getApostropheMode() != UMSGPAT_APOS_DOUBLE_REQUIRED) {
errln("wrong value from adr_mp->getApostropheMode().");
}
UnicodeString tuples[] = {
// Desired output
// DOUBLE_OPTIONAL pattern
// DOUBLE_REQUIRED pattern (empty=same as DOUBLE_OPTIONAL)
"I see {many}", "I see '{many}'", "",
"I said {'Wow!'}", "I said '{''Wow!''}'", "",
"I dont know", "I dont know", "I don't know",
"I don't know", "I don't know", "I don''t know",
"I don't know", "I don''t know", "I don''t know"
};
int32_t tuples_count = LENGTHOF(tuples);
for (int i = 0; i < tuples_count; i += 3) {
UnicodeString& desired = tuples[i];
UnicodeString& ado_pattern = tuples[i + 1];
UErrorCode status = U_ZERO_ERROR;
assertEquals("DOUBLE_OPTIONAL failure",
desired,
GetPatternAndSkipSyntax(ado_mp->parse(ado_pattern, NULL, status)));
UnicodeString& adr_pattern = tuples[i + 2].isEmpty() ? ado_pattern : tuples[i + 2];
assertEquals("DOUBLE_REQUIRED failure", desired,
GetPatternAndSkipSyntax(adr_mp->parse(adr_pattern, NULL, status)));
}
delete adr_mp;
delete ado_mp;
}
// Compare behavior of DOUBLE_OPTIONAL (new default) and DOUBLE_REQUIRED JDK-compatibility mode.
void TestMessageFormat::TestCompatibleApostrophe() {
// Message with choice argument which does not contain another argument.
// The JDK performs only one apostrophe-quoting pass on this pattern.
UnicodeString pattern = "ab{0,choice,0#1'2''3'''4''''.}yz";
UErrorCode ec = U_ZERO_ERROR;
MessageFormat compMsg("", Locale::getUS(), ec);
compMsg.applyPattern(pattern, UMSGPAT_APOS_DOUBLE_REQUIRED, NULL, ec);
if (compMsg.getApostropheMode() != UMSGPAT_APOS_DOUBLE_REQUIRED) {
errln("wrong value from compMsg.getApostropheMode().");
}
MessageFormat icuMsg("", Locale::getUS(), ec);
icuMsg.applyPattern(pattern, UMSGPAT_APOS_DOUBLE_OPTIONAL, NULL, ec);
if (icuMsg.getApostropheMode() != UMSGPAT_APOS_DOUBLE_OPTIONAL) {
errln("wrong value from icuMsg.getApostropheMode().");
}
Formattable zero0[] = { 0 };
FieldPosition fieldpos(0);
UnicodeString buffer1, buffer2;
assertEquals("incompatible ICU MessageFormat compatibility-apostrophe behavior",
"ab12'3'4''.yz",
compMsg.format(zero0, 1, buffer1, fieldpos, ec));
assertEquals("unexpected ICU MessageFormat double-apostrophe-optional behavior",
"ab1'2'3''4''.yz",
icuMsg.format(zero0, 1, buffer2, fieldpos, ec));
// Message with choice argument which contains a nested simple argument.
// The DOUBLE_REQUIRED version performs two apostrophe-quoting passes.
buffer1.remove();
buffer2.remove();
pattern = "ab{0,choice,0#1'2''3'''4''''.{0,number,'#x'}}yz";
compMsg.applyPattern(pattern, ec);
icuMsg.applyPattern(pattern, ec);
assertEquals("incompatible ICU MessageFormat compatibility-apostrophe behavior",
"ab1234'.0xyz",
compMsg.format(zero0, 1, buffer1, fieldpos, ec));
assertEquals("unexpected ICU MessageFormat double-apostrophe-optional behavior",
"ab1'2'3''4''.#x0yz",
icuMsg.format(zero0, 1, buffer2, fieldpos, ec));
// This part is copied over from Java tests but cannot be properly tested here
// because we do not have a live reference implementation with JDK behavior.
// The JDK ChoiceFormat itself always performs one apostrophe-quoting pass.
/*
ChoiceFormat choice = new ChoiceFormat("0#1'2''3'''4''''.");
assertEquals("unexpected JDK ChoiceFormat apostrophe behavior",
"12'3'4''.",
choice.format(0));
choice.applyPattern("0#1'2''3'''4''''.{0,number,'#x'}");
assertEquals("unexpected JDK ChoiceFormat apostrophe behavior",
"12'3'4''.{0,number,#x}",
choice.format(0));
*/
}
void TestMessageFormat::testAutoQuoteApostrophe(void) {
const char* patterns[] = { // pattern, expected pattern
"'", "''",
@ -1595,7 +1740,10 @@ void TestMessageFormat::testCoverage(void) {
}
}
msgfmt->adoptFormat("adopt", &cf, status);
// adoptFormat() takes ownership of the input Format object.
// We need to clone the stack-allocated cf so that we do not attempt to delete cf.
Format *cfClone = cf.clone();
msgfmt->adoptFormat("adopt", cfClone, status);
delete en;
delete msgfmt;
@ -1609,18 +1757,38 @@ void TestMessageFormat::testCoverage(void) {
errln("FAIL: Unable to detect usage of named arguments.");
}
// Starting with ICU 4.8, we support setFormat(name, ...) and getFormatNames()
// on a MessageFormat without named arguments.
msgfmt->setFormat("formatName", cf, status);
if (!U_FAILURE(status)) {
errln("FAIL: Should fail to setFormat instead of passing.");
if (U_FAILURE(status)) {
errln("FAIL: Should work to setFormat(name, ...) regardless of pattern.");
}
status = U_ZERO_ERROR;
en = msgfmt->getFormatNames(status);
if (!U_FAILURE(status)) {
errln("FAIL: Should fail to get format names enumeration instead of passing.");
if (U_FAILURE(status)) {
errln("FAIL: Should work to get format names enumeration regardless of pattern.");
}
delete en;
delete msgfmt;
}
void TestMessageFormat::TestTrimArgumentName() {
// ICU 4.8 allows and ignores white space around argument names and numbers.
IcuTestErrorCode errorCode(*this, "TestTrimArgumentName");
MessageFormat m("a { 0 , number , '#,#'#.0 } z", Locale::getEnglish(), errorCode);
Formattable args[1] = { 2 };
FieldPosition ignore(0);
UnicodeString result;
assertEquals("trim-numbered-arg format() failed", "a #,#2.0 z",
m.format(args, 1, result, ignore, errorCode));
m.applyPattern("x { _oOo_ , number , integer } y", errorCode);
UnicodeString argName = UNICODE_STRING_SIMPLE("_oOo_");
args[0].setLong(3);
result.remove();
assertEquals("trim-named-arg format() failed", "x 3 y",
m.format(&argName, args, 1, result, errorCode));
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
#ifndef _TESTMESSAGEFORMAT
@ -65,12 +65,14 @@ public:
**/
void testMsgFormatSelect(/* char* par */);
void testApostropheInPluralAndSelect();
/**
* Internal method to format a MessageFormat object with passed args
**/
void internalFormat(MessageFormat* msgFmt ,
Formattable* args , int32_t numOfArgs ,
UnicodeString expected ,char* errMsg);
UnicodeString expected, const char* errMsg);
/**
* Internal method to create a MessageFormat object with passed args
@ -89,7 +91,10 @@ public:
*/
void TestRBNF();
//
void TestApostropheMode();
void TestCompatibleApostrophe();
/**
* ------------ API tests ----------
* These routines test various API functionality.
@ -108,11 +113,13 @@ public:
void testAdopt(void);
void TestTurkishCasing(void);
void testAutoQuoteApostrophe(void);
void TestTrimArgumentName();
/* Provide better code coverage */
void testCoverage(void);
private:
UnicodeString GetPatternAndSkipSyntax(const MessagePattern& pattern);
};
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -10,6 +10,7 @@
#include "unicode/putil.h"
#include "cstring.h"
#include "hash.h"
#include "patternprops.h"
#include "normalizer2impl.h"
#include "uparse.h"
#include "ucdtest.h"
@ -50,13 +51,15 @@ UnicodeTest::~UnicodeTest()
void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
{
if (exec) logln("TestSuite UnicodeTest: ");
switch (index) {
case 0: name = "TestAdditionalProperties"; if(exec) TestAdditionalProperties(); break;
case 1: name = "TestBinaryValues"; if(exec) TestBinaryValues(); break;
case 2: name = "TestConsistency"; if(exec) TestConsistency(); break;
default: name = ""; break; //needed to end loop
if(exec) {
logln("TestSuite UnicodeTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(TestAdditionalProperties);
TESTCASE_AUTO(TestBinaryValues);
TESTCASE_AUTO(TestConsistency);
TESTCASE_AUTO(TestPatternProperties);
TESTCASE_AUTO_END;
}
//====================================================
@ -339,7 +342,7 @@ void UnicodeTest::TestConsistency() {
IcuTestErrorCode errorCode(*this, "TestConsistency");
const Normalizer2 *nfd=Normalizer2::getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode);
const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode);
if(errorCode.isFailure()) {
if(!nfcImpl->ensureCanonIterData(errorCode) || errorCode.isFailure()) {
dataerrln("Normalizer2::getInstance(NFD) or Normalizer2Factory::getNFCImpl() failed - %s\n",
errorCode.errorName());
errorCode.reset();
@ -369,3 +372,57 @@ void UnicodeTest::TestConsistency() {
}
#endif
}
/**
* Test various implementations of Pattern_Syntax & Pattern_White_Space.
*/
void UnicodeTest::TestPatternProperties() {
IcuTestErrorCode errorCode(*this, "TestPatternProperties()");
UnicodeSet syn_pp;
UnicodeSet syn_prop(UNICODE_STRING_SIMPLE("[:Pattern_Syntax:]"), errorCode);
UnicodeSet syn_list(UNICODE_STRING_SIMPLE(
"[!-/\\:-@\\[-\\^`\\{-~"
"\\u00A1-\\u00A7\\u00A9\\u00AB\\u00AC\\u00AE\\u00B0\\u00B1\\u00B6\\u00BB\\u00BF\\u00D7\\u00F7"
"\\u2010-\\u2027\\u2030-\\u203E\\u2041-\\u2053\\u2055-\\u205E\\u2190-\\u245F\\u2500-\\u2775"
"\\u2794-\\u2BFF\\u2E00-\\u2E7F\\u3001-\\u3003\\u3008-\\u3020\\u3030\\uFD3E\\uFD3F\\uFE45\\uFE46]"), errorCode);
UnicodeSet ws_pp;
UnicodeSet ws_prop(UNICODE_STRING_SIMPLE("[:Pattern_White_Space:]"), errorCode);
UnicodeSet ws_list(UNICODE_STRING_SIMPLE("[\\u0009-\\u000D\\ \\u0085\\u200E\\u200F\\u2028\\u2029]"), errorCode);
UnicodeSet syn_ws_pp;
UnicodeSet syn_ws_prop(syn_prop);
syn_ws_prop.addAll(ws_prop);
for(UChar32 c=0; c<=0xffff; ++c) {
if(PatternProps::isSyntax(c)) {
syn_pp.add(c);
}
if(PatternProps::isWhiteSpace(c)) {
ws_pp.add(c);
}
if(PatternProps::isSyntaxOrWhiteSpace(c)) {
syn_ws_pp.add(c);
}
}
compareUSets(syn_pp, syn_prop,
"PatternProps.isSyntax()", "[:Pattern_Syntax:]", TRUE);
compareUSets(syn_pp, syn_list,
"PatternProps.isSyntax()", "[Pattern_Syntax ranges]", TRUE);
compareUSets(ws_pp, ws_prop,
"PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", TRUE);
compareUSets(ws_pp, ws_list,
"PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]", TRUE);
compareUSets(syn_ws_pp, syn_ws_prop,
"PatternProps.isSyntaxOrWhiteSpace()",
"[[:Pattern_Syntax:][:Pattern_White_Space:]]", TRUE);
}
// Minimal port (so far) of the Java & cucdtst.c compareUSets() helper.
// Returns whether sets a and b are equal. When they differ and diffIsError
// is set, an error naming both sets (a_name, b_name) is reported;
// otherwise a difference is not logged at all.
UBool
UnicodeTest::compareUSets(const UnicodeSet &a, const UnicodeSet &b,
                          const char *a_name, const char *b_name,
                          UBool diffIsError) {
    UBool setsMatch=(a==b);
    if(diffIsError && !setsMatch) {
        errln("Sets are different: %s vs. %s\n", a_name, b_name);
    }
    return setsMatch;
}

View file

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2010, International Business Machines Corporation and
* Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
@ -36,6 +36,7 @@ public:
void TestAdditionalProperties();
void TestBinaryValues();
void TestConsistency();
void TestPatternProperties();
private:
@ -50,5 +51,8 @@ private:
UnicodeSet derivedProps[30];
U_NAMESPACE_QUALIFIER Hashtable *unknownPropertyNames;
};
UBool compareUSets(const UnicodeSet &a, const UnicodeSet &b,
const char *a_name, const char *b_name,
UBool diffIsError);
};

View file

@ -322,7 +322,6 @@ void UObjectTest::testIDs()
{
ids_count = 0;
UErrorCode status = U_ZERO_ERROR;
static const UChar SMALL_STR[] = {0x51, 0x51, 0x51, 0}; // "QQQ"
#if !UCONFIG_NO_TRANSLITERATION || !UCONFIG_NO_FORMATTING
UParseError parseError;
@ -364,6 +363,8 @@ void UObjectTest::testIDs()
TESTCLASSID_CTOR(DecimalFormatSymbols, (status));
TESTCLASSID_DEFAULT(FieldPosition);
TESTCLASSID_DEFAULT(Formattable);
static const UChar SMALL_STR[] = {0x51, 0x51, 0x51, 0}; // "QQQ"
TESTCLASSID_CTOR(CurrencyAmount, (1.0, SMALL_STR, status));
TESTCLASSID_CTOR(CurrencyUnit, (SMALL_STR, status));
TESTCLASSID_NONE_FACTORY(LocaleDisplayNames, LocaleDisplayNames::createInstance("de"));
@ -570,6 +571,7 @@ void UObjectTest::TestMFCCompatibility() {
}
void UObjectTest::TestCompilerRTTI() {
#if !UCONFIG_NO_FORMATTING
UErrorCode errorCode = U_ZERO_ERROR;
NumberFormat *nf = NumberFormat::createInstance("de", errorCode);
if (U_FAILURE(errorCode)) {
@ -587,6 +589,7 @@ void UObjectTest::TestCompilerRTTI() {
errln("typeid(NumberFormat) failed");
}
delete nf;
#endif
}
/* --------------- */

View file

@ -1,6 +1,6 @@
/*
**********************************************************************
* Copyright (C) 2004-2010, International Business Machines
* Copyright (C) 2004-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: filetst.c
@ -910,8 +910,8 @@ static void TestCodepage(void) {
}
static void TestCodepageFlush(void) {
#if UCONFIG_NO_LEGACY_CONVERSION
log_verbose("Skipping, legacy conversion is disabled.");
#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_FORMATTING
log_verbose("Skipping, legacy conversion or formatting is disabled.");
#else
UChar utf16String[] = { 0x39, 0x39, 0x39, 0x20, 0x65E0, 0x6CD6, 0x5728, 0x0000 };
uint8_t inBuf[200];