ICU-13569 refresh dev branch from trunk.

X-SVN-Rev: 40917
This commit is contained in:
Andy Heninger 2018-02-14 23:55:39 +00:00
parent fd77c49a2b
commit c25708b4c3
30 changed files with 480 additions and 237 deletions

1
.gitattributes vendored
View file

@ -49,7 +49,6 @@ README text !eol
*.tri2 -text
icu4c/icu4c.css -text
icu4c/packaging/distrelease.ps1 -text
icu4c/source/aclocal.m4 -text
icu4c/source/config/m4/icu-conditional.m4 -text
icu4c/source/data/curr/pool.res -text

2
.gitignore vendored
View file

@ -635,6 +635,8 @@ icu4c/source/tools/ctestfw/libsicutest*
icu4c/source/tools/ctestfw/release
icu4c/source/tools/ctestfw/x64
icu4c/source/tools/ctestfw/x86
icu4c/source/tools/escapesrc/*.d
icu4c/source/tools/escapesrc/Makefile
icu4c/source/tools/genbrk/*.d
icu4c/source/tools/genbrk/*.o
icu4c/source/tools/genbrk/*.pdb

View file

@ -383,3 +383,32 @@ Database section 7.
# by ICANN or the IETF Trust on the database or the code. Any person
# making a contribution to the database or code waives all rights to
# future claims in that contribution or in the TZ Database.
6. Google double-conversion
Copyright 2006-2011, the V8 project authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,53 +1,53 @@
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
#-------------------------
# Script: icu\packaging\distrelease.ps1
# Author: Steven R. Loomis
# Date: 2017-04-14
#-------------------------
#
# This builds a zipfile containing the *64 bit* Windows binary
#
# Usage: (after building ICU using MSVC)
# (bring up Powershell ISE)
# cd C:\icu\icu4c\
# Set-ExecutionPolicy -Scope Process AllSigned
# .\packaging\distrelease.ps1
#
# Will emit: C:\icu\icu4c\source\dist\icu-windows.zip
# (i.e. source\dist\icu-windows.zip under the parent of this script's directory)
#
#
# You will get warnings from the execution policy and the script itself.
# see https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_execution_policies?view=powershell-5.1&viewFallbackFrom=powershell-Microsoft.PowerShell.Core
# for more about execution policies.
# Directory that contains this script (...\icu4c\packaging).
$icuDir = Split-Path -Path $MyInvocation.MyCommand.Definition -Parent
# Step up to the icu4c root. The path must be double-quoted: inside single quotes
# PowerShell does not expand $icuDir, so the literal text '$icuDir\..' would be
# resolved against the current location instead of the script's location.
$icuDir = Resolve-Path -Path "$icuDir\.."
echo $icuDir
# ok, create some work areas
# (errors are silenced so that re-running with an existing dist directory is fine)
New-Item -Path "$icuDir\source\dist" -ErrorAction SilentlyContinue -ItemType "directory"
$source = "$icuDir\source\dist\icu"
# Empty the staging directory left over from a previous run, then make sure it exists.
Get-ChildItem -Path $source -ErrorAction SilentlyContinue | Remove-Item -Recurse
New-Item -Path $source -ItemType "directory" -ErrorAction SilentlyContinue
# copy required stuff
Copy-Item -Path "$icuDir\lib64" -Destination $source -Recurse
Copy-Item -Path "$icuDir\include" -Destination $source -Recurse
Copy-Item -Path "$icuDir\bin64" -Destination $source -Recurse
Copy-Item -Path "$icuDir\APIChangeReport.html" -Destination $source -Recurse
Copy-Item -Path "$icuDir\icu4c.css" -Destination $source -Recurse
Copy-Item -Path "$icuDir\LICENSE" -Destination $source -Recurse
Copy-Item -Path "$icuDir\readme.html" -Destination $source -Recurse
$destination = "$icuDir\source\dist\icu-windows.zip"
# Remove any archive from a previous run before creating the new one;
# a missing file is reported but does not stop the script.
Remove-Item -Path $destination -ErrorAction Continue
Add-Type -assembly "system.io.compression.filesystem"
Echo $source
Echo $destination
# Zip the staged tree into the destination archive.
[io.compression.zipfile]::CreateFromDirectory($source, $destination)
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
#-------------------------
# Script: icu\packaging\distrelease.ps1
# Author: Steven R. Loomis
# Date: 2017-04-14
#-------------------------
#
# This builds a zipfile containing the *64 bit* Windows binary
#
# Usage: (after building ICU using MSVC)
# (bring up Powershell ISE)
# cd C:\icu\icu4c\
# Set-ExecutionPolicy -Scope Process AllSigned
# .\packaging\distrelease.ps1
#
# Will emit: C:\icu\icu4c\source\dist\icu-windows.zip
# (i.e. source\dist\icu-windows.zip under the parent of this script's directory)
#
#
# You will get warnings from the execution policy and the script itself.
# see https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_execution_policies?view=powershell-5.1&viewFallbackFrom=powershell-Microsoft.PowerShell.Core
# for more about execution policies.
# Directory that contains this script (...\icu4c\packaging).
$icuDir = Split-Path -Path $MyInvocation.MyCommand.Definition -Parent
# Step up to the icu4c root. The path must be double-quoted: inside single quotes
# PowerShell does not expand $icuDir, so the literal text '$icuDir\..' would be
# resolved against the current location instead of the script's location.
$icuDir = Resolve-Path -Path "$icuDir\.."
echo $icuDir
# ok, create some work areas
# (errors are silenced so that re-running with an existing dist directory is fine)
New-Item -Path "$icuDir\source\dist" -ErrorAction SilentlyContinue -ItemType "directory"
$source = "$icuDir\source\dist\icu"
# Empty the staging directory left over from a previous run, then make sure it exists.
Get-ChildItem -Path $source -ErrorAction SilentlyContinue | Remove-Item -Recurse
New-Item -Path $source -ItemType "directory" -ErrorAction SilentlyContinue
# copy required stuff
Copy-Item -Path "$icuDir\lib64" -Destination $source -Recurse
Copy-Item -Path "$icuDir\include" -Destination $source -Recurse
Copy-Item -Path "$icuDir\bin64" -Destination $source -Recurse
Copy-Item -Path "$icuDir\APIChangeReport.html" -Destination $source -Recurse
Copy-Item -Path "$icuDir\icu4c.css" -Destination $source -Recurse
Copy-Item -Path "$icuDir\LICENSE" -Destination $source -Recurse
Copy-Item -Path "$icuDir\readme.html" -Destination $source -Recurse
$destination = "$icuDir\source\dist\icu-windows.zip"
# Remove any archive from a previous run before creating the new one;
# a missing file is reported but does not stop the script.
Remove-Item -Path $destination -ErrorAction Continue
Add-Type -assembly "system.io.compression.filesystem"
Echo $source
Echo $destination
# Zip the staged tree into the destination archive.
[io.compression.zipfile]::CreateFromDirectory($source, $destination)
echo $destination

View file

@ -696,36 +696,20 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
// Use a single counter for source and target, counting the minimum of
// the source length and the target capacity.
// Let the standard converter handle edge cases.
const uint8_t *limit=sourceLimit;
if(count>targetCapacity) {
limit-=(count-targetCapacity);
count=targetCapacity;
}
// The conversion loop checks count>0 only once per 1/2/3-byte character.
// If the buffer ends with a truncated 2- or 3-byte sequence,
// The conversion loop checks count>0 only once per character.
// If the buffer ends with a truncated sequence,
// then we reduce the count to stop before that,
// and collect the remaining bytes after the conversion loop.
{
// Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
int32_t length=count-toULimit;
if(length>0) {
uint8_t b1=*(limit-1);
if(U8_IS_SINGLE(b1)) {
// common ASCII character
} else if(U8_IS_TRAIL(b1) && length>=2) {
uint8_t b2=*(limit-2);
if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
// truncated 3-byte sequence
count-=2;
}
} else if(0xc2<=b1 && b1<0xf0) {
// truncated 2- or 3-byte sequence
--count;
}
}
}
// Do not go back into the bytes that will be read for finishing a partial
// sequence from the previous buffer.
int32_t length=count-toULimit;
U8_TRUNCATE_IF_INCOMPLETE(source, 0, length);
count=toULimit+length;
}
if(c!=0) {
@ -815,7 +799,7 @@ moreBytes:
}
/* copy the legal byte sequence to the target */
if(count>=toULength) {
{
int8_t i;
for(i=0; i<oldToULength; ++i) {
@ -826,14 +810,6 @@ moreBytes:
*target++=*source++;
}
count-=toULength;
} else {
// A supplementary character that does not fit into the target.
// Let the standard converter handle this.
source-=(toULength-oldToULength);
pToUArgs->source=(char *)source;
pFromUArgs->target=(char *)target;
*pErrorCode=U_USING_DEFAULT_WARNING;
return;
}
}
}
@ -857,8 +833,7 @@ moreBytes:
utf8->toULength=toULength;
utf8->mode=toULimit;
break;
} else if(!U8_IS_TRAIL(b=*source)) {
/* lead byte in trail byte position */
} else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) {
utf8->toULength=toULength;
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
break;

View file

@ -631,7 +631,7 @@ namespace std {
*/
#ifdef U_CHARSET_IS_UTF8
/* Use the predefined value. */
#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED
#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED
# define U_CHARSET_IS_UTF8 1
#else
# define U_CHARSET_IS_UTF8 0

View file

@ -380,7 +380,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
uint8_t __t; \
uint8_t __t = 0; \
if((i)!=(length) && \
/* fetch/validate/assemble all but last trail byte */ \
((c)>=0xe0 ? \
@ -592,12 +592,15 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
*
* "Safe" macro, checks for illegal sequences and for string boundaries.
* Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<=i
* @see U8_SET_CP_START_UNSAFE
* @see U8_TRUNCATE_IF_INCOMPLETE
* @stable ICU 2.4
*/
#define U8_SET_CP_START(s, start, i) { \
@ -606,6 +609,51 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
} \
}
/**
 * If the string ends with a UTF-8 byte sequence that is valid so far
 * but incomplete, then reduce the length of the string to end before
 * the lead byte of that incomplete sequence.
 * For example, if the string ends with E1 80, the length is reduced by 2.
 *
 * Useful for processing text split across multiple buffers
 * (save the incomplete sequence for later)
 * and for optimizing iteration
 * (check for string length only once per character).
 *
 * "Safe" macro, checks for illegal sequences and for string boundaries.
 * Unlike U8_SET_CP_START(), this macro never reads s[length].
 *
 * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
 *
 * The macro expands to a single brace-enclosed block, so it can be used as
 * the body of an if/else or a loop without capturing a following "else".
 * The arguments are evaluated more than once, and length is modified in
 * place, so length must be a modifiable lvalue without side effects.
 *
 * @param s const uint8_t * string
 * @param start int32_t starting string offset (usually 0)
 * @param length int32_t string length, must be start<=length
 * @see U8_SET_CP_START
 * @draft ICU 61
 */
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) { \
    if((length)>(start)) { \
        /* (s) is parenthesized so that pointer expressions like p+1 index correctly. */ \
        uint8_t __b1=(s)[(length)-1]; \
        if(U8_IS_SINGLE(__b1)) { \
            /* common ASCII character */ \
        } else if(U8_IS_LEAD(__b1)) { \
            /* a lone lead byte at the end: drop it */ \
            --(length); \
        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
            uint8_t __b2=(s)[(length)-2]; \
            if(0xe0<=__b2 && __b2<=0xf4) { \
                /* 3- or 4-byte lead followed by one valid trail byte */ \
                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
                    (length)-=2; \
                } \
            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
                /* two trail bytes: only a 4-byte lead can still be incomplete */ \
                uint8_t __b3=(s)[(length)-3]; \
                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
                    (length)-=3; \
                } \
            } \
        } \
    } \
}
/* definitions with backward iteration -------------------------------------- */
/**

View file

@ -238,33 +238,45 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
int32_t i=*pi;
if(U8_IS_TRAIL(c) && i>start) {
uint8_t b1=s[--i];
if(0xc2<=b1 && b1<0xe0) {
*pi=i;
return ((b1-0xc0)<<6)|(c&0x3f);
if(U8_IS_LEAD(b1)) {
if(b1<0xe0) {
*pi=i;
return ((b1-0xc0)<<6)|(c&0x3f);
} else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) {
// Truncated 3- or 4-byte sequence.
*pi=i;
return errorValue(1, strict);
}
} else if(U8_IS_TRAIL(b1) && i>start) {
// Extract the value bits from the last trail byte.
c&=0x3f;
uint8_t b2=s[--i];
if(0xe0<=b2 && b2<0xf0) {
b2&=0xf;
if(strict!=-2) {
if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
*pi=i;
c=(b2<<12)|((b1&0x3f)<<6)|c;
if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
return c;
} else {
// strict: forbid non-characters like U+fffe
return errorValue(2, strict);
if(0xe0<=b2 && b2<=0xf4) {
if(b2<0xf0) {
b2&=0xf;
if(strict!=-2) {
if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
*pi=i;
c=(b2<<12)|((b1&0x3f)<<6)|c;
if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) {
return c;
} else {
// strict: forbid non-characters like U+fffe
return errorValue(2, strict);
}
}
} else {
// strict=-2 -> lenient: allow surrogates
b1-=0x80;
if((b2>0 || b1>=0x20)) {
*pi=i;
return (b2<<12)|(b1<<6)|c;
}
}
} else {
// strict=-2 -> lenient: allow surrogates
b1-=0x80;
if((b2>0 || b1>=0x20)) {
*pi=i;
return (b2<<12)|(b1<<6)|c;
}
} else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
// Truncated 4-byte sequence.
*pi=i;
return errorValue(2, strict);
}
} else if(U8_IS_TRAIL(b2) && i>start) {
uint8_t b3=s[--i];
@ -281,16 +293,7 @@ utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, U
}
}
}
} else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
// Truncated 4-byte sequence.
*pi=i;
return errorValue(2, strict);
}
} else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
(0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
// Truncated 3- or 4-byte sequence.
*pi=i;
return errorValue(1, strict);
}
}
return errorValue(0, strict);
@ -303,29 +306,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) {
uint8_t c=s[i];
if(U8_IS_TRAIL(c) && i>start) {
uint8_t b1=s[--i];
if(0xc2<=b1 && b1<0xe0) {
return i;
if(U8_IS_LEAD(b1)) {
if(b1<0xe0 ||
(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
return i;
}
} else if(U8_IS_TRAIL(b1) && i>start) {
uint8_t b2=s[--i];
if(0xe0<=b2 && b2<0xf0) {
if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
if(0xe0<=b2 && b2<=0xf4) {
if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
return i;
}
} else if(U8_IS_TRAIL(b2) && i>start) {
uint8_t b3=s[--i];
if(0xf0<=b3 && b3<=0xf4) {
if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
return i;
}
if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
return i;
}
} else if(0xf0<=b2 && b2<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
// Truncated 4-byte sequence.
return i;
}
} else if((0xe0<=b1 && b1<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b1, c)) ||
(0xf0<=b1 && b1<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
// Truncated 3- or 4-byte sequence.
return i;
}
}
return orig_i;

View file

@ -19,7 +19,7 @@ DISTY_DIR=dist
DISTY_TMP=dist/tmp
DISTY_ICU=$(DISTY_TMP)/icu
DISTY_DATA=$(DISTY_ICU)/source/data
DISTY_RMV=brkitr coll curr lang locales mappings rbnf region translit xml zone misc unit
DISTY_RMV=brkitr coll curr lang locales mappings rbnf region translit xml zone misc/*.txt misc/*.mk unit
DISTY_RMDIR=$(DISTY_RMV:%=$(DISTY_DATA)/%)
DISTY_IN=$(DISTY_DATA)/in
DOCZIP=icu-docs.zip
@ -49,7 +49,7 @@ $(DISTY_TMP):
$(DISTY_DOC_ZIP): $(DOCZIP) $(DISTY_FILE_DIR)
cp $(DOCZIP) $(DISTY_DOC_ZIP)
ln -sf $(DISTY_DOC_ZIP) $(DISTY_FILE_DIR)/icu4c-docs.zip
ln -sf $(shell basename $(DISTY_DOC_ZIP)) $(DISTY_FILE_DIR)/icu4c-docs.zip
$(DISTY_DAT):
echo Missing $@
@ -74,14 +74,14 @@ $(DISTY_FILE_TGZ) $(DISTY_FILE_ZIP) $(DISTY_DATA_ZIP): $(DISTY_DAT) $(DISTY_TMP
$(MKINSTALLDIRS) $(DISTY_IN)
echo DISTY_DAT=$(DISTY_DAT)
cp $(DISTY_DAT) $(DISTY_IN)
( cd $(DISTY_TMP)/icu ; python as_is/bomlist.py > as_is/bomlist.txt || rm -f as_is/bomlist.txt )
( cd $(DISTY_TMP) ; zip -rlq $(DISTY_FILE_ZIP) icu )
$(RMV) $(DISTY_RMDIR)
( cd $(DISTY_TMP)/icu ; python as_is/bomlist.py > as_is/bomlist.txt || rm -f as_is/bomlist.txt )
( cd $(DISTY_TMP) ; tar cfpz $(DISTY_FILE_TGZ) icu )
ln -sf $(DISTY_FILE_ZIP) $(DISTY_FILE_DIR)/icu4c-src.zip
ln -sf $(DISTY_FILE_TGZ) $(DISTY_FILE_DIR)/icu4c-src.tgz
ln -sf $(DISTY_DATA_ZIP) $(DISTY_FILE_DIR)/icu4c-data.zip
( cd $(DISTY_TMP) ; zip -rlq $(DISTY_FILE_ZIP) icu )
$(RMV) $(DISTY_TMP)
ln -sf $(shell basename $(DISTY_FILE_ZIP)) $(DISTY_FILE_DIR)/icu4c-src.zip
ln -sf $(shell basename $(DISTY_FILE_TGZ)) $(DISTY_FILE_DIR)/icu4c-src.tgz
ln -sf $(shell basename $(DISTY_DATA_ZIP)) $(DISTY_FILE_DIR)/icu4c-data.zip
ls -l $(DISTY_FILE_TGZ) $(DISTY_FILE_ZIP) $(DISTY_DATA_ZIP)

View file

@ -614,7 +614,7 @@ void IslamicCalendar::handleComputeFields(int32_t julianDay, UErrorCode &status)
days = julianDay - ASTRONOMICAL_EPOC;
}
// Use the civil calendar approximation, which is just arithmetic
year = (int)ClockMath::floorDivide( (double)(30 * days + 10646) , 10631.0 );
year = (int32_t)ClockMath::floorDivide(30 * (int64_t)days + 10646, (int64_t)10631);
month = (int32_t)uprv_ceil((days - 29 - yearStart(year)) / 29.5 );
month = month<11?month:11;
startDate = monthStart(year, month);

View file

@ -681,7 +681,7 @@ static void dumpUS(FILE* f, const UnicodeString& us) {
#endif
UBool
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const
{
// try matching each rule in the rule set against the text being
// parsed. Whichever one matches the most characters is the one
@ -707,9 +707,12 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
#endif
// Try each of the negative rules, fraction rules, infinity rules and NaN rules
for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
if (nonNumericalRules[i]) {
if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
// Mark this rule as being executed so that we don't try to execute it again.
nonNumericalExecutedRuleMask |= 1 << i;
Formattable tempResult;
UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, tempResult);
if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
result = tempResult;
highWaterMark = workingPos;
@ -748,7 +751,7 @@ NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBoun
continue;
}
Formattable tempResult;
UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, tempResult);
if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
result = tempResult;
highWaterMark = workingPos;

View file

@ -55,7 +55,7 @@ public:
void format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
void format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const;
UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const;
UBool parse(const UnicodeString& text, ParsePosition& pos, double upperBound, int32_t nonNumericalExecutedRuleMask, Formattable& result) const;
void appendRules(UnicodeString& result) const; // toString

View file

@ -900,6 +900,7 @@ NFRule::doParse(const UnicodeString& text,
ParsePosition& parsePosition,
UBool isFractionRule,
double upperBound,
int32_t nonNumericalExecutedRuleMask,
Formattable& resVal) const
{
// internally we operate on a copy of the string being parsed
@ -1002,6 +1003,7 @@ NFRule::doParse(const UnicodeString& text,
temp.setTo(ruleText, sub1Pos, sub2Pos - sub1Pos);
double partialResult = matchToDelimiter(workText, start, tempBaseValue,
temp, pp, sub1,
nonNumericalExecutedRuleMask,
upperBound);
// if we got a successful match (or were trying to match a
@ -1022,6 +1024,7 @@ NFRule::doParse(const UnicodeString& text,
temp.setTo(ruleText, sub2Pos, ruleText.length() - sub2Pos);
partialResult = matchToDelimiter(workText2, 0, partialResult,
temp, pp2, sub2,
nonNumericalExecutedRuleMask,
upperBound);
// if we got a successful match on this second
@ -1158,6 +1161,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
const UnicodeString& delimiter,
ParsePosition& pp,
const NFSubstitution* sub,
int32_t nonNumericalExecutedRuleMask,
double upperBound) const
{
UErrorCode status = U_ZERO_ERROR;
@ -1191,6 +1195,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
#else
formatter->isLenient(),
#endif
nonNumericalExecutedRuleMask,
result);
// if the substitution could match all the text up to
@ -1244,6 +1249,7 @@ NFRule::matchToDelimiter(const UnicodeString& text,
#else
formatter->isLenient(),
#endif
nonNumericalExecutedRuleMask,
result);
if (success && (tempPP.getIndex() != 0)) {
// if there's a successful match (or it's a null

View file

@ -74,6 +74,7 @@ public:
ParsePosition& pos,
UBool isFractional,
double upperBound,
int32_t nonNumericalExecutedRuleMask,
Formattable& result) const;
UBool shouldRollBack(int64_t number) const;
@ -94,6 +95,7 @@ private:
int32_t indexOfAnyRulePrefix() const;
double matchToDelimiter(const UnicodeString& text, int32_t startPos, double baseValue,
const UnicodeString& delimiter, ParsePosition& pp, const NFSubstitution* sub,
int32_t nonNumericalExecutedRuleMask,
double upperBound) const;
void stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const;

View file

@ -155,6 +155,7 @@ public:
double baseValue,
double upperBound,
UBool lenientParse,
int32_t nonNumericalExecutedRuleMask,
Formattable& result) const;
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const {
@ -221,6 +222,7 @@ public:
double baseValue,
double upperBound,
UBool lenientParse,
int32_t nonNumericalExecutedRuleMask,
Formattable& result) const;
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue + oldRuleValue; }
@ -292,6 +294,7 @@ public:
double baseValue,
double upperBound,
UBool /*lenientParse*/,
int32_t nonNumericalExecutedRuleMask,
Formattable& result) const;
virtual double composeRuleValue(double newRuleValue, double oldRuleValue) const { return newRuleValue / oldRuleValue; }
@ -689,6 +692,7 @@ NFSubstitution::doParse(const UnicodeString& text,
double baseValue,
double upperBound,
UBool lenientParse,
int32_t nonNumericalExecutedRuleMask,
Formattable& result) const
{
#ifdef RBNF_DEBUG
@ -709,7 +713,7 @@ NFSubstitution::doParse(const UnicodeString& text,
// on), then also try parsing the text using a default-
// constructed NumberFormat
if (ruleSet != NULL) {
ruleSet->parse(text, parsePosition, upperBound, result);
ruleSet->parse(text, parsePosition, upperBound, nonNumericalExecutedRuleMask, result);
if (lenientParse && !ruleSet->isFractionRuleSet() && parsePosition.getIndex() == 0) {
UErrorCode status = U_ZERO_ERROR;
NumberFormat* fmt = NumberFormat::createInstance(status);
@ -931,18 +935,19 @@ ModulusSubstitution::doParse(const UnicodeString& text,
double baseValue,
double upperBound,
UBool lenientParse,
int32_t nonNumericalExecutedRuleMask,
Formattable& result) const
{
// if this isn't a >>> substitution, we can just use the
// inherited parse() routine to do the parsing
if (ruleToUse == NULL) {
return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, result);
return NFSubstitution::doParse(text, parsePosition, baseValue, upperBound, lenientParse, nonNumericalExecutedRuleMask, result);
// but if it IS a >>> substitution, we have to do it here: we
// use the specific rule's doParse() method, and then we have to
// do some of the other work of NFRuleSet.parse()
} else {
ruleToUse->doParse(text, parsePosition, FALSE, upperBound, result);
ruleToUse->doParse(text, parsePosition, FALSE, upperBound, nonNumericalExecutedRuleMask, result);
if (parsePosition.getIndex() != 0) {
UErrorCode status = U_ZERO_ERROR;
@ -1118,12 +1123,13 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
double baseValue,
double /*upperBound*/,
UBool lenientParse,
int32_t nonNumericalExecutedRuleMask,
Formattable& resVal) const
{
// if we're not in byDigits mode, we can just use the inherited
// doParse()
if (!byDigits) {
return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, resVal);
return NFSubstitution::doParse(text, parsePosition, baseValue, 0, lenientParse, nonNumericalExecutedRuleMask, resVal);
// if we ARE in byDigits mode, parse the text one digit at a time
// using this substitution's owning rule set (we do this by setting
@ -1141,7 +1147,7 @@ FractionalPartSubstitution::doParse(const UnicodeString& text,
while (workText.length() > 0 && workPos.getIndex() != 0) {
workPos.setIndex(0);
Formattable temp;
getRuleSet()->parse(workText, workPos, 10, temp);
getRuleSet()->parse(workText, workPos, 10, nonNumericalExecutedRuleMask, temp);
UErrorCode status = U_ZERO_ERROR;
digit = temp.getLong(status);
// digit = temp.getType() == Formattable::kLong ?
@ -1249,6 +1255,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
double baseValue,
double upperBound,
UBool /*lenientParse*/,
int32_t nonNumericalExecutedRuleMask,
Formattable& result) const
{
// we don't have to do anything special to do the parsing here,
@ -1267,7 +1274,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
while (workText.length() > 0 && workPos.getIndex() != 0) {
workPos.setIndex(0);
getRuleSet()->parse(workText, workPos, 1, temp); // parse zero or nothing at all
getRuleSet()->parse(workText, workPos, 1, nonNumericalExecutedRuleMask, temp); // parse zero or nothing at all
if (workPos.getIndex() == 0) {
// we failed, either there were no more zeros, or the number was formatted with digits
// either way, we're done
@ -1289,7 +1296,7 @@ NumeratorSubstitution::doParse(const UnicodeString& text,
}
// we've parsed off the zeros, now let's parse the rest from our current position
NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, result);
NFSubstitution::doParse(workText, parsePosition, withZeros ? 1 : baseValue, upperBound, FALSE, nonNumericalExecutedRuleMask, result);
if (withZeros) {
// any base value will do in this case. is there a way to

View file

@ -191,6 +191,7 @@ public:
double baseValue,
double upperBound,
UBool lenientParse,
int32_t nonNumericalExecutedRuleMask,
Formattable& result) const;
/**

View file

@ -33,12 +33,13 @@ Derived NumberFormatterSettings<Derived>::unit(const icu::MeasureUnit &unit) con
}
template<typename Derived>
Derived NumberFormatterSettings<Derived>::adoptUnit(const icu::MeasureUnit *unit) const {
Derived NumberFormatterSettings<Derived>::adoptUnit(icu::MeasureUnit *unit) const {
Derived copy(*this);
// Just copy the unit into the MacroProps by value, and delete it since we have ownership.
// NOTE: Slicing occurs here. However, CurrencyUnit can be restored from MeasureUnit.
// TimeUnit may be affected, but TimeUnit is not as relevant to number formatting.
if (unit != nullptr) {
// TODO: On nullptr, reset to default value?
copy.fMacros.unit = *unit;
delete unit;
}
@ -54,10 +55,11 @@ Derived NumberFormatterSettings<Derived>::perUnit(const icu::MeasureUnit &perUni
}
template<typename Derived>
Derived NumberFormatterSettings<Derived>::adoptPerUnit(const icu::MeasureUnit *perUnit) const {
Derived NumberFormatterSettings<Derived>::adoptPerUnit(icu::MeasureUnit *perUnit) const {
Derived copy(*this);
// See comments above about slicing and ownership.
if (perUnit != nullptr) {
// TODO: On nullptr, reset to default value?
copy.fMacros.perUnit = *perUnit;
delete perUnit;
}
@ -96,7 +98,7 @@ Derived NumberFormatterSettings<Derived>::symbols(const DecimalFormatSymbols &sy
}
template<typename Derived>
Derived NumberFormatterSettings<Derived>::adoptSymbols(const NumberingSystem *ns) const {
Derived NumberFormatterSettings<Derived>::adoptSymbols(NumberingSystem *ns) const {
Derived copy(*this);
copy.fMacros.symbols.setTo(ns);
return copy;

View file

@ -44,7 +44,7 @@ Grouper Grouper::forStrategy(UGroupingStrategy grouping) {
return {-2, -2, -3};
case UNUM_GROUPING_ON_ALIGNED:
return {-4, -4, 1};
case UNUM_GROUPING_WESTERN:
case UNUM_GROUPING_THOUSANDS:
return {3, 3, 1};
default:
U_ASSERT(FALSE);

View file

@ -1371,7 +1371,7 @@ RuleBasedNumberFormat::parse(const UnicodeString& text,
ParsePosition working_pp(0);
Formattable working_result;
rp->parse(workingText, working_pp, kMaxDouble, working_result);
rp->parse(workingText, working_pp, kMaxDouble, 0, working_result);
if (working_pp.getIndex() > high_pp.getIndex()) {
high_pp = working_pp;
high_result = working_result;

View file

@ -172,7 +172,7 @@ typedef enum UNumberUnitWidth {
* <li>MIN2: 1234 and 12,34,567
* <li>AUTO: 1,234 and 12,34,567
* <li>ON_ALIGNED: 1,234 and 12,34,567
* <li>WESTERN: 1,234 and 1,234,567
* <li>THOUSANDS: 1,234 and 1,234,567
* </ul>
*
* <p>
@ -248,7 +248,7 @@ typedef enum UGroupingStrategy {
*
* @draft ICU 61
*/
UNUM_GROUPING_WESTERN
UNUM_GROUPING_THOUSANDS
} UGroupingStrategy;
@ -1515,7 +1515,8 @@ class U_I18N_API NumberFormatterSettings {
* All units will be properly localized with locale data, and all units are compatible with notation styles,
* rounding strategies, and other number formatter settings.
*
* Pass this method any instance of {@link MeasureUnit}. For units of measure:
* Pass this method any instance of {@link MeasureUnit}. For units of measure (which often involve the
* factory methods that return a pointer):
*
* <pre>
* NumberFormatter::with().adoptUnit(MeasureUnit::createMeter(status))
@ -1550,7 +1551,11 @@ class U_I18N_API NumberFormatterSettings {
/**
* Like unit(), but takes ownership of a pointer. Convenient for use with the MeasureFormat factory
* methods, which return pointers that need ownership.
* methods, which return pointers that need ownership. Example:
*
* <pre>
* NumberFormatter::with().adoptUnit(MeasureUnit::createMeter(status))
* </pre>
*
* @param unit
* The unit to render.
@ -1559,19 +1564,14 @@ class U_I18N_API NumberFormatterSettings {
* @see MeasureUnit
* @draft ICU 60
*/
Derived adoptUnit(const icu::MeasureUnit *unit) const;
Derived adoptUnit(icu::MeasureUnit *unit) const;
/**
* Sets a unit to be used in the denominator. For example, to format "3 m/s", pass METER to the unit and SECOND to
* the perUnit.
*
* Pass this method any instance of {@link MeasureUnit}. For example:
*
* <pre>
* NumberFormatter::with()
* .adoptUnit(MeasureUnit::createMeter(status))
* .adoptPerUnit(MeasureUnit::createSecond(status))
* </pre>
* Pass this method any instance of {@link MeasureUnit}. Since MeasureUnit factory methods return pointers, the
* {@link #adoptPerUnit} version of this method is often more useful.
*
* The default is not to display any unit in the denominator.
*
@ -1587,7 +1587,13 @@ class U_I18N_API NumberFormatterSettings {
/**
* Like perUnit(), but takes ownership of a pointer. Convenient for use with the MeasureFormat factory
* methods, which return pointers that need ownership.
* methods, which return pointers that need ownership. Example:
*
* <pre>
* NumberFormatter::with()
* .adoptUnit(MeasureUnit::createMeter(status))
* .adoptPerUnit(MeasureUnit::createSecond(status))
* </pre>
*
* @param perUnit
* The unit to render in the denominator.
@ -1596,7 +1602,7 @@ class U_I18N_API NumberFormatterSettings {
* @see MeasureUnit
* @draft ICU 61
*/
Derived adoptPerUnit(const icu::MeasureUnit *perUnit) const;
Derived adoptPerUnit(icu::MeasureUnit *perUnit) const;
/**
* Specifies the rounding strategy to use when formatting numbers.
@ -1761,7 +1767,7 @@ class U_I18N_API NumberFormatterSettings {
* @see NumberingSystem
* @draft ICU 60
*/
Derived adoptSymbols(const NumberingSystem *symbols) const;
Derived adoptSymbols(NumberingSystem *symbols) const;
/**
* Sets the width of the unit (measure unit or currency). Most common values:

View file

@ -94,6 +94,7 @@ static void TestFwdBack(void);
static void TestFwdBackUnsafe(void);
static void TestSetChar(void);
static void TestSetCharUnsafe(void);
static void TestTruncateIfIncomplete(void);
static void TestAppendChar(void);
static void TestAppend(void);
static void TestSurrogates(void);
@ -114,6 +115,7 @@ addUTF8Test(TestNode** root)
addTest(root, &TestFwdBackUnsafe, "utf8tst/TestFwdBackUnsafe");
addTest(root, &TestSetChar, "utf8tst/TestSetChar");
addTest(root, &TestSetCharUnsafe, "utf8tst/TestSetCharUnsafe");
addTest(root, &TestTruncateIfIncomplete, "utf8tst/TestTruncateIfIncomplete");
addTest(root, &TestAppendChar, "utf8tst/TestAppendChar");
addTest(root, &TestAppend, "utf8tst/TestAppend");
addTest(root, &TestSurrogates, "utf8tst/TestSurrogates");
@ -927,6 +929,64 @@ static void TestSetCharUnsafe() {
}
}
static void TestTruncateIfIncomplete() {
// Difference from U8_SET_CP_START():
// U8_TRUNCATE_IF_INCOMPLETE() does not look at s[length].
// Therefore, if the last byte is a lead byte, then this macro truncates
// even if the byte at the input index cannot continue a valid sequence
// (including when that is not a trail byte).
// On the other hand, if the last byte is a trail byte, then the two macros behave the same.
static const struct {
const char *s;
int32_t expected;
} cases[] = {
{ "", 0 },
{ "a", 1 },
{ "\x80", 1 },
{ "\xC1", 1 },
{ "\xC2", 0 },
{ "\xE0", 0 },
{ "\xF4", 0 },
{ "\xF5", 1 },
{ "\x80\x80", 2 },
{ "\xC2\xA0", 2 },
{ "\xE0\x9F", 2 },
{ "\xE0\xA0", 0 },
{ "\xED\x9F", 0 },
{ "\xED\xA0", 2 },
{ "\xF0\x8F", 2 },
{ "\xF0\x90", 0 },
{ "\xF4\x8F", 0 },
{ "\xF4\x90", 2 },
{ "\xF5\x80", 2 },
{ "\x80\x80\x80", 3 },
{ "\xC2\xA0\x80", 3 },
{ "\xE0\xA0\x80", 3 },
{ "\xF0\x8F\x80", 3 },
{ "\xF0\x90\x80", 0 },
{ "\xF4\x8F\x80", 0 },
{ "\xF4\x90\x80", 3 },
{ "\xF5\x80\x80", 3 },
{ "\x80\x80\x80\x80", 4 },
{ "\xC2\xA0\x80\x80", 4 },
{ "\xE0\xA0\x80\x80", 4 },
{ "\xF0\x90\x80\x80", 4 },
{ "\xF5\x80\x80\x80", 4 }
};
int32_t i;
for (i = 0; i < UPRV_LENGTHOF(cases); ++i) {
const char *s = cases[i].s;
int32_t expected = cases[i].expected;
int32_t length = (int32_t)strlen(s);
int32_t adjusted = length;
U8_TRUNCATE_IF_INCOMPLETE(s, 0, adjusted);
if (adjusted != expected) {
log_err("ERROR: U8_TRUNCATE_IF_INCOMPLETE failed for i=%d, length=%d. Expected:%d Got:%d\n",
(int)i, (int)length, (int)expected, (int)adjusted);
}
}
}
static void TestAppendChar(){
#if !U_HIDE_OBSOLETE_UTF_OLD_H
static const uint8_t s[11]={0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x00};

View file

@ -93,6 +93,7 @@ CalendarRegressionTest::runIndexedTest( int32_t index, UBool exec, const char* &
CASE(50,TestT9452);
CASE(51,TestT11632);
CASE(52,TestPersianCalOverflow);
CASE(53,TestIslamicCalOverflow);
default: name = ""; break;
}
}
@ -3009,9 +3010,9 @@ void CalendarRegressionTest::TestPersianCalOverflow(void) {
month = cal->get(UCAL_MONTH, status);
dayOfMonth = cal->get(UCAL_DATE, status);
if ( U_FAILURE(status) ) {
errln("FAIL: Calendar->get MONTH/DATE for localeID %s, julianDay %d, status %s\n", localeID, jd, u_errorName(status));
errln("FAIL: Calendar->get MONTH/DATE for localeID %s, julianDay %d, status %s", localeID, jd, u_errorName(status));
} else if (month > maxMonth || dayOfMonth > maxDayOfMonth) {
errln("FAIL: localeID %s, julianDay %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d\n",
errln("FAIL: localeID %s, julianDay %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d",
localeID, jd, maxMonth, month, maxDayOfMonth, dayOfMonth);
}
}
@ -3019,4 +3020,35 @@ void CalendarRegressionTest::TestPersianCalOverflow(void) {
}
}
/**
* @bug tickets 12661, 13538
*/
void CalendarRegressionTest::TestIslamicCalOverflow(void) {
const char* localeID = "ar@calendar=islamic-civil";
UErrorCode status = U_ZERO_ERROR;
Calendar* cal = Calendar::createInstance(Locale(localeID), status);
if(U_FAILURE(status)) {
dataerrln("FAIL: Calendar::createInstance for localeID %s: %s", localeID, u_errorName(status));
} else {
int32_t maxMonth = cal->getMaximum(UCAL_MONTH);
int32_t maxDayOfMonth = cal->getMaximum(UCAL_DATE);
int32_t jd, year, month, dayOfMonth;
for (jd = 73530872; jd <= 73530876; jd++) { // year 202002, int32_t overflow if jd >= 73530874
status = U_ZERO_ERROR;
cal->clear();
cal->set(UCAL_JULIAN_DAY, jd);
year = cal->get(UCAL_YEAR, status);
month = cal->get(UCAL_MONTH, status);
dayOfMonth = cal->get(UCAL_DATE, status);
if ( U_FAILURE(status) ) {
errln("FAIL: Calendar->get YEAR/MONTH/DATE for localeID %s, julianDay %d, status %s", localeID, jd, u_errorName(status));
} else if (month > maxMonth || dayOfMonth > maxDayOfMonth) {
errln("FAIL: localeID %s, julianDay %d; got year %d; maxMonth %d, got month %d; maxDayOfMonth %d, got dayOfMonth %d",
localeID, jd, year, maxMonth, month, maxDayOfMonth, dayOfMonth);
}
}
delete cal;
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -79,6 +79,7 @@ public:
void TestT9452(void);
void TestT11632(void);
void TestPersianCalOverflow(void);
void TestIslamicCalOverflow(void);
void printdate(GregorianCalendar *cal, const char *string);
void dowTest(UBool lenient) ;

View file

@ -75,6 +75,7 @@ void IntlTestRBNF::runIndexedTest(int32_t index, UBool exec, const char* &name,
TESTCASE(23, TestVariableDecimalPoint);
TESTCASE(24, TestLargeNumbers);
TESTCASE(25, TestCompactDecimalFormatStyle);
TESTCASE(26, TestParseFailure);
#else
TESTCASE(0, TestRBNFDisabled);
#endif
@ -2283,6 +2284,25 @@ void IntlTestRBNF::TestCompactDecimalFormatStyle() {
doTest(&rbnf, enTestFullData, false);
}
void IntlTestRBNF::TestParseFailure() {
UErrorCode status = U_ZERO_ERROR;
RuleBasedNumberFormat rbnf(URBNF_SPELLOUT, Locale::getJapanese(), status);
static const char* testData[][1] = {
{ "\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB\u30FB" },
{ NULL }
};
for (int i = 0; testData[i][0]; ++i) {
const char* spelledNumber = testData[i][0]; // spelled-out number
UnicodeString spelledNumberString = UnicodeString(spelledNumber).unescape();
Formattable actualNumber;
rbnf.parse(spelledNumberString, actualNumber, status);
if (status != U_INVALID_FORMAT_ERROR) { // I would have expected U_PARSE_ERROR, but NumberFormat::parse gives U_INVALID_FORMAT_ERROR
errln("FAIL: string should be unparseable %s %s", spelledNumber, u_errorName(status));
}
}
}
void
IntlTestRBNF::doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing)
{

View file

@ -147,6 +147,7 @@ class IntlTestRBNF : public IntlTest {
void TestRounding();
void TestLargeNumbers();
void TestCompactDecimalFormatStyle();
void TestParseFailure();
protected:
virtual void doTest(RuleBasedNumberFormat* formatter, const char* const testData[][2], UBool testParsing);

View file

@ -325,6 +325,7 @@ const NumberFormatTestTupleFieldData gFieldData[] = {
FIELD_INIT(positiveSuffix, &gStrOps),
FIELD_INIT(negativePrefix, &gStrOps),
FIELD_INIT(negativeSuffix, &gStrOps),
FIELD_INIT(signAlwaysShown, &gIntOps),
FIELD_INIT(localizedPattern, &gStrOps),
FIELD_INIT(toPattern, &gStrOps),
FIELD_INIT(toLocalizedPattern, &gStrOps),

View file

@ -55,6 +55,7 @@ enum ENumberFormatTestTupleField {
kPositiveSuffix,
kNegativePrefix,
kNegativeSuffix,
kSignAlwaysShown,
kLocalizedPattern,
kToPattern,
kToLocalizedPattern,
@ -118,6 +119,7 @@ public:
UnicodeString positiveSuffix;
UnicodeString negativePrefix;
UnicodeString negativeSuffix;
int32_t signAlwaysShown;
UnicodeString localizedPattern;
UnicodeString toPattern;
UnicodeString toLocalizedPattern;
@ -164,6 +166,7 @@ public:
UBool positiveSuffixFlag;
UBool negativePrefixFlag;
UBool negativeSuffixFlag;
UBool signAlwaysShownFlag;
UBool localizedPatternFlag;
UBool toPatternFlag;
UBool toLocalizedPatternFlag;

View file

@ -1097,6 +1097,20 @@ void NumberFormatterApiTest::grouping() {
u"8.765",
u"0");
assertFormatDescendingBig(
u"Indic locale with THOUSANDS grouping",
NumberFormatter::with().grouping(UNUM_GROUPING_THOUSANDS),
Locale("en-IN"),
u"87,650,000",
u"8,765,000",
u"876,500",
u"87,650",
u"8,765",
u"876.5",
u"87.65",
u"8.765",
u"0");
// NOTE: Hungarian is interesting because it has minimumGroupingDigits=4 in locale data
// If this test breaks due to data changes, find another locale that has minimumGroupingDigits.
assertFormatDescendingBig(

View file

@ -229,6 +229,9 @@ static void adjustDecimalFormat(
if (tuple.negativeSuffixFlag) {
fmt.setNegativeSuffix(tuple.negativeSuffix);
}
if (tuple.signAlwaysShownFlag) {
// Not currently supported
}
if (tuple.localizedPatternFlag) {
UErrorCode status = U_ZERO_ERROR;
fmt.applyLocalizedPattern(tuple.localizedPattern, status);

View file

@ -441,11 +441,10 @@ en_US 1 123,456 123456
en_US 0 123,456 123
en_US 1 123.456 123.456
en_US 0 123.456 123.456
fr_FR 1 123,456 123.456
fr_FR 0 123,456 123.456
// JDK returns 123 here; not sure why.
fr_FR 1 123.456 123456 K
fr_FR 0 123.456 123
it_IT 1 123,456 123.456
it_IT 0 123,456 123.456
it_IT 1 123.456 123456
it_IT 0 123.456 123
test no grouping in pattern with parsing
set pattern 0
@ -466,9 +465,8 @@ output grouping breaks grouping2 minGroupingDigits
1,2345,6789 4
1,23,45,6789 4 K 2
1,23,45,6789 4 K 2 2
// Q only supports minGrouping<=2
123,456789 6 6 3
123456789 6 JKQ 6 4
123456789 6 JK 6 4
test multiplier setters
set locale en_US
@ -754,6 +752,7 @@ parse output breaks
+3.52EE4 3.52
+1,234,567.8901 1234567.8901
+1,23,4567.8901 1234567.8901
// Fraction grouping is disabled by default
+1,23,4567.89,01 1234567.89
+1,23,456.78.9 123456.78
+12.34,56 12.34
@ -831,15 +830,14 @@ parse output breaks
// JDK does allow separators in the wrong place and parses as -5347.25
(53,47.25) fail K
// strict requires prefix or suffix, except in C
65,347.25 fail
65,347.25 fail
+3.52E4 35200
(34.8E-3) -0.0348
(3425E-1) -342.5
// Strict doesn't allow separators in sci notation.
(63,425) -63425
// JDK and S allow separators in sci notation and parses as -342.5
// C passes
(63,425E-1) fail CKS
// J does not allow grouping separators in scientific notation.
(63,425E-1) -6342.5 J
// Both prefix and suffix needed for strict.
// JDK accepts this and parses as -342.5
(3425E-1 fail K
@ -954,12 +952,12 @@ set negativeSuffix 9N
begin
parse output breaks
// S is the only implementation that passes these cases.
// C consumes the '9' as a digit and assumes number is negative
// C and P consume the '9' as a digit and assume the number is negative
// J and JDK bail
6549K 654 CJK
// C consumes the '9' as a digit and assumes number is negative
6549K 654 CJKP
// C and P consume the '9' as a digit and assume the number is negative
// J and JDK bail
6549N -654 CJK
6549N -654 CJKP
test really strange prefix
set locale en
@ -974,7 +972,7 @@ test parse pattern with quotes
set locale en
set pattern '-'#y
begin
parse output
parse output breaks
-45y 45
test parse with locale symbols
@ -1187,17 +1185,17 @@ $53.45 fail USD J
USD 53.45 53.45 USD J
53.45USD 53.45 USD CJ
USD53.45 53.45 USD
// S fails these because '(' is an incomplete prefix.
(7.92) USD -7.92 USD CJS
(7.92) GBP -7.92 GBP CJS
(7.926) USD -7.926 USD CJS
(7.926 USD) -7.926 USD CJS
// P fails these because '(' is an incomplete prefix.
(7.92) USD -7.92 USD CJP
(7.92) GBP -7.92 GBP CJP
(7.926) USD -7.926 USD CJP
(7.926 USD) -7.926 USD CJP
(USD 7.926) -7.926 USD J
USD (7.926) -7.926 USD CJS
USD (7.92) -7.92 USD CJS
(7.92)USD -7.92 USD CJS
USD(7.92) -7.92 USD CJS
(8) USD -8 USD CJS
USD (7.926) -7.926 USD CJP
USD (7.92) -7.92 USD CJP
(7.92)USD -7.92 USD CJP
USD(7.92) -7.92 USD CJP
(8) USD -8 USD CJP
-8 USD -8 USD C
67 USD 67 USD C
53.45$ fail USD
@ -1223,37 +1221,38 @@ test parse foreign currency symbol
set pattern \u00a4 0.00;\u00a4 -#
set locale fa_IR
begin
parse output outputCurrency
parse output outputCurrency breaks
\u0631\u06cc\u0627\u0644 \u06F1\u06F2\u06F3\u06F5 1235 IRR
IRR \u06F1\u06F2\u06F3\u06F5 1235 IRR
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR
// P fails here because this currency name is in the Trie only, but it has the same prefix as the non-Trie currency
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR P
IRR 1235 1235 IRR
\u0631\u06cc\u0627\u0644 1235 1235 IRR
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR P
test parse foreign currency ISO
set pattern \u00a4\u00a4 0.00;\u00a4\u00a4 -#
set locale fa_IR
begin
parse output outputCurrency
parse output outputCurrency breaks
\u0631\u06cc\u0627\u0644 \u06F1\u06F2\u06F3\u06F5 1235 IRR
IRR \u06F1\u06F2\u06F3\u06F5 1235 IRR
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR P
IRR 1235 1235 IRR
\u0631\u06cc\u0627\u0644 1235 1235 IRR
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR P
test parse foreign currency full
set pattern \u00a4\u00a4\u00a4 0.00;\u00a4\u00a4\u00a4 -#
set locale fa_IR
begin
parse output outputCurrency
parse output outputCurrency breaks
\u0631\u06cc\u0627\u0644 \u06F1\u06F2\u06F3\u06F5 1235 IRR
IRR \u06F1\u06F2\u06F3\u06F5 1235 IRR
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 \u06F1\u06F2\u06F3\u06F5 1235 IRR P
IRR 1235 1235 IRR
\u0631\u06cc\u0627\u0644 1235 1235 IRR
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR
\u0631\u06cc\u0627\u0644 \u0627\u06cc\u0631\u0627\u0646 1235 1235 IRR P
test parse currency with foreign symbols symbol english
set pattern \u00a4 0.00;\u00a4 (#)
@ -1288,16 +1287,17 @@ Euros 7.82 7.82 EUR
test parse currency without currency mode
// Should accept a symbol associated with the currency specified by the API,
// but should not traverse the full currency data.
// P always traverses full currency data.
set locale en_US
set pattern \u00a4#,##0.00
begin
parse currency output breaks
$52.41 USD 52.41
USD52.41 USD 52.41 K
\u20ac52.41 USD fail
EUR52.41 USD fail
$52.41 EUR fail
USD52.41 EUR fail
\u20ac52.41 USD fail P
EUR52.41 USD fail P
$52.41 EUR fail P
USD52.41 EUR fail P
\u20ac52.41 EUR 52.41 K
EUR52.41 EUR 52.41
@ -1307,11 +1307,11 @@ set locale en_US
set lenient 0
begin
parse output outputCurrency breaks
$53.45 53.45 USD
$53.45 53.45 USD P
53.45 USD 53.45 USD
USD 53.45 fail USD
53.45USD fail USD
USD53.45 53.45 USD
USD53.45 53.45 USD P
(7.92) USD -7.92 USD
(7.92) EUR -7.92 EUR
(7.926) USD -7.926 USD
@ -1329,9 +1329,9 @@ US Dollars 53.45 fail USD
53.45 US Dollars 53.45 USD
US Dollar 53.45 fail USD
53.45 US Dollar 53.45 USD
US Dollars53.45 53.45 USD
US Dollars53.45 53.45 USD P
53.45US Dollars fail USD
US Dollar53.45 53.45 USD
US Dollar53.45 53.45 USD P
US Dollat53.45 fail USD
53.45US Dollar fail USD
US Dollars (53.45) fail USD
@ -1376,13 +1376,15 @@ test parse minus sign
set locale en
set pattern #
begin
parse output breaks
-123 -123
- 123 -123 JK
-123 -123 JK
- 123 -123 JK
123- -123 CJKS
123 - -123 CJKS
pattern parse output breaks
# -123 -123
# - 123 -123 JK
# -123 -123 JK
# - 123 -123 JK
# 123- 123
# 123 - 123
#;#- 123- -123
#;#- 123 - -123 JK
test parse case sensitive
set locale en
@ -1423,8 +1425,8 @@ NaN NaN K
1E2147483646 1E2147483646
1E-2147483649 0
1E-2147483648 0
// S returns zero here
1E-2147483647 1E-2147483647 S
// P returns zero here
1E-2147483647 1E-2147483647 P
1E-2147483646 1E-2147483646
test format push limits
@ -1439,7 +1441,7 @@ maxFractionDigits format output breaks
100 9999999999999.9950000000001 9999999999999.9950000000001 C
2 9999999999999.9950000000001 10000000000000.00 C
2 9999999.99499999 9999999.99
// K doesn't support halfDowm rounding mode?
// K doesn't support halfDown rounding mode?
2 9999999.995 9999999.99 K
2 9999999.99500001 10000000.00
100 56565656565656565656565656565656565656565656565656565656565656 56565656565656565656565656565656565656565656565656565656565656.00 C
@ -1453,8 +1455,8 @@ set locale en
set pattern #,##0
begin
parse output breaks
// K and J return null; S and C return 99
9 9 9 CJKS
// K and J return null; S, C, and P return 99
9 9 9 CJKP
// K returns null
9 999 9999 K
@ -1497,7 +1499,7 @@ y g h56 -56 JK
56i jk -56 CJK
56i jk -56 CJK
// S and C get 56 (accepts ' ' as grouping); J and K get null
5 6 fail CS
5 6 fail CP
56 5 JK
test parse spaces in grouping
@ -1507,9 +1509,9 @@ set locale en
set pattern #,##0
begin
parse output breaks
// C, J and S get "12" here
1 2 1 CJS
1 23 1 CJS
// C, J, S, and P get "12" here
1 2 1 CJP
1 23 1 CJP
// K gets 1 here; doesn't pick up the grouping separator
1 234 1234 K
@ -1543,7 +1545,8 @@ begin
parse output breaks
55% 0.55
// J and K get null
55 0.55 JK
// P requires the symbol to be present and gets 55
55 0.55 JKP
test trailing grouping separators in pattern
// This test is for #13115
@ -1573,6 +1576,34 @@ begin
parse output breaks
9223372036854775807% 92233720368547758.07
test sign always shown
set locale en
set pattern 0
set signAlwaysShown 1
begin
format output breaks
// C, J and K do not support this feature
42 +42 CJK
0 +0 CJK
-42 -42
test parse strict with plus sign
set locale en
set pattern 0
set signAlwaysShown 1
begin
lenient parse output breaks
1 42 42
1 -42 -42
1 +42 42 CJK
1 0 0
1 +0 0 CJK
0 42 fail CJK
0 -42 -42
0 +42 42 CJK
0 0 fail CJK
0 +0 0 CJK