ICU-20511 Merge branch 'master' into 64-merge-branch

This commit is contained in:
Shane F. Carr 2019-04-10 19:09:52 -07:00 committed by Shane Carr
commit a268e3a299
140 changed files with 7648 additions and 4184 deletions

View file

@ -94,9 +94,12 @@ jobs:
- visualstudio
- Cmd
steps:
- powershell: 'Invoke-WebRequest https://www.python.org/ftp/python/3.7.1/python-3.7.1-amd64-webinstall.exe -OutFile c:\py3-setup.exe'
- powershell: 'Invoke-WebRequest https://www.python.org/ftp/python/3.7.2/python-3.7.2-amd64-webinstall.exe -OutFile c:\py3-setup.exe'
- script: |
c:\py3-setup.exe /quiet PrependPath=1 InstallAllUsers=1 Include_launcher=1 InstallLauncherAllUsers=1 Include_test=0 Include_doc=0 Include_dev=0 Include_debug=0 Include_tcltk=0 TargetDir=c:\py3
- script: |
@echo ##vso[task.prependpath]C:\py3
@echo ##vso[task.prependpath]C:\py3\Scripts
- script: |
python --version
py -3 --version

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 53 KiB

View file

@ -0,0 +1,74 @@
<!--
© 2019 and later: Unicode, Inc. and others.
License & terms of use: http://www.unicode.org/copyright.html
-->
FormattedValue
==============
FormattedValue is an abstraction for localized strings with attributes
returned by a number of ICU formatters. APIs for FormattedValue are available
in Java, C++, and C. For more details and a list of all implementing classes,
refer to the API docs:
- [C++ FormattedValue](http://icu-project.org/apiref/icu4c/classicu_1_1FormattedValue.html)
- [C UFormattedValue](http://icu-project.org/apiref/icu4c/globals_u.html) -- search for "resultAsValue"
- [Java FormattedValue](http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/FormattedValue.html)
## Nested Span Fields
Certain ICU formatters, like FormattedList and FormattedDateInterval, use
*span fields* to return information about which spans of a string correspond
to different input parameters. In C and C++, span fields are implemented
using a field category, with the field being set to the input index; in Java,
they are implemented by associating an Integer value with a SpanField
subclass.
For example, in C++, here is how you can determine which region in a formatted
date interval corresponds to the 1st argument (index 0) in the input date
interval (the "from" date):
```cpp
// Let fmt be a DateIntervalFormat for locale en-US and skeleton dMMMMy
// Let input1 be July 20, 2018 and input2 be August 3, 2018:
FormattedDateInterval result = fmt->formatToValue(*input1, *input2, status);
assertEquals("Expected output from format",
u"July 20 \u2013 August 3, 2018", result.toString(status));
ConstrainedFieldPosition cfpos;
cfpos.constrainField(UFIELD_CATEGORY_DATE_INTERVAL_SPAN, 0);
if (result.nextPosition(cfpos, status)) {
assertEquals("Expect start index", 0, cfpos.getStart());
assertEquals("Expect end index", 7, cfpos.getLimit());
} else {
// No such span: can happen if input dates are equal.
}
assertFalse("No more than one occurrence of the field",
result.nextPosition(cfpos, status));
```
In C, the code looks very similar, except you use the equivalent C types.
In Java, use the `constrainFieldAndValue` method:
```java
// Let fmt be a DateIntervalFormat for locale en-US and skeleton dMMMMy
// Let input1 be July 20, 2018 and input2 be August 3, 2018:
FormattedDateInterval result = fmt.formatToValue(input1, input2);
assertEquals("Expected output from format",
"July 20 \u2013 August 3, 2018", result.toString());
ConstrainedFieldPosition cfpos = new ConstrainedFieldPosition();
cfpos.constrainFieldAndValue(DateIntervalFormat.SpanField.DATE_INTERVAL_SPAN, 0);
if (result.nextPosition(cfpos)) {
assertEquals("Expect start index", 0, cfpos.getStart());
assertEquals("Expect end index", 7, cfpos.getLimit());
} else {
// No such span: can happen if input dates are equal.
}
assertFalse("No more than one occurrence of the field",
result.nextPosition(cfpos));
```
A span may cover multiple primitive fields; in the above example, the span
contains both a month and a date. Using FormattedValue, those primitive
fields will also be present, and you can check their start and end indices to
see if they are contained within a desired span.

View file

@ -0,0 +1,302 @@
<!--
© 2019 and later: Unicode, Inc. and others.
License & terms of use: http://www.unicode.org/copyright.html
-->
Number Skeletons
================
Number skeletons are a locale-agnostic way to configure a NumberFormatter in
ICU. Number skeletons work in MessageFormat.
Number skeletons consist of *space-separated tokens* that correspond to
settings in ICU NumberFormatter. For example, to format a currency in compact
notation, you could use this skeleton:
compact-short currency/GBP
To use a skeleton in MessageFormat, use the "number" type and prefix the
skeleton with `::`
{0, number, ::compact-short currency/GBP}
## Syntax
A token consists of a *stem* and zero or more *options*. The stem is what
occurs before the first "/" character in a token, and the options are each of
the subsequent "/"-delimited strings. For example, "compact-short" and
"currency" are stems, and "GBP" is an option.
Stems might also be dynamic strings (not a fixed list); these are called
*blueprint stems*. For example, to format a number with 2-3 significant
digits, you could use the following stem:
@@#
A few examples of number skeletons are shown below. The list of available
stems and options can be found below in [Skeleton Stems and
Options](#skeleton-stems-and-options).
## Examples
| Skeleton | Input | en-US Output | Comments |
|---|---|---|---|
| `percent` | 25 | 25% |
| `.00` | 25 | 25.00 | Equivalent to Precision::fixedFraction(2) |
| `percent .00` | 25 | 25.00% |
| `scale/100` | 0.3 | 30 | Multiply by 100 before formatting |
| `percent scale/100` | 0.3 | 30% |
| `measure-unit/length-meter` | 5 | 5 m | UnitWidth defaults to Short |
| `measure-unit/length-meter` <br/> `unit-width-full-name` | 5 | 5 meters |
| `currency/CAD` | 10 | CA$10.00 |
| `currency/CAD` <br/> `unit-width-narrow` | 10 | $10.00 | Use the narrow symbol variant |
| `compact-short` | 5000 | 5K |
| `compact-long` | 5000 | 5 thousand |
| `compact-short` <br/> `currency/CAD` | 5000 | CA$5K |
| - | 5000 | 5,000 |
| `group-min2` | 5000 | 5000 | Require 2 digits in group for separator |
| `group-min2` | 15000 | 15,000 |
| `sign-always` | 60 | +60 | Show sign on all numbers |
| `sign-always` | 0 | +0 |
| `sign-except-zero` | 60 | +60 | Show sign on all numbers except 0 |
| `sign-except-zero` | 0 | 0 |
| `sign-accounting` <br/> `currency/CAD` | -40 | (CA$40.00) |
## Skeleton Stems and Options
The full set of features supported by number skeletons is listed by category
below.
### Notation
Use one of the following stems to select your notation style:
- `compact-short`
- `compact-long`
- `scientific`
- `engineering`
- `notation-simple`
The skeletons `scientific` and `engineering` take the following optional
options:
- `/sign-xxx` sets the sign display option for the exponent; see [Sign Display](#sign-display).
- `/+ee` sets exponent digits to "at least 2"; use `/+eee` for at least 3 digits, etc.
For example, all of the following skeletons are valid:
- `scientific`
- `scientific/sign-always`
- `scientific/+ee`
- `scientific/+ee/sign-always`
### Unit
The supported types of units are percent, currency, and measurement units.
The following skeleton tokens are accepted:
- `percent`
- `permille`
- `base-unit`
- `currency/XXX`
- `measure-unit/aaaa-bbbb`
The `percent`, `permille`, and `base-unit` stems do not take any options.
The `currency` stem takes one required option: the three-letter ISO code of
the currency to be formatted.
The `measure-unit` stem takes one required option: the unit identifier of the
unit to be formatted. The full unit identifier is required: both the type and
the subtype (for example, `length-meter`).
### Per Unit
To specify a unit to put in the denominator, use the following skeleton token:
- `per-measure-unit/aaaa-bbbb`
As with the `measure-unit` stem, pass the unit identifier as the option.
### Unit Width
The unit width can be specified by the following stems:
- `unit-width-narrow`
- `unit-width-short`
- `unit-width-full-name`
- `unit-width-iso-code`
- `unit-width-hidden`
For more details, see
[UNumberUnitWidth](http://icu-project.org/apiref/icu4c/unumberformatter_8h.html).
### Precision
The precision category has more blueprint stems than most other categories;
they are documented in detail below. The following non-blueprint stems are
accepted:
- `precision-integer` (round to the nearest integer) --- accepts fraction-precision options
- `precision-unlimited` (do not perform rounding; display all digits)
- `precision-increment/dddd` (round to *dddd*, a decimal number) --- see below
- `precision-currency-standard`
- `precision-currency-cash`
To round to the nearest nickel, for example, use the skeleton
`precision-increment/0.05`. For more information on the decimal number
syntax, see [Scale](#scale).
#### Fraction Precision
The following are examples of fraction-precision stems:
| Stem | Explanation | Equivalent C++ Code |
|---|---|---|
| `.00` | Exactly 2 fraction digits | `Precision::fixedFraction(2) ` |
| `.00+` | At least 2 fraction digits | `Precision::minFraction(2)` |
| `.##` | At most 2 fraction digits | `Precision::maxFraction(2) ` |
| `.0#` | Between 1 and 2 fraction digits | `Precision::minMaxFraction(1, 2)` |
More precisely, the fraction precision stem starts with `.`, then contains
zero or more `0` symbols, which implies the minimum fraction digits. Then it
contains either a `+`, for unlimited maximum fraction digits, or zero or more
`#` symbols, which implies the maximum fraction digits when added to the `0`
symbols.
Note that the stem `.` is considered valid and is equivalent to `precision-integer`.
Fraction-precision stems accept a single optional option: the minimum or
maximum number of significant digits. This allows you to combine fraction
precision with certain significant digits capabilities. The following are
examples:
| Skeleton | Explanation | Equivalent C++ Code |
|---|---|---|
| `.##/@@@+` | At most 2 fraction digits, but guarantee <br/> at least 3 significant digits | `Precision::maxFraction(2)` <br/> `.withMinDigits(3)` |
| `.00/@##` | Exactly 2 fraction digits, but do not <br/> display more than 3 significant digits | `Precision::fixedFraction(2)` <br/> `.withMaxDigits(3)` |
Precisely, the option starts with one or more `@` symbols. Then it contains
either a `+`, for `::withMinDigits`, or one or more `#` symbols, for
`::withMaxDigits`. If a `#` symbol is present, there must be only one `@`
symbol.
#### Significant Digits Precision
The following are examples of stems for significant figures:
| Stem | Explanation | Equivalent C++ Code|
|---|---|---|
| `@@@` | Exactly 3 significant digits | `Precision::fixedSignificantDigits(3)` |
| `@@@+` | At least 3 significant digits | `Precision::minSignificantDigits(3)` |
| `@##` | At most 3 significant digits | `Precision::maxSignificantDigits(3)` |
| `@@#` | Between 2 and 3 significant digits | `...::minMaxSignificantDigits(2, 3)` |
The precise syntax is very similar to fraction precision. The blueprint stem
starts with one or more `@` symbols, which implies the minimum significant
digits. Then it contains either a `+`, for unlimited maximum significant
digits, or zero or more `#` symbols, which implies the maximum significant
digits when added to the `@` symbols.
### Rounding Mode
The rounding mode can be specified by the following stems:
- `rounding-mode-ceiling`
- `rounding-mode-floor`
- `rounding-mode-down`
- `rounding-mode-up`
- `rounding-mode-half-even`
- `rounding-mode-half-down`
- `rounding-mode-half-up`
- `rounding-mode-unnecessary`
For more details, see [Rounding
Modes](http://userguide.icu-project.org/formatparse/numbers/rounding-modes).
### Integer Width
The following examples show how to specify integer width (minimum or maximum
integer digits):
| Token | Explanation | Equivalent C++ Code |
|---|---|---|
| `integer-width/+000` | At least 3 <br/> integer digits | `IntegerWidth::zeroFillTo(3)` |
| `integer-width/##0` | Between 1 and 3 <br/> integer digits | `IntegerWidth::zeroFillTo(1)` <br/> `.truncateAt(3)` |
| `integer-width/00` | Exactly 2 <br/> integer digits | `IntegerWidth::zeroFillTo(2)` <br/> `.truncateAt(2)` |
| `integer-width/+` | Zero or more <br/> integer digits | `IntegerWidth::zeroFillTo(0)` |
The option starts with either a single `+` symbol, signaling no limit on the
number of integer digits (no *truncateAt*), or zero or more `#` symbols. It
should then be followed by zero or more `0` symbols, indicating the minimum
integer digits (the argument to *zeroFillTo*). If there is no `+` symbol, the
maximum integer digits (the argument to *truncateAt*) is the number of `#`
symbols plus the number of `0` symbols.
### Scale
To specify the scale, use the following stem and option:
- `scale/dddd`
where *dddd* is a decimal number. For example, the following are valid
skeletons:
- `scale/100` (multiply by 100)
- `scale/1E2` (same as above)
- `scale/0.5` (multiply by 0.5)
The decimal number should conform to a standard decimal number syntax. In
C++, it is parsed using the decimal number library described in
[LocalizedNumberFormatter::formatDecimal](http://icu-project.org/apiref/icu4c/classicu_1_1number_1_1LocalizedNumberFormatter.html).
In Java, it is parsed using
[BigDecimal](https://docs.oracle.com/javase/7/docs/api/java/math/BigDecimal.html#BigDecimal%28java.lang.String%29).
For maximum compatibility, it is highly recommended that your decimal number
is able to be parsed by both engines.
### Grouping
The grouping strategy can be specified by the following stems:
- `group-off`
- `group-min2`
- `group-auto`
- `group-on-aligned`
- `group-thousands`
For more details, see
[UNumberGroupingStrategy](http://icu-project.org/apiref/icu4c/unumberformatter_8h.html).
### Symbols
The following stems are allowed for specifying the number symbols:
- `latin` (use Latin-script digits)
- `numbering-system/nnnn` (use the `nnnn` numbering system)
A custom DecimalFormatSymbols instance is not supported at this time.
### Sign Display
The following stems specify sign display:
- `sign-auto`
- `sign-always`
- `sign-never`
- `sign-accounting`
- `sign-accounting-always`
- `sign-except-zero`
- `sign-accounting-except-zero`
For more details, see
[UNumberSignDisplay](http://icu-project.org/apiref/icu4c/unumberformatter_8h.html).
### Decimal Separator Display
The following stems specify decimal separator display:
- `decimal-auto`
- `decimal-always`
For more details, see
[UNumberDecimalSeparatorDisplay](http://icu-project.org/apiref/icu4c/unumberformatter_8h.html).

View file

@ -0,0 +1,633 @@
<!--
© 2019 and later: Unicode, Inc. and others.
License & terms of use: http://www.unicode.org/copyright.html
-->
ICU Data Build Tool
===================
ICU 64 provides a tool for configuring your ICU locale data file with finer
granularity. This page explains how to use this tool to customize and reduce
your data file size.
## Overview: What is in the ICU data file?
There are hundreds of **locales** supported in ICU (including script and
region variants), and ICU supports many different **features**. For each
locale and for each feature, data is stored in one or more data files.
Those data files are compiled and then bundled into a `.dat` file called
something like `icudt64l.dat`, which is little-endian data for ICU 64. This
dat file is packaged into the `libicudata.so` on Linux or `libicudata.dll.a`
on Windows. In ICU4J, it is bundled into a jar file named `icudata.jar`.
At a high level, the size of the ICU data file corresponds to the
cross-product of locales and features, except that not all features require
locale-specific data, and not all locales require data for all features. The
data file contents can be approximately visualized like this:
<img alt="Features vs. Locales" src="../assets/features_locales.svg" style="max-width:600px" />
The `icudt64l.dat` file is 27 MiB uncompressed and 11 MiB gzipped. This file
size is too large for certain use cases, such as bundling the data file into a
smartphone app or an embedded device. This is something the ICU Data Build
Tool aims to solve.
## ICU Data Configuration File
The ICU Data Build Tool enables you to write a configuration file that
specifies what features and locales to include in a custom data bundle.
The configuration file may be written in either [JSON](http://json.org/) or
[Hjson](https://hjson.org/). To build ICU4C with custom data, set the
`ICU_DATA_FILTER_FILE` environment variable when running `runConfigureICU` on
Unix or when building the data package on Windows. For example:
ICU_DATA_FILTER_FILE=filters.json path/to/icu4c/source/runConfigureICU Linux
The ICU Data Build Tool will work out of the box with a default Python
installation. In order to use Hjson syntax, the `hjson` pip module must be
installed on your system. You should also consider installing the
`jsonschema` module to print messages when errors are found in your config
file.
$ pip3 install --user hjson jsonschema
To build ICU4J with custom data, you must first build ICU4C with custom data
and then generate the JAR file. For more information, read
[icu4j-readme.txt](https://github.com/unicode-org/icu/blob/master/icu4c/source/data/icu4j-readme.txt).
### Locale Slicing
The simplest way to slice ICU data is by locale. The ICU Data Build Tool
makes it easy to select your desired locales to suit a number of use cases.
#### Filtering by Language Only
Here is a *filters.json* file that builds ICU data with support for English,
Chinese, and German, including *all* script and regional variants for those
languages:
{
"localeFilter": {
"filterType": "language",
"whitelist": [
"en",
"de",
"zh"
]
}
}
The *filterType* "language" only supports slicing by entire languages.
#### Filtering by Locale
For more control, use *filterType* "locale". Here is a *filters.hjson* file that
includes the same three languages as above, including regional variants, but
only the default script (e.g., Simplified Han for Chinese):
localeFilter: {
filterType: locale
whitelist: [
en
de
zh
]
}
#### Adding Script Variants (includeScripts = true)
You may set the *includeScripts* option to true to include all scripts for a
language while using *filterType* "locale". This results in behavior similar
to *filterType* "language". In the following JSON example, all scripts for
Chinese are included:
{
"localeFilter": {
"filterType": "locale",
"includeScripts": true,
"whitelist": [
"en",
"de",
"zh"
]
}
}
If you wish to explicitly list the scripts, you may put the script code in the
locale tag in the whitelist, and you do not need the *includeScripts* option
enabled. For example, in Hjson, to include Han Traditional ***but not Han
Simplified***:
localeFilter: {
filterType: locale
whitelist: [
en
de
zh_Hant
]
}
Note: the option *includeScripts* is only supported at the language level;
i.e., in order to include all scripts for a particular language, you must
specify the language alone, without a region tag.
#### Removing Regional Variants (includeChildren = false)
If you wish to enumerate exactly which regional variants you wish to support,
you may use *filterType* "locale" with the *includeChildren* setting turned to
false. The following *filters.hjson* file includes English (US), English
(UK), German (Germany), and Chinese (China, Han Simplified), as well as their
dependencies, *but not* other regional variants like English (Australia),
German (Switzerland), or Chinese (Taiwan, Han Traditional):
localeFilter: {
filterType: locale
includeChildren: false
whitelist: [
en_US
en_GB
de_DE
zh_CN
]
}
Including dependencies, the above filter would include the following data files:
- root.txt
- en.txt
- en_US.txt
- en_001.txt
- en_GB.txt
- de.txt
- de_DE.txt
- zh.txt
- zh_Hans.txt
- zh_Hans_CN.txt
- zh_CN.txt
### File Slicing (coarse-grained features)
ICU provides a lot of features, of which you probably need only a small subset
for your application. Feature slicing is a powerful way to prune out data for
any features you are not using.
***CAUTION:*** When slicing by features, you must manually include all
dependencies. For example, if you are formatting dates, you must include not
only the date formatting data but also the number formatting data, since dates
contain numbers. Expect to spend a fair bit of time debugging your feature
filter to get it to work the way you expect it to.
The data for many ICU features live in individual files. The ICU Data Build
Tool puts similar *types* of files into categories. The following table
summarizes the ICU data files and their corresponding features and categories:
| Feature | Category ID(s) | Data Files <br/> ([icu4c/source/data](https://github.com/unicode-org/icu/tree/master/icu4c/source/data)) | Resource Size <br/> (as of ICU 64) |
|---|---|---|---|
| Break Iteration | `"brkitr_rules"` <br/> `"brkitr_dictionaries"` <br/> `"brkitr_tree"` | brkitr/rules/\*.txt <br/> brkitr/dictionaries/\*.txt <br/> brkitr/\*.txt | 522 KiB <br/> **2.8 MiB** <br/> 14 KiB |
| Charset Conversion | `"conversion_mappings"` | mappings/\*.ucm | **4.9 MiB** |
| Collation <br/> *[more info](#collation-ucadata)* | `"coll_ucadata"` <br/> `"coll_tree"` | in/coll/ucadata-\*.icu <br/> coll/\*.txt | 511 KiB <br/> **2.8 MiB** |
| Confusables | `"confusables"` | unidata/confusables\*.txt | 45 KiB |
| Currencies | `"misc"` <br/> `"curr_supplemental"` <br/> `"curr_tree"` | misc/currencyNumericCodes.txt <br/> curr/supplementalData.txt <br/> curr/\*.txt | 3.1 KiB <br/> 27 KiB <br/> **2.5 MiB** |
| Language Display <br/> Names | `"lang_tree"` | lang/\*.txt | **2.1 MiB** |
| Language Tags | `"misc"` | misc/keyTypeData.txt <br/> misc/langInfo.txt <br/> misc/likelySubtags.txt <br/> misc/metadata.txt | 6.8 KiB <br/> 37 KiB <br/> 53 KiB <br/> 33 KiB |
| Normalization | `"normalization"` | in/\*.nrm except in/nfc.nrm | 160 KiB |
| Plural Rules | `"misc"` | misc/pluralRanges.txt <br/> misc/plurals.txt | 3.3 KiB <br/> 33 KiB |
| Region Display <br/> Names | `"region_tree"` | region/\*.txt | **1.1 MiB** |
| Rule-Based <br/> Number Formatting <br/> (Spellout, Ordinals) | `"rbnf_tree"` | rbnf/\*.txt | 538 KiB |
| StringPrep | `"stringprep"` | sprep/\*.txt | 193 KiB |
| Time Zones | `"misc"` <br/> `"zone_tree"` | misc/metaZones.txt <br/> misc/timezoneTypes.txt <br/> misc/windowsZones.txt <br/> misc/zoneinfo64.txt <br/> zone/\*.txt | 41 KiB <br/> 20 KiB <br/> 22 KiB <br/> 151 KiB <br/> **2.7 MiB** |
| Transliteration | `"translit"` | translit/\*.txt | 685 KiB |
| Unicode Character <br/> Names | `"unames"` | in/unames.icu | 269 KiB |
| Unicode Text Layout | `"ulayout"` | in/ulayout.icu | 14 KiB |
| Units | `"unit_tree"` | unit/\*.txt | **1.7 MiB** |
| **OTHER** | `"cnvalias"` <br/> `"misc"` <br/> `"locales_tree"` | mappings/convrtrs.txt <br/> misc/dayPeriods.txt <br/> misc/genderList.txt <br/> misc/numberingSystems.txt <br/> misc/supplementalData.txt <br/> locales/\*.txt | 63 KiB <br/> 19 KiB <br/> 0.5 KiB <br/> 5.6 KiB <br/> 228 KiB <br/> **2.4 MiB** |
#### Filter Types
You may list *filters* for each category in the *featureFilters* section of
your config file. What follows are examples of the possible types of filters.
##### Exclusion Filter
To exclude an entire category, use *filterType* "exclude". For example, to
exclude all confusables data:
featureFilters: {
confusables: {
filterType: exclude
}
}
##### File Name Filter
To exclude certain files out of a category, use the file name filter, which is
the default type of filter when *filterType* is not specified. For example,
to include the Burmese break iteration dictionary but not any other
dictionaries:
featureFilters: {
brkitr_dictionaries: {
whitelist: [
burmesedict
]
}
}
Do *not* include directories or file extensions. They will be added
automatically for you. Note that all files in a particular category have the
same directory and extension.
You can use either a whitelist or a blacklist for the file name filter.
##### Regex Filter
To exclude filenames matching a certain regular expression, use *filterType*
"regex". For example, to reject the CJK-specific break iteration rules:
featureFilters: {
brkitr_rules: {
filterType: regex
blacklist: [
^.*_cj$
]
}
}
The Python standard library [*re*
module](https://docs.python.org/3/library/re.html) is used for evaluating the
regular expressions. In case the regular expression engine is changed in the
future, however, you are encouraged to restrict yourself to a simple set of
regular expression operators.
As above, do not include directories or file extensions, and you can use
either a whitelist or a blacklist.
##### Union Filter
You can combine the results of multiple filters with *filterType* "union".
This filter matches files that match *at least one* of the provided filters.
The syntax is:
{
filterType: union
unionOf: [
{ /* filter 1 */ },
{ /* filter 2 */ },
// ...
]
}
This filter type is useful for combining "locale" filters with different
includeScripts or includeChildren options.
#### Locale-Tree Categories
Several categories have the `_tree` suffix. These categories are for "locale
trees": they contain locale-specific data. ***The [localeFilter configuration
option](#locale-slicing) sets the default file filter for all `_tree`
categories.***
If you want to include different locales for different locale file trees, you
can override their filter in the *featureFilters* section of the config file.
For example, to include only Italian data for currency symbols *instead of*
the common locales specified in *localeFilter*, you can do the following:
featureFilters: {
curr_tree: {
filterType: locale
whitelist: [
it
]
}
}
You can exclude an entire `_tree` category without affecting other categories.
For example, to exclude region display names:
featureFilters: {
region_tree: {
filterType: exclude
}
}
Note that you are able to use any of the other filter types for `_tree`
categories, but you must be very careful that you are including all of the
correct files. For example, `en_GB` requires `en_001`, and you must always
include `root`. If you use the "language" or "locale" filter types, this
logic is done for you.
### Resource Bundle Slicing (fine-grained features)
The third section of the ICU filter config file is *resourceFilters*. With
this section, you can dive inside resource bundle files to remove even more
data.
You can apply resource filters to all locale tree categories as well as to
categories that include resource bundles, such as the `"misc"` category.
For example, consider measurement units. There is one unit file per locale (example:
[en.txt](https://github.com/unicode-org/icu/blob/master/icu4c/source/data/unit/en.txt)),
and that file contains data for all measurement units in CLDR. However, if
you are only formatting distances, for example, you may need the data for only
a small set of units.
Here is how you could include units of length in the "short" style but no
other units:
resourceFilters: [
{
categories: [
unit_tree
]
rules: [
-/units
-/unitsNarrow
-/unitsShort
+/unitsShort/length
]
}
]
Conceptually, the rules are applied from top to bottom. First, all data for
all three styles of units are removed, and then the short length units are
added back.
#### Wildcard Character
You can use the wildcard character (`*`) to match a piece of the resource
path. For example, to include length units for all three styles, you can do:
resourceFilters: [
{
categories: [
unit_tree
]
rules: [
-/units
-/unitsNarrow
-/unitsShort
+/*/length
]
}
]
The wildcard must be the only character in its path segment. Future ICU
versions may expand the syntax.
#### Resource Filter for Specific File
The resource filter object takes an optional *files* setting which accepts a
file filter in the same syntax used above for file filtering. For example, if
you wanted to apply a filter to misc/supplementalData.txt, you could do the
following (this example removes calendar data):
resourceFilters: [
{
categories: ["misc"]
files: {
whitelist: ["supplementalData"]
}
rules: [
-/calendarData
]
}
]
#### Combining Multiple Resource Filter Specs
You can also list multiple resource filter objects in the *resourceFilters*
array; the filters are added from top to bottom. For example, here is an
advanced configuration that includes "mile" for en-US and "kilometer" for
en-CA; this also makes use of the *files* option:
resourceFilters: [
{
categories: ["unit_tree"]
rules: [
-/units
-/unitsNarrow
-/unitsShort
]
},
{
categories: ["unit_tree"]
files: {
filterType: locale
whitelist: ["en_US"]
}
rules: [
+/*/length/mile
]
},
{
categories: ["unit_tree"]
files: {
filterType: locale
whitelist: ["en_CA"]
}
rules: [
+/*/length/kilometer
]
}
]
The above example would give en-US these resource filter rules:
-/units
-/unitsNarrow
-/unitsShort
+/*/length/mile
and en-CA these resource filter rules:
-/units
-/unitsNarrow
-/unitsShort
+/*/length/kilometer
In accordance with *filterType* "locale", the parent locales *en* and *root*
would get both units; this is required since both en-US and en-CA may inherit
from the parent locale:
-/units
-/unitsNarrow
-/unitsShort
+/*/length/mile
+/*/length/kilometer
## Debugging Tips
**Run Python directly:** If you do not want to wait for ./runConfigureICU to
finish, you can directly re-generate the rules using your filter file with the
following command line run from *icu4c/source/data*.
$ python3 -m buildtool --mode=gnumake --seqmode=parallel --filter_file=filters.json > rules.mk
**Install jsonschema:** Install the `jsonschema` pip package to get warnings
about problems with your filter file.
**Inspect data/rules.mk:** The Python script outputs the file *rules.mk*
inside *icu4c/source/data*. To see what is going to get built, you can inspect
that file. First build ICU normally, and copy *rules.mk* to
*rules_default.mk*. Then build ICU with your filter file. Now you can take the
diff between *rules_default.mk* and *rules.mk* to see exactly what your filter
file is removing.
**Inspect the output:** After a `make clean` and `make` with a new *rules.mk*,
you can look inside the directory *icu4c/source/data/out* to see the files
that got built.
**Inspect the compiled resource filter rules:** If you are using a resource
filter, the resource filter rules get compiled for each individual locale
inside *icu4c/source/data/out/tmp/filters*. You can look at those files to see
what filter rules are being applied to each individual locale.
**Run genrb in verbose mode:** For debugging a resource filter, you can run
genrb in verbose mode to see which resources got stripped. To do this, first
inspect the make output and find a command line like this:
LD_LIBRARY_PATH=../lib:../stubdata:../tools/ctestfw:$LD_LIBRARY_PATH ../bin/genrb --filterDir ./out/tmp/filters/unit_tree -s ./unit -d ./out/build/icudt64l/unit/ -i ./out/build/icudt64l --usePoolBundle ./out/build/icudt64l/unit/ -k en.txt
Copy that command line and re-run it from *icu4c/source/data* with the `-v`
flag added to the end. The command will print out exactly which resource paths
are being included and excluded as well as a model of the filter rules applied
to this file.
**Inspect .res files with derb:** The `derb` tool can convert .res files back
to .txt files after filtering. For example, to convert the above unit res file
back to a txt file, you can run this command from *icu4c/source*:
LD_LIBRARY_PATH=lib bin/derb data/out/build/icudt64l/unit/en.res
That will produce a file *en.txt* in your current directory, which is the
original *data/unit/en.txt* but after resource filters were applied.
**Put complex rules first** and **use the wildcard `*` sparingly:** The order
of the filter rules matters a great deal in how effective your data size
reduction can be, and the wildcard `*` can sometimes produce behavior that is
tricky to reason about. For example, these three lists of filter rules look
similar at first glance but actually produce different output:
<table>
<tr>
<th>Unit Resource Filter Rules</th>
<th>Unit Resource Size</th>
<th>Commentary</th>
<th>Result</th>
</tr>
<tr><td><pre>
-/*/*
+/*/digital
-/*/digital/*/dnam
-/durationUnits
-/units
-/unitsNarrow
</pre></td><td>77 KiB</td><td>
First, remove all unit types. Then, add back digital units across all unit
widths. Then, remove display names from digital units. Then, remove duration
unit patterns and long and narrow forms.
</td><td>
Digital units in short form are included; all other units are removed.
</td></tr>
<tr><td><pre>
-/durationUnits
-/units
-/unitsNarrow
-/*/*
+/*/digital
-/*/digital/*/dnam
</pre></td><td>125 KiB</td><td>
First, remove duration unit patterns and long and narrow forms. Then, remove
all unit types. Then, add back digital units across all unit widths. Then,
remove display names from digital units.
</td><td>
Digital units are included <em>in all widths</em>; all other units are removed.
</td></tr>
<tr><td><pre>
-/*/*
+/*/digital
-/*/*/*/dnam
-/durationUnits
-/units
-/unitsNarrow
</pre></td><td>191 KiB</td><td>
First, remove all unit types. Then, add back digital units across all unit
widths. Then, remove display names from all units. Then, remove duration unit
patterns and long and narrow forms.
</td><td>
Digital units in short form are included, as is the <em>tree structure</em>
for all other units, even though the other units have no real data.
</td></tr>
</table>
By design, empty tree structure is retained in the unit bundle. This is
because there are numerous instances in ICU data where the presence of an
empty tree carries meaning. However, it means that you must be careful when
building resource filter rules in order to achieve the optimal data bundle
size.
Using the `-v` option in genrb (described above) is helpful when debugging
these types of issues.
## Other Features of the ICU Data Build Tool
While data filtering is the primary reason the ICU Data Build Tool was
developed, there are additional use cases.
### Running Data Build without Configure/Make
You can build the dat file outside of the ICU build system by directly
invoking the Python buildtool. Run the following command to see the help text
for the CLI tool:
$ PYTHONPATH=path/to/icu4c/source/data python3 -m buildtool --help
### Collation UCAData
For using collation (sorting and searching) in any language, the "root"
collation data file must be included. It provides the Unicode CLDR default
sort order for all code points, and forms the basis for language-specific
tailorings as well as for custom collators built at runtime.
There are two versions of the root collation data file:
- ucadata-unihan.txt (compiled size: 511 KiB)
- ucadata-implicithan.txt (compiled size: 178 KiB)
The unihan version sorts Han characters in radical-stroke order according to
Unicode, which is a somewhat useful default sort order, especially for use
with non-CJK languages. The implicithan version sorts Han characters in the
order of their Unicode assignment, which is similar to radical-stroke order
for common characters but arbitrary for others. For more information, see
[UTS #10 §10.1.3](https://www.unicode.org/reports/tr10/#Implicit_Weights).
By default, the unihan version is used. The unihan version of the data file
is much larger than that for implicithan, so if you need collation but also
small data, then you may want to select the implicithan version. To use the
implicithan version, put the following setting in your *filters.json* file:
{
"collationUCAData": "implicithan"
}
### File Substitution
Using the configuration file, you can perform whole-file substitutions. For
example, suppose you want to replace the transliteration rules for
*Zawgyi_my*. You could create a directory called `my_icu_substitutions`
containing your new `Zawgyi_my.txt` rule file, and then put this in your
configuration file:
fileReplacements: {
directory: "/path/to/my_icu_substitutions"
replacements: [
{
src: "Zawgyi_my.txt"
dest: "translit/Zawgyi_my.txt"
}
]
}
Whole-file substitution happens before all other filters are applied.

View file

@ -13,7 +13,7 @@
# (bring up Powershell ISE)
# cd C:\icu\icu4c\
# Set-ExecutionPolicy -Scope Process Unrestricted
# .\packaging\distrelease.ps1
# .\packaging\distrelease.ps1 -arch "x64 or x86"
#
# Will emit: c:\icu4c\icu\source\dist\icu-windows.zip
#
@ -22,6 +22,9 @@
# see https://docs.microsoft.com/powershell/module/microsoft.powershell.core/about/about_execution_policies?view=powershell-5.1&viewFallbackFrom=powershell-Microsoft.PowerShell.Core
# for more about execution policies.
Param(
[string]$arch = "x64" # use x64 as default
)
$icuDir = Split-Path -Path $MyInvocation.MyCommand.Definition -Parent
$icuDir = Resolve-Path -Path '$icuDir\..'
@ -35,11 +38,25 @@ Get-ChildItem -Path $source -ErrorAction SilentlyContinue | Remove-Item -Recurse
New-Item -Path $source -ItemType "directory" -ErrorAction SilentlyContinue
# copy required stuff
Copy-Item -Path "$icuDir\lib" -Destination $source -Recurse
Copy-Item -Path "$icuDir\lib64" -Destination $source -Recurse
if ($arch -eq "x64")
{
Copy-Item -Path "$icuDir\lib64" -Destination $source -Recurse
Copy-Item -Path "$icuDir\bin64" -Destination $source -Recurse
}
elseif ($arch -eq "x86")
{
Copy-Item -Path "$icuDir\lib" -Destination $source -Recurse
Copy-Item -Path "$icuDir\bin" -Destination $source -Recurse
}
else
{
$filename = $MyInvocation.MyCommand.Name;
echo "Invalid architecture."
echo "Usage: $filename -arch `"x64 or x86`""
exit
}
Copy-Item -Path "$icuDir\include" -Destination $source -Recurse
Copy-Item -Path "$icuDir\bin" -Destination $source -Recurse
Copy-Item -Path "$icuDir\bin64" -Destination $source -Recurse
Copy-Item -Path "$icuDir\APIChangeReport.html" -Destination $source -Recurse
Copy-Item -Path "$icuDir\icu4c.css" -Destination $source -Recurse
Copy-Item -Path "$icuDir\LICENSE" -Destination $source -Recurse

View file

@ -57,18 +57,16 @@ toASCIILower(UChar ch){
inline static UBool
startsWithPrefix(const UChar* src , int32_t srcLength){
UBool startsWithPrefix = TRUE;
if(srcLength < ACE_PREFIX_LENGTH){
return FALSE;
}
for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
if(toASCIILower(src[i]) != ACE_PREFIX[i]){
startsWithPrefix = FALSE;
return FALSE;
}
}
return startsWithPrefix;
return TRUE;
}
@ -441,6 +439,7 @@ _internal_toUnicode(const UChar* src, int32_t srcLength,
for(int32_t j=0; j<srcLength; j++){
if(src[j]> 0x7f){
srcIsASCII = FALSE;
break;
}/*else if(isLDHChar(src[j])==FALSE){
// here we do not assemble surrogates
// since we know that LDH code points

View file

@ -3034,11 +3034,11 @@ public:
* uint16_t * constructor.
* Delegates to UnicodeString(const char16_t *, int32_t).
* @param text UTF-16 string
* @param length string length
* @param textLength string length
* @stable ICU 59
*/
UnicodeString(const uint16_t *text, int32_t length) :
UnicodeString(ConstChar16Ptr(text), length) {}
UnicodeString(const uint16_t *text, int32_t textLength) :
UnicodeString(ConstChar16Ptr(text), textLength) {}
#endif
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
@ -3047,21 +3047,21 @@ public:
* (Only defined if U_SIZEOF_WCHAR_T==2.)
* Delegates to UnicodeString(const char16_t *, int32_t).
* @param text NUL-terminated UTF-16 string
* @param length string length
* @param textLength string length
* @stable ICU 59
*/
UnicodeString(const wchar_t *text, int32_t length) :
UnicodeString(ConstChar16Ptr(text), length) {}
UnicodeString(const wchar_t *text, int32_t textLength) :
UnicodeString(ConstChar16Ptr(text), textLength) {}
#endif
/**
* nullptr_t constructor.
* Effectively the same as the default constructor, makes an empty string object.
* @param text nullptr
* @param length ignored
* @param textLength ignored
* @stable ICU 59
*/
inline UnicodeString(const std::nullptr_t text, int32_t length);
inline UnicodeString(const std::nullptr_t text, int32_t textLength);
/**
* Readonly-aliasing char16_t* constructor.
@ -3266,13 +3266,13 @@ public:
* }
* \endcode
* @param src String using only invariant characters.
* @param length Length of src, or -1 if NUL-terminated.
* @param textLength Length of src, or -1 if NUL-terminated.
* @param inv Signature-distinguishing parameter, use US_INV.
*
* @see US_INV
* @stable ICU 3.2
*/
UnicodeString(const char *src, int32_t length, enum EInvariant inv);
UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
/**

View file

@ -31,9 +31,9 @@ SHLIB.cc= $(CXX) -dynamiclib -dynamic $(CXXFLAGS) $(LDFLAGS) $(LD_SOOPTIONS)
## Compiler switches to embed a library name and version information
ifeq ($(ENABLE_RPATH),YES)
LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(libdir)/$(notdir $(MIDDLE_SO_TARGET))
LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(libdir)/$(notdir $(MIDDLE_SO_TARGET)) $(PKGDATA_TRAILING_SPACE)
else
LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(notdir $(MIDDLE_SO_TARGET))
LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(notdir $(MIDDLE_SO_TARGET)) $(PKGDATA_TRAILING_SPACE)
endif
## Compiler switch to embed a runtime search path

View file

@ -56,7 +56,7 @@ LD_RPATH_PRE= -R
#LIBRARY_PATH_PREFIX=/usr/lib/lwp:
## Compiler switch to embed a library name
LD_SONAME = -h $(notdir $(MIDDLE_SO_TARGET))
LD_SONAME = -h $(notdir $(MIDDLE_SO_TARGET)) $(PKGDATA_TRAILING_SPACE)
## Shared object suffix
SO= so

View file

@ -25,7 +25,7 @@ LD_RPATH= -R'$$'ORIGIN
LD_RPATH_PRE= -R
## Compiler switch to embed a library name
LD_SONAME = -h $(notdir $(MIDDLE_SO_TARGET))
LD_SONAME = -h $(notdir $(MIDDLE_SO_TARGET)) $(PKGDATA_TRAILING_SPACE)
## Shared library options
LD_SOOPTIONS= -Wl,-Bsymbolic

View file

@ -16,6 +16,7 @@ include $(top_builddir)/icudefs.mk
OUTPUTFILE=pkgdata.inc
MIDDLE_SO_TARGET=
PKGDATA_TRAILING_SPACE=" "
all : clean
@echo GENCCODE_ASSEMBLY_TYPE=$(GENCCODE_ASSEMBLY) >> $(OUTPUTFILE)
@ -36,7 +37,6 @@ all : clean
@echo RANLIB=$(RANLIB) >> $(OUTPUTFILE)
@echo INSTALL_CMD=$(INSTALL-L) >> $(OUTPUTFILE)
clean :
$(RMV) $(OUTPUTFILE)

View file

@ -126,7 +126,7 @@ TESTDATAOUT=$(ICUP)\source\test\testdata\out
#
# TESTDATABLD
# The build directory for test data intermidiate files
# The build directory for test data intermediate files
# (Tests are NOT run from this makefile,
# only the data is put in place.)
TESTDATABLD=$(ICUP)\source\test\testdata\out\build
@ -412,41 +412,34 @@ CLEAN : GODATA
-@erase "region\*.txt"
-@erase "zone\*.res"
-@erase "zone\*.txt"
@cd "$(ICUBLD_PKG)\$(ICUBRK)"
-@erase "*.brk"
-@erase "*.res"
-@erase "*.txt"
-@erase "*.dict"
@cd "$(ICUBLD_PKG)\$(ICUCOL)"
-@erase "*.res"
-@erase "*.txt"
@cd "$(ICUBLD_PKG)\$(ICURBNF)"
-@erase "*.res"
-@erase "*.txt"
@cd "$(ICUBLD_PKG)\$(ICUTRNS)"
-@erase "*.res"
@cd "$(ICUOUT)"
-@erase "*.dat"
@cd "$(ICUTMP)"
-@erase "*.html"
-@erase "*.lst"
-@erase "*.mak"
-@erase "*.obj"
-@erase "*.res"
-@erase "*.timestamp"
@cd "$(TESTDATABLD)"
-@erase "*.cnv"
-@erase "*.icu"
-@erase "*.mak"
-@erase "*.nrm"
-@erase "*.res"
-@erase "*.spp"
-@erase "*.txt"
@cd "$(TESTDATAOUT)"
-@erase "*.dat"
@cd "$(TESTDATAOUT)\testdata"
-@erase "*.typ"
@cd "$(ICUBLD_PKG)"
-@erase "$(ICUBRK)\*.brk"
-@erase "$(ICUBRK)\*.res"
-@erase "$(ICUBRK)\*.txt"
-@erase "$(ICUBRK)\*.dict"
-@erase "$(ICUCOL)\*.res"
-@erase "$(ICUCOL)\*.txt"
-@erase "$(ICURBNF)\*.res"
-@erase "$(ICURBNF)\*.txt"
-@erase "$(ICUTRNS)\*.res"
-@erase "$(ICUOUT)\*.dat"
-@erase "$(ICUTMP)\*.html"
-@erase "$(ICUTMP)\*.lst"
-@erase "$(ICUTMP)\*.mak"
-@erase "$(ICUTMP)\*.obj"
-@erase "$(ICUTMP)\*.res"
-@erase "$(ICUTMP)\*.timestamp"
-@erase "$(TESTDATABLD)\*.cnv"
-@erase "$(TESTDATABLD)\*.icu"
-@erase "$(TESTDATABLD)\*.mak"
-@erase "$(TESTDATABLD)\*.nrm"
-@erase "$(TESTDATABLD)\*.res"
-@erase "$(TESTDATABLD)\*.spp"
-@erase "$(TESTDATABLD)\*.txt"
-@erase "$(TESTDATAOUT)\*.dat"
-@erase "$(TESTDATAOUT)\testdata\*.typ"
-@erase "$(TESTDATAOUT)\testdata\*.res"
-@erase "$(TESTDATAOUT)\testdata\*.txt"
-@erase "$(TESTDATAOUT)\testdata\*.lst"
# DLL version information

File diff suppressed because it is too large Load diff

View file

@ -2086,11 +2086,11 @@ metaZones:table(nofallback){
{
"America_Pacific",
"2018-11-04 10:00",
"2019-03-10 11:00",
"2019-01-20 10:00",
}
{
"Alaska",
"2019-03-10 11:00",
"2019-01-20 10:00",
"9999-12-31 23:59",
}
}

View file

@ -3,17 +3,17 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
//---------------------------------------------------------
// Build tool: tz2icu
// Build date: Tue Feb 19 01:26:22 2019
// Build date: Tue Mar 26 16:57:59 2019
// tz database: ftp://ftp.iana.org/tz/
// tz version: 2018i
// ICU version: 63.1
// tz version: 2019a
// ICU version: 64.1
//---------------------------------------------------------
// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<
// >> !!! >>> DO NOT EDIT <<< !!! <<
//---------------------------------------------------------
zoneinfo64:table(nofallback) {
TZVersion { "2018i" }
TZVersion { "2019a" }
Zones:array {
/* ACT */ :int { 355 } //Z#0
/* AET */ :int { 367 } //Z#1
@ -786,9 +786,9 @@ zoneinfo64:table(nofallback) {
} //Z#163
/* America/Metlakatla */ :table {
transPre32:intvector { -1, 1069743569, -1, 2106011674 }
trans:intvector { -880207200, -765385200, -21477600, -5756400, 9972000, 25693200, 41421600, 57747600, 73476000, 89197200, 104925600, 120646800, 126698400, 152096400, 162381600, 183546000, 199274400, 215600400, 230724000, 247050000, 262778400, 278499600, 294228000, 309949200, 325677600, 341398800, 357127200, 372848400, 388576800, 404902800, 420026400, 436352400, 1446372000, 1457866800, 1478426400, 1489316400, 1509876000, 1520766000, 1541325600, 1552215600, 1572775200 }
trans:intvector { -880207200, -765385200, -21477600, -5756400, 9972000, 25693200, 41421600, 57747600, 73476000, 89197200, 104925600, 120646800, 126698400, 152096400, 162381600, 183546000, 199274400, 215600400, 230724000, 247050000, 262778400, 278499600, 294228000, 309949200, 325677600, 341398800, 357127200, 372848400, 388576800, 404902800, 420026400, 436352400, 1446372000, 1457866800, 1478426400, 1489316400, 1509876000, 1520766000, 1541325600, 1547978400, 1552215600, 1572775200 }
typeOffsets:intvector { 54822, 0, -32400, 0, -32400, 3600, -31578, 0, -28800, 0, -28800, 3600 }
typeMap:bin { "03040504050405040504050405040504050405040504050405040504050405040504010201020102040201" }
typeMap:bin { "0304050405040504050405040504050405040504050405040504050405040504050401020102010204010201" }
finalRule { "US" }
finalRaw:int { -32400 }
finalYear:int { 2020 }
@ -1374,9 +1374,9 @@ zoneinfo64:table(nofallback) {
} //Z#267
/* Asia/Gaza */ :table {
transPre32:intvector { -1, 2109557424 }
trans:intvector { -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -399088800, -386650800, -368330400, -355114800, -336790800, -323654400, -305168400, -292032000, -273632400, -260496000, -242096400, -228960000, -210560400, -197424000, -178938000, -165801600, -147402000, -134265600, -115866000, -102643200, -84330000, -81313200, 142380000, 150843600, 167176800, 178664400, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 828655200, 843170400, 860104800, 874620000, 891554400, 906069600, 924213600, 939934800, 956268000, 971989200, 987717600, 1003438800, 1019167200, 1034888400, 1050616800, 1066338000, 1082066400, 1096581600, 1113516000, 1128380400, 1143842400, 1158872400, 1175378400, 1189638000, 1206655200, 1219957200, 1238104800, 1252015200, 1269640860, 1281474000, 1301608860, 1312146000, 1333058400, 1348178400, 1364508000, 1380229200, 1395957600, 1414098000, 1427493600, 1445547600, 1458946800, 1477692000 }
trans:intvector { -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -399088800, -386650800, -368330400, -355114800, -336790800, -323654400, -305168400, -292032000, -273632400, -260496000, -242096400, -228960000, -210560400, -197424000, -178938000, -165801600, -147402000, -134265600, -115866000, -102643200, -84330000, -81313200, 142380000, 150843600, 167176800, 178664400, 334015200, 337644000, 452556000, 462232800, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 828655200, 843170400, 860104800, 874620000, 891554400, 906069600, 924213600, 939934800, 956268000, 971989200, 987717600, 1003438800, 1019167200, 1034888400, 1050616800, 1066338000, 1082066400, 1096581600, 1113516000, 1128380400, 1143842400, 1158872400, 1175378400, 1189638000, 1206655200, 1219957200, 1238104800, 1252015200, 1269640860, 1281474000, 1301608860, 1312146000, 1333058400, 1348178400, 1364508000, 1380229200, 1395957600, 1414098000, 1427493600, 1445547600, 1458946800, 1477692000 }
typeOffsets:intvector { 8272, 0, 7200, 0, 7200, 3600 }
typeMap:bin { "0102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
typeMap:bin { "010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
finalRule { "Palestine" }
finalRaw:int { 7200 }
finalYear:int { 2017 }
@ -1384,9 +1384,9 @@ zoneinfo64:table(nofallback) {
/* Asia/Harbin */ :int { 314 } //Z#269
/* Asia/Hebron */ :table {
transPre32:intvector { -1, 2109557273 }
trans:intvector { -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -399088800, -386650800, -368330400, -355114800, -336790800, -323654400, -305168400, -292032000, -273632400, -260496000, -242096400, -228960000, -210560400, -197424000, -178938000, -165801600, -147402000, -134265600, -115866000, -102643200, -84330000, -81313200, 142380000, 150843600, 167176800, 178664400, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 828655200, 843170400, 860104800, 874620000, 891554400, 906069600, 924213600, 939934800, 956268000, 971989200, 987717600, 1003438800, 1019167200, 1034888400, 1050616800, 1066338000, 1082066400, 1096581600, 1113516000, 1128380400, 1143842400, 1158872400, 1175378400, 1189638000, 1206655200, 1220216400, 1238104800, 1252015200, 1269554400, 1281474000, 1301608860, 1312146000, 1314655200, 1317330000, 1333058400, 1348178400, 1364508000, 1380229200, 1395957600, 1414098000, 1427493600, 1445547600, 1458946800, 1477692000 }
trans:intvector { -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -399088800, -386650800, -368330400, -355114800, -336790800, -323654400, -305168400, -292032000, -273632400, -260496000, -242096400, -228960000, -210560400, -197424000, -178938000, -165801600, -147402000, -134265600, -115866000, -102643200, -84330000, -81313200, 142380000, 150843600, 167176800, 178664400, 334015200, 337644000, 452556000, 462232800, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 828655200, 843170400, 860104800, 874620000, 891554400, 906069600, 924213600, 939934800, 956268000, 971989200, 987717600, 1003438800, 1019167200, 1034888400, 1050616800, 1066338000, 1082066400, 1096581600, 1113516000, 1128380400, 1143842400, 1158872400, 1175378400, 1189638000, 1206655200, 1220216400, 1238104800, 1252015200, 1269554400, 1281474000, 1301608860, 1312146000, 1314655200, 1317330000, 1333058400, 1348178400, 1364508000, 1380229200, 1395957600, 1414098000, 1427493600, 1445547600, 1458946800, 1477692000 }
typeOffsets:intvector { 8423, 0, 7200, 0, 7200, 3600 }
typeMap:bin { "01020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
typeMap:bin { "0102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
finalRule { "Palestine" }
finalRaw:int { 7200 }
finalYear:int { 2017 }
@ -1426,9 +1426,9 @@ zoneinfo64:table(nofallback) {
} //Z#277
/* Asia/Jerusalem */ :table {
transPre32:intvector { -1, 1454818042 }
trans:intvector { -1641003640, -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -681962400, -673243200, -667962000, -652327200, -636426000, -622087200, -608947200, -591847200, -572486400, -558576000, -542851200, -527731200, -514425600, -490845600, -482986800, -459475200, -451537200, -428551200, -418262400, -400032000, -387428400, 142380000, 150843600, 167176800, 178664400, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 826840800, 842821200, 858895200, 874184400, 890344800, 905029200, 923011200, 936313200, 955670400, 970783200, 986770800, 1001282400, 1017356400, 1033941600, 1048806000, 1065132000, 1081292400, 1095804000, 1112313600, 1128812400, 1143763200, 1159657200, 1175212800, 1189897200, 1206662400, 1223161200, 1238112000, 1254006000, 1269561600, 1284246000, 1301616000, 1317510000, 1333065600, 1348354800, 1364515200, 1382828400 }
trans:intvector { -1641003640, -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -681962400, -673243200, -667962000, -652327200, -636426000, -622087200, -608947200, -591847200, -572486400, -558576000, -542851200, -527731200, -514425600, -490845600, -482986800, -459475200, -451537200, -428551200, -418262400, -400032000, -387428400, 142380000, 150843600, 167176800, 178664400, 334015200, 337644000, 452556000, 462232800, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 826840800, 842821200, 858895200, 874184400, 890344800, 905029200, 923011200, 936313200, 955670400, 970783200, 986770800, 1001282400, 1017356400, 1033941600, 1048806000, 1065132000, 1081292400, 1095804000, 1112313600, 1128812400, 1143763200, 1159657200, 1175212800, 1189897200, 1206662400, 1223161200, 1238112000, 1254006000, 1269561600, 1284246000, 1301616000, 1317510000, 1333065600, 1348354800, 1364515200, 1382828400 }
typeOffsets:intvector { 8454, 0, 7200, 0, 7200, 3600, 7200, 7200, 8440, 0 }
typeMap:bin { "0401020102010201020102010302010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
typeMap:bin { "040102010201020102010201030201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
finalRule { "Zion" }
finalRaw:int { 7200 }
finalYear:int { 2014 }
@ -2031,13 +2031,10 @@ zoneinfo64:table(nofallback) {
} //Z#431
/* Etc/GMT0 */ :int { 403 } //Z#432
/* Etc/Greenwich */ :int { 403 } //Z#433
/* Etc/UCT */ :table {
typeOffsets:intvector { 0, 0 }
links:intvector { 434, 614 }
} //Z#434
/* Etc/UCT */ :int { 435 } //Z#434
/* Etc/UTC */ :table {
typeOffsets:intvector { 0, 0 }
links:intvector { 435, 436, 438, 628, 629, 633 }
links:intvector { 434, 435, 436, 438, 614, 628, 629, 633 }
} //Z#435
/* Etc/Universal */ :int { 435 } //Z#436
/* Etc/Unknown */ :table {
@ -2860,7 +2857,7 @@ zoneinfo64:table(nofallback) {
finalYear:int { 1977 }
} //Z#612
/* Turkey */ :int { 458 } //Z#613
/* UCT */ :int { 434 } //Z#614
/* UCT */ :int { 435 } //Z#614
/* US/Alaska */ :int { 60 } //Z#615
/* US/Aleutian */ :int { 59 } //Z#616
/* US/Arizona */ :int { 184 } //Z#617
@ -3141,7 +3138,7 @@ zoneinfo64:table(nofallback) {
8, -30, -1, 7200, 1, 3, 1, -1, 7200, 1, 3600
} //_#20
Palestine:intvector {
2, 22, -7, 3600, 0, 9, -31, -7, 3600, 0, 3600
2, 24, -7, 3600, 0, 9, -31, -7, 3600, 0, 3600
} //_#21
Para:intvector {
9, 1, -1, 0, 0, 2, 22, -1, 0, 0, 3600

View file

@ -16,6 +16,7 @@ include $(top_builddir)/icudefs.mk
OUTPUTFILE=icupkg.inc
MIDDLE_SO_TARGET=
PKGDATA_TRAILING_SPACE=" "
all : clean
@echo GENCCODE_ASSEMBLY_TYPE=$(GENCCODE_ASSEMBLY) >> $(OUTPUTFILE)
@ -36,7 +37,6 @@ all : clean
@echo RANLIB=$(RANLIB) >> $(OUTPUTFILE)
@echo INSTALL_CMD=$(INSTALL) >> $(OUTPUTFILE)
clean :
$(RMV) $(OUTPUTFILE)

View file

@ -14,9 +14,9 @@ top_builddir = ../..
## All the flags and other definitions are included here.
include $(top_builddir)/icudefs.mk
MIDDLE_SO_TARGET=
OUTPUTFILE=pkgdata.inc
MIDDLE_SO_TARGET=
PKGDATA_TRAILING_SPACE=" "
all : clean
@echo GENCCODE_ASSEMBLY_TYPE=$(GENCCODE_ASSEMBLY) >> $(OUTPUTFILE)

View file

@ -103,16 +103,17 @@ number_affixutils.o number_compact.o number_decimalquantity.o \
number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \
number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o number_output.o \
number_padding.o number_patternmodifier.o number_patternstring.o \
number_rounding.o number_scientific.o number_stringbuilder.o number_utils.o number_asformat.o \
number_rounding.o number_scientific.o number_utils.o number_asformat.o \
number_mapper.o number_multiplier.o number_currencysymbols.o number_skeletons.o number_capi.o \
double-conversion.o double-conversion-bignum-dtoa.o double-conversion-bignum.o \
double-conversion-cached-powers.o double-conversion-diy-fp.o \
double-conversion-fast-dtoa.o double-conversion-strtod.o \
numparse_stringsegment.o numparse_parsednumber.o numparse_impl.o \
string_segment.o numparse_parsednumber.o numparse_impl.o \
numparse_symbols.o numparse_decimal.o numparse_scientific.o numparse_currency.o \
numparse_affixes.o numparse_compositions.o numparse_validators.o \
numrange_fluent.o numrange_impl.o \
erarules.o formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o
erarules.o \
formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o
## Header files to install
HEADERS = $(srcdir)/unicode/*.h

View file

@ -5,14 +5,9 @@
#if !UCONFIG_NO_FORMATTING
#include "number_stringbuilder.h"
#include "static_unicode_sets.h"
#include "formatted_string_builder.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "number_utils.h"
using namespace icu;
using namespace icu::number;
using namespace icu::number::impl;
namespace {
@ -34,7 +29,10 @@ inline void uprv_memmove2(void* dest, const void* src, size_t len) {
} // namespace
NumberStringBuilder::NumberStringBuilder() {
U_NAMESPACE_BEGIN
FormattedStringBuilder::FormattedStringBuilder() {
#if U_DEBUG
// Initializing the memory to non-zero helps catch some bugs that involve
// reading from an improperly terminated string.
@ -44,18 +42,18 @@ NumberStringBuilder::NumberStringBuilder() {
#endif
}
NumberStringBuilder::~NumberStringBuilder() {
FormattedStringBuilder::~FormattedStringBuilder() {
if (fUsingHeap) {
uprv_free(fChars.heap.ptr);
uprv_free(fFields.heap.ptr);
}
}
NumberStringBuilder::NumberStringBuilder(const NumberStringBuilder &other) {
FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
*this = other;
}
NumberStringBuilder &NumberStringBuilder::operator=(const NumberStringBuilder &other) {
FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
// Check for self-assignment
if (this == &other) {
return *this;
@ -78,7 +76,7 @@ NumberStringBuilder &NumberStringBuilder::operator=(const NumberStringBuilder &o
// UErrorCode is not available; fail silently.
uprv_free(newChars);
uprv_free(newFields);
*this = NumberStringBuilder(); // can't fail
*this = FormattedStringBuilder(); // can't fail
return *this;
}
@ -97,15 +95,15 @@ NumberStringBuilder &NumberStringBuilder::operator=(const NumberStringBuilder &o
return *this;
}
int32_t NumberStringBuilder::length() const {
int32_t FormattedStringBuilder::length() const {
return fLength;
}
int32_t NumberStringBuilder::codePointCount() const {
int32_t FormattedStringBuilder::codePointCount() const {
return u_countChar32(getCharPtr() + fZero, fLength);
}
UChar32 NumberStringBuilder::getFirstCodePoint() const {
UChar32 FormattedStringBuilder::getFirstCodePoint() const {
if (fLength == 0) {
return -1;
}
@ -114,7 +112,7 @@ UChar32 NumberStringBuilder::getFirstCodePoint() const {
return cp;
}
UChar32 NumberStringBuilder::getLastCodePoint() const {
UChar32 FormattedStringBuilder::getLastCodePoint() const {
if (fLength == 0) {
return -1;
}
@ -125,13 +123,13 @@ UChar32 NumberStringBuilder::getLastCodePoint() const {
return cp;
}
UChar32 NumberStringBuilder::codePointAt(int32_t index) const {
UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
UChar32 cp;
U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
return cp;
}
UChar32 NumberStringBuilder::codePointBefore(int32_t index) const {
UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
int32_t offset = index;
U16_BACK_1(getCharPtr() + fZero, 0, offset);
UChar32 cp;
@ -139,19 +137,19 @@ UChar32 NumberStringBuilder::codePointBefore(int32_t index) const {
return cp;
}
NumberStringBuilder &NumberStringBuilder::clear() {
FormattedStringBuilder &FormattedStringBuilder::clear() {
// TODO: Reset the heap here?
fZero = getCapacity() / 2;
fLength = 0;
return *this;
}
int32_t NumberStringBuilder::appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
int32_t FormattedStringBuilder::appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
return insertCodePoint(fLength, codePoint, field, status);
}
int32_t
NumberStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
int32_t count = U16_LENGTH(codePoint);
int32_t position = prepareForInsert(index, count, status);
if (U_FAILURE(status)) {
@ -168,11 +166,11 @@ NumberStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field fie
return count;
}
int32_t NumberStringBuilder::append(const UnicodeString &unistr, Field field, UErrorCode &status) {
int32_t FormattedStringBuilder::append(const UnicodeString &unistr, Field field, UErrorCode &status) {
return insert(fLength, unistr, field, status);
}
int32_t NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
UErrorCode &status) {
if (unistr.length() == 0) {
// Nothing to insert.
@ -186,7 +184,7 @@ int32_t NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr,
}
int32_t
NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
Field field, UErrorCode &status) {
int32_t count = end - start;
int32_t position = prepareForInsert(index, count, status);
@ -201,7 +199,7 @@ NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t
}
int32_t
NumberStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
int32_t thisLength = endThis - startThis;
int32_t otherLength = endOther - startOther;
@ -224,12 +222,12 @@ NumberStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeSt
return count;
}
int32_t NumberStringBuilder::append(const NumberStringBuilder &other, UErrorCode &status) {
int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
return insert(fLength, other, status);
}
int32_t
NumberStringBuilder::insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status) {
FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
if (this == &other) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
@ -250,7 +248,7 @@ NumberStringBuilder::insert(int32_t index, const NumberStringBuilder &other, UEr
return count;
}
void NumberStringBuilder::writeTerminator(UErrorCode& status) {
void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
int32_t position = prepareForInsert(fLength, 1, status);
if (U_FAILURE(status)) {
return;
@ -260,7 +258,7 @@ void NumberStringBuilder::writeTerminator(UErrorCode& status) {
fLength--;
}
int32_t NumberStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
U_ASSERT(index >= 0);
U_ASSERT(index <= fLength);
U_ASSERT(count >= 0);
@ -279,7 +277,7 @@ int32_t NumberStringBuilder::prepareForInsert(int32_t index, int32_t count, UErr
}
}
int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
int32_t oldCapacity = getCapacity();
int32_t oldZero = fZero;
char16_t *oldChars = getCharPtr();
@ -342,7 +340,7 @@ int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index, int32_t count
return fZero + index;
}
int32_t NumberStringBuilder::remove(int32_t index, int32_t count) {
int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
// TODO: Reset the heap here? (If the string after removal can fit on stack?)
int32_t position = index + fZero;
uprv_memmove2(getCharPtr() + position,
@ -355,18 +353,18 @@ int32_t NumberStringBuilder::remove(int32_t index, int32_t count) {
return position;
}
UnicodeString NumberStringBuilder::toUnicodeString() const {
UnicodeString FormattedStringBuilder::toUnicodeString() const {
return UnicodeString(getCharPtr() + fZero, fLength);
}
const UnicodeString NumberStringBuilder::toTempUnicodeString() const {
const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
// Readonly-alias constructor:
return UnicodeString(FALSE, getCharPtr() + fZero, fLength);
}
UnicodeString NumberStringBuilder::toDebugString() const {
UnicodeString FormattedStringBuilder::toDebugString() const {
UnicodeString sb;
sb.append(u"<NumberStringBuilder [", -1);
sb.append(u"<FormattedStringBuilder [", -1);
sb.append(toUnicodeString());
sb.append(u"] [", -1);
for (int i = 0; i < fLength; i++) {
@ -419,11 +417,11 @@ UnicodeString NumberStringBuilder::toDebugString() const {
return sb;
}
const char16_t *NumberStringBuilder::chars() const {
const char16_t *FormattedStringBuilder::chars() const {
return getCharPtr() + fZero;
}
bool NumberStringBuilder::contentEquals(const NumberStringBuilder &other) const {
bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
if (fLength != other.fLength) {
return false;
}
@ -435,136 +433,7 @@ bool NumberStringBuilder::contentEquals(const NumberStringBuilder &other) const
return true;
}
bool NumberStringBuilder::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const {
int32_t rawField = fp.getField();
if (rawField == FieldPosition::DONT_CARE) {
return FALSE;
}
if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return FALSE;
}
ConstrainedFieldPosition cfpos;
cfpos.constrainField(UFIELD_CATEGORY_NUMBER, rawField);
cfpos.setState(UFIELD_CATEGORY_NUMBER, rawField, fp.getBeginIndex(), fp.getEndIndex());
if (nextPosition(cfpos, 0, status)) {
fp.setBeginIndex(cfpos.getStart());
fp.setEndIndex(cfpos.getLimit());
return true;
}
// Special case: fraction should start after integer if fraction is not present
if (rawField == UNUM_FRACTION_FIELD && fp.getEndIndex() == 0) {
bool inside = false;
int32_t i = fZero;
for (; i < fZero + fLength; i++) {
if (isIntOrGroup(getFieldPtr()[i]) || getFieldPtr()[i] == UNUM_DECIMAL_SEPARATOR_FIELD) {
inside = true;
} else if (inside) {
break;
}
}
fp.setBeginIndex(i - fZero);
fp.setEndIndex(i - fZero);
}
return false;
}
void NumberStringBuilder::getAllFieldPositions(FieldPositionIteratorHandler& fpih,
UErrorCode& status) const {
ConstrainedFieldPosition cfpos;
while (nextPosition(cfpos, 0, status)) {
fpih.addAttribute(cfpos.getField(), cfpos.getStart(), cfpos.getLimit());
}
}
// Signal the end of the string using a field that doesn't exist and that is
// different from UNUM_FIELD_COUNT, which is used for "null number field".
static constexpr Field kEndField = 0xff;
bool NumberStringBuilder::nextPosition(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const {
auto numericCAF = NumFieldUtils::expand(numericField);
int32_t fieldStart = -1;
Field currField = UNUM_FIELD_COUNT;
for (int32_t i = fZero + cfpos.getLimit(); i <= fZero + fLength; i++) {
Field _field = (i < fZero + fLength) ? getFieldPtr()[i] : kEndField;
// Case 1: currently scanning a field.
if (currField != UNUM_FIELD_COUNT) {
if (currField != _field) {
int32_t end = i - fZero;
// Grouping separators can be whitespace; don't throw them out!
if (currField != UNUM_GROUPING_SEPARATOR_FIELD) {
end = trimBack(i - fZero);
}
if (end <= fieldStart) {
// Entire field position is ignorable; skip.
fieldStart = -1;
currField = UNUM_FIELD_COUNT;
i--; // look at this index again
continue;
}
int32_t start = fieldStart;
if (currField != UNUM_GROUPING_SEPARATOR_FIELD) {
start = trimFront(start);
}
auto caf = NumFieldUtils::expand(currField);
cfpos.setState(caf.category, caf.field, start, end);
return true;
}
continue;
}
// Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
if (cfpos.matchesField(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)
&& i > fZero
// don't return the same field twice in a row:
&& i - fZero > cfpos.getLimit()
&& isIntOrGroup(getFieldPtr()[i - 1])
&& !isIntOrGroup(_field)) {
int j = i - 1;
for (; j >= fZero && isIntOrGroup(getFieldPtr()[j]); j--) {}
cfpos.setState(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD, j - fZero + 1, i - fZero);
return true;
}
// Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
if (numericField != 0
&& cfpos.matchesField(numericCAF.category, numericCAF.field)
&& i > fZero
// don't return the same field twice in a row:
&& (i - fZero > cfpos.getLimit()
|| cfpos.getCategory() != numericCAF.category
|| cfpos.getField() != numericCAF.field)
&& isNumericField(getFieldPtr()[i - 1])
&& !isNumericField(_field)) {
int j = i - 1;
for (; j >= fZero && isNumericField(getFieldPtr()[j]); j--) {}
cfpos.setState(numericCAF.category, numericCAF.field, j - fZero + 1, i - fZero);
return true;
}
// Special case: skip over INTEGER; will be coalesced later.
if (_field == UNUM_INTEGER_FIELD) {
_field = UNUM_FIELD_COUNT;
}
// Case 2: no field starting at this position.
if (_field == UNUM_FIELD_COUNT || _field == kEndField) {
continue;
}
// Case 3: check for field starting at this position
auto caf = NumFieldUtils::expand(_field);
if (cfpos.matchesField(caf.category, caf.field)) {
fieldStart = i - fZero;
currField = _field;
}
}
U_ASSERT(currField == UNUM_FIELD_COUNT);
return false;
}
bool NumberStringBuilder::containsField(Field field) const {
bool FormattedStringBuilder::containsField(Field field) const {
for (int32_t i = 0; i < fLength; i++) {
if (field == fieldAt(i)) {
return true;
@ -573,27 +442,6 @@ bool NumberStringBuilder::containsField(Field field) const {
return false;
}
bool NumberStringBuilder::isIntOrGroup(Field field) {
return field == UNUM_INTEGER_FIELD
|| field == UNUM_GROUPING_SEPARATOR_FIELD;
}
bool NumberStringBuilder::isNumericField(Field field) {
return NumFieldUtils::isNumericField(field);
}
int32_t NumberStringBuilder::trimBack(int32_t limit) const {
return unisets::get(unisets::DEFAULT_IGNORABLES)->spanBack(
getCharPtr() + fZero,
limit,
USET_SPAN_CONTAINED);
}
int32_t NumberStringBuilder::trimFront(int32_t start) const {
return start + unisets::get(unisets::DEFAULT_IGNORABLES)->span(
getCharPtr() + fZero + start,
fLength - start,
USET_SPAN_CONTAINED);
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -9,17 +9,29 @@
#include <cstdint>
#include "unicode/numfmt.h"
#include "unicode/ustring.h"
#include "unicode/unum.h" // for UNUM_FIELD_COUNT
#include "cstring.h"
#include "uassert.h"
#include "number_types.h"
#include "fphdlimp.h"
U_NAMESPACE_BEGIN namespace number {
namespace impl {
U_NAMESPACE_BEGIN
class U_I18N_API NumberStringBuilder : public UMemory {
class FormattedValueStringBuilderImpl;
/**
* A StringBuilder optimized for formatting. It implements the following key
* features beyond a UnicodeString:
*
* <ol>
* <li>Efficient prepend as well as append.
* <li>Keeps track of Fields in an efficient manner.
* </ol>
*
* See also FormattedValueStringBuilderImpl.
*
* @author sffc (Shane Carr)
*/
class U_I18N_API FormattedStringBuilder : public UMemory {
private:
static const int32_t DEFAULT_CAPACITY = 40;
@ -33,13 +45,19 @@ class U_I18N_API NumberStringBuilder : public UMemory {
};
public:
NumberStringBuilder();
FormattedStringBuilder();
~NumberStringBuilder();
~FormattedStringBuilder();
NumberStringBuilder(const NumberStringBuilder &other);
FormattedStringBuilder(const FormattedStringBuilder &other);
NumberStringBuilder &operator=(const NumberStringBuilder &other);
// Convention: bottom 4 bits for field, top 4 bits for field category.
// Field category 0 implies the number category so that the number field
// literals can be directly passed as a Field type.
// See the helper functions in "StringBuilderFieldUtils" below.
typedef uint8_t Field;
FormattedStringBuilder &operator=(const FormattedStringBuilder &other);
int32_t length() const;
@ -65,7 +83,7 @@ class U_I18N_API NumberStringBuilder : public UMemory {
UChar32 codePointBefore(int32_t index) const;
NumberStringBuilder &clear();
FormattedStringBuilder &clear();
int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status);
@ -81,19 +99,19 @@ class U_I18N_API NumberStringBuilder : public UMemory {
int32_t splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
int32_t startOther, int32_t endOther, Field field, UErrorCode& status);
int32_t append(const NumberStringBuilder &other, UErrorCode &status);
int32_t append(const FormattedStringBuilder &other, UErrorCode &status);
int32_t insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status);
int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status);
void writeTerminator(UErrorCode& status);
/**
* Gets a "safe" UnicodeString that can be used even after the NumberStringBuilder is destructed.
* Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed.
* */
UnicodeString toUnicodeString() const;
/**
* Gets an "unsafe" UnicodeString that is valid only as long as the NumberStringBuilder is alive and
* Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and
* unchanged. Slightly faster than toUnicodeString().
*/
const UnicodeString toTempUnicodeString() const;
@ -102,13 +120,7 @@ class U_I18N_API NumberStringBuilder : public UMemory {
const char16_t *chars() const;
bool contentEquals(const NumberStringBuilder &other) const;
bool nextFieldPosition(FieldPosition& fp, UErrorCode& status) const;
void getAllFieldPositions(FieldPositionIteratorHandler& fpih, UErrorCode& status) const;
bool nextPosition(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& status) const;
bool contentEquals(const FormattedStringBuilder &other) const;
bool containsField(Field field) const;
@ -145,17 +157,50 @@ class U_I18N_API NumberStringBuilder : public UMemory {
int32_t remove(int32_t index, int32_t count);
static bool isIntOrGroup(Field field);
static bool isNumericField(Field field);
int32_t trimBack(int32_t limit) const;
int32_t trimFront(int32_t start) const;
friend class FormattedValueStringBuilderImpl;
};
/**
 * Helper functions for dealing with the FormattedStringBuilder::Field typedef,
 * which stores fields in a compressed format: the field category occupies the
 * top 4 bits and the field within that category occupies the bottom 4 bits.
 */
class StringBuilderFieldUtils {
public:
    /** An unpacked Field: the field category and the field within that category. */
    struct CategoryFieldPair {
        int32_t category;
        int32_t field;
    };

    /**
     * Compile-time function to construct a Field from a category and a field.
     *
     * Both template arguments must fit in 4 bits, and the category must be
     * nonzero; violations are rejected at compile time.
     *
     * @return (category << 4) | field, as a Field.
     */
    template <int32_t category, int32_t field>
    static constexpr FormattedStringBuilder::Field compress() {
        static_assert(category != 0, "cannot use Undefined category in FieldUtils");
        static_assert(category >= 0 && category <= 0xf, "only 4 bits for category");
        static_assert(field >= 0 && field <= 0xf, "only 4 bits for field");
        // Cast to the Field type itself. Field is unsigned, so going through a
        // signed 8-bit type would be out of range for categories >= 8.
        return static_cast<FormattedStringBuilder::Field>((category << 4) | field);
    }

    /**
     * Runtime inline function to unpack the category and field from the Field.
     *
     * UNUM_FIELD_COUNT unpacks to {UFIELD_CATEGORY_UNDEFINED, 0}. Any other
     * value whose top 4 bits are zero is reported with category
     * UFIELD_CATEGORY_NUMBER.
     */
    static inline CategoryFieldPair expand(FormattedStringBuilder::Field field) {
        if (field == UNUM_FIELD_COUNT) {
            return {UFIELD_CATEGORY_UNDEFINED, 0};
        }
        CategoryFieldPair ret = {
            (field >> 4),
            (field & 0xf)
        };
        if (ret.category == 0) {
            // Category bits of zero imply the number category.
            ret.category = UFIELD_CATEGORY_NUMBER;
        }
        return ret;
    }

    /**
     * Returns whether the Field's category bits are either zero or
     * UFIELD_CATEGORY_NUMBER.
     */
    static inline bool isNumericField(FormattedStringBuilder::Field field) {
        // Same width as CategoryFieldPair::category, for consistency with expand().
        const int32_t category = field >> 4;
        return category == 0 || category == UFIELD_CATEGORY_NUMBER;
    }
};
} // namespace impl
} // namespace number
U_NAMESPACE_END

View file

@ -18,7 +18,7 @@
#include "fphdlimp.h"
#include "util.h"
#include "uvectr32.h"
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
/**
@ -67,7 +67,9 @@ typedef enum UCFPosConstraintType {
U_NAMESPACE_BEGIN
/** Implementation using FieldPositionHandler to accept fields. */
/**
* Implementation of FormattedValue using FieldPositionHandler to accept fields.
*/
class FormattedValueFieldPositionIteratorImpl : public UMemory, public FormattedValue {
public:
@ -112,12 +114,21 @@ private:
};
class FormattedValueNumberStringBuilderImpl : public UMemory, public FormattedValue {
/**
* Implementation of FormattedValue based on FormattedStringBuilder.
*
* The implementation currently revolves around numbers and number fields.
* However, it can be generalized in the future when there is a need.
*
* @author sffc (Shane Carr)
*/
// Exported as U_I18N_API for tests
class U_I18N_API FormattedValueStringBuilderImpl : public UMemory, public FormattedValue {
public:
FormattedValueNumberStringBuilderImpl(number::impl::Field numericField);
FormattedValueStringBuilderImpl(FormattedStringBuilder::Field numericField);
virtual ~FormattedValueNumberStringBuilderImpl();
virtual ~FormattedValueStringBuilderImpl();
// Implementation of FormattedValue (const):
@ -126,17 +137,25 @@ public:
Appendable& appendTo(Appendable& appendable, UErrorCode& status) const U_OVERRIDE;
UBool nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const U_OVERRIDE;
inline number::impl::NumberStringBuilder& getStringRef() {
// Additional helper functions:
UBool nextFieldPosition(FieldPosition& fp, UErrorCode& status) const;
void getAllFieldPositions(FieldPositionIteratorHandler& fpih, UErrorCode& status) const;
inline FormattedStringBuilder& getStringRef() {
return fString;
}
inline const number::impl::NumberStringBuilder& getStringRef() const {
inline const FormattedStringBuilder& getStringRef() const {
return fString;
}
private:
number::impl::NumberStringBuilder fString;
number::impl::Field fNumericField;
FormattedStringBuilder fString;
FormattedStringBuilder::Field fNumericField;
bool nextPositionImpl(ConstrainedFieldPosition& cfpos, FormattedStringBuilder::Field numericField, UErrorCode& status) const;
static bool isIntOrGroup(FormattedStringBuilder::Field field);
static bool isNumericField(FormattedStringBuilder::Field field);
int32_t trimBack(int32_t limit) const;
int32_t trimFront(int32_t start) const;
};

View file

@ -9,35 +9,203 @@
// Other independent implementations should go into their own cpp file for
// better dependency modularization.
#include "unicode/ustring.h"
#include "formattedval_impl.h"
#include "number_types.h"
#include "formatted_string_builder.h"
#include "number_utils.h"
#include "static_unicode_sets.h"
U_NAMESPACE_BEGIN
FormattedValueNumberStringBuilderImpl::FormattedValueNumberStringBuilderImpl(number::impl::Field numericField)
typedef FormattedStringBuilder::Field Field;
FormattedValueStringBuilderImpl::FormattedValueStringBuilderImpl(Field numericField)
: fNumericField(numericField) {
}
FormattedValueNumberStringBuilderImpl::~FormattedValueNumberStringBuilderImpl() {
FormattedValueStringBuilderImpl::~FormattedValueStringBuilderImpl() {
}
UnicodeString FormattedValueNumberStringBuilderImpl::toString(UErrorCode&) const {
UnicodeString FormattedValueStringBuilderImpl::toString(UErrorCode&) const {
return fString.toUnicodeString();
}
UnicodeString FormattedValueNumberStringBuilderImpl::toTempString(UErrorCode&) const {
UnicodeString FormattedValueStringBuilderImpl::toTempString(UErrorCode&) const {
return fString.toTempUnicodeString();
}
Appendable& FormattedValueNumberStringBuilderImpl::appendTo(Appendable& appendable, UErrorCode&) const {
Appendable& FormattedValueStringBuilderImpl::appendTo(Appendable& appendable, UErrorCode&) const {
appendable.appendString(fString.chars(), fString.length());
return appendable;
}
UBool FormattedValueNumberStringBuilderImpl::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const {
UBool FormattedValueStringBuilderImpl::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const {
// NOTE: MSVC sometimes complains when implicitly converting between bool and UBool
return fString.nextPosition(cfpos, fNumericField, status) ? TRUE : FALSE;
return nextPositionImpl(cfpos, fNumericField, status) ? TRUE : FALSE;
}
/**
 * FieldPosition-based lookup of the next span of the number field requested
 * in fp.
 *
 * Returns FALSE immediately for FieldPosition::DONT_CARE. A field outside
 * [0, UNUM_FIELD_COUNT) sets status to U_ILLEGAL_ARGUMENT_ERROR and returns
 * FALSE. On a match, fp's begin/end indices are updated and TRUE is returned.
 *
 * When no match is found for UNUM_FRACTION_FIELD and fp's end index is 0, fp
 * is set to the empty position just past the first run of integer, grouping
 * separator, and decimal separator fields; the return value is still FALSE.
 */
UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const {
    const int32_t requested = fp.getField();
    if (requested == FieldPosition::DONT_CARE) {
        return FALSE;
    }
    if (requested < 0 || requested >= UNUM_FIELD_COUNT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    // Resume the search from the position currently stored in fp.
    ConstrainedFieldPosition cfpos;
    cfpos.constrainField(UFIELD_CATEGORY_NUMBER, requested);
    cfpos.setState(UFIELD_CATEGORY_NUMBER, requested, fp.getBeginIndex(), fp.getEndIndex());
    if (nextPositionImpl(cfpos, 0, status)) {
        fp.setBeginIndex(cfpos.getStart());
        fp.setEndIndex(cfpos.getLimit());
        return TRUE;
    }

    // Only the "fraction not present" case below still needs to touch fp.
    if (requested != UNUM_FRACTION_FIELD || fp.getEndIndex() != 0) {
        return FALSE;
    }

    // Special case: fraction should start after integer if fraction is not present
    const int32_t first = fString.fZero;
    const int32_t limit = first + fString.fLength;
    int32_t pos = first;
    bool seenNumber = false;
    while (pos < limit) {
        const Field current = fString.getFieldPtr()[pos];
        if (isIntOrGroup(current) || current == UNUM_DECIMAL_SEPARATOR_FIELD) {
            seenNumber = true;
        } else if (seenNumber) {
            break;
        }
        pos++;
    }
    fp.setBeginIndex(pos - first);
    fp.setEndIndex(pos - first);
    return FALSE;
}
/**
 * Reports every field span in the string to the handler, in the order that
 * nextPositionImpl() yields them when called with an unconstrained position
 * and no coalesced numeric field.
 */
void FormattedValueStringBuilderImpl::getAllFieldPositions(FieldPositionIteratorHandler& fpih,
        UErrorCode& status) const {
    for (ConstrainedFieldPosition cfpos; nextPositionImpl(cfpos, 0, status);) {
        const int32_t field = cfpos.getField();
        const int32_t start = cfpos.getStart();
        const int32_t limit = cfpos.getLimit();
        fpih.addAttribute(field, start, limit);
    }
}
// Signal the end of the string using a field that doesn't exist and that is
// different from UNUM_FIELD_COUNT, which is used for "null number field".
static constexpr Field kEndField = 0xff;
/**
 * Advances cfpos to the next field span in fString that matches cfpos's
 * constraint, scanning forward from cfpos.getLimit().
 *
 * @param cfpos Holds the constraint and the previous position; on success it
 *     is updated via setState() with the category, field, start, and end of
 *     the span that was found (indices are relative to the string start).
 * @param numericField When nonzero, its expanded category/field is reported
 *     as one span covering each run of fields for which isNumericField() is
 *     true. Zero disables that coalescing.
 * @param status Unused.
 * @return true if cfpos was advanced to a new span; false if the end of the
 *     string was reached without finding one.
 */
bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const {
auto numericCAF = StringBuilderFieldUtils::expand(numericField);
// fieldStart/currField describe the span currently being scanned;
// currField == UNUM_FIELD_COUNT means that no span is open.
int32_t fieldStart = -1;
Field currField = UNUM_FIELD_COUNT;
// The loop runs one index past the last character. That extra iteration sees
// kEndField, which closes a span that extends to the end of the string.
for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) {
Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField;
// Case 1: currently scanning a field.
if (currField != UNUM_FIELD_COUNT) {
if (currField != _field) {
// The open span ends just before index i.
int32_t end = i - fString.fZero;
// Grouping separators can be whitespace; don't throw them out!
if (currField != UNUM_GROUPING_SEPARATOR_FIELD) {
end = trimBack(i - fString.fZero);
}
if (end <= fieldStart) {
// Entire field position is ignorable; skip.
fieldStart = -1;
currField = UNUM_FIELD_COUNT;
i--; // look at this index again
continue;
}
int32_t start = fieldStart;
if (currField != UNUM_GROUPING_SEPARATOR_FIELD) {
start = trimFront(start);
}
auto caf = StringBuilderFieldUtils::expand(currField);
cfpos.setState(caf.category, caf.field, start, end);
return true;
}
continue;
}
// Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
if (cfpos.matchesField(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)
&& i > fString.fZero
// don't return the same field twice in a row:
&& i - fString.fZero > cfpos.getLimit()
&& isIntOrGroup(fString.getFieldPtr()[i - 1])
&& !isIntOrGroup(_field)) {
// Walk backwards to the first index of the integer/grouping run.
int j = i - 1;
for (; j >= fString.fZero && isIntOrGroup(fString.getFieldPtr()[j]); j--) {}
cfpos.setState(
UFIELD_CATEGORY_NUMBER,
UNUM_INTEGER_FIELD,
j - fString.fZero + 1,
i - fString.fZero);
return true;
}
// Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
if (numericField != 0
&& cfpos.matchesField(numericCAF.category, numericCAF.field)
&& i > fString.fZero
// don't return the same field twice in a row:
&& (i - fString.fZero > cfpos.getLimit()
|| cfpos.getCategory() != numericCAF.category
|| cfpos.getField() != numericCAF.field)
&& isNumericField(fString.getFieldPtr()[i - 1])
&& !isNumericField(_field)) {
// Walk backwards to the first index of the numeric run.
int j = i - 1;
for (; j >= fString.fZero && isNumericField(fString.getFieldPtr()[j]); j--) {}
cfpos.setState(
numericCAF.category,
numericCAF.field,
j - fString.fZero + 1,
i - fString.fZero);
return true;
}
// Special case: skip over INTEGER; will be coalesced later.
if (_field == UNUM_INTEGER_FIELD) {
_field = UNUM_FIELD_COUNT;
}
// Case 2: no field starting at this position.
if (_field == UNUM_FIELD_COUNT || _field == kEndField) {
continue;
}
// Case 3: check for field starting at this position
auto caf = StringBuilderFieldUtils::expand(_field);
if (cfpos.matchesField(caf.category, caf.field)) {
fieldStart = i - fString.fZero;
currField = _field;
}
}
// Any open span is closed by the kEndField iteration above, so nothing
// should still be in progress here.
U_ASSERT(currField == UNUM_FIELD_COUNT);
return false;
}
/**
 * Returns whether the field is UNUM_INTEGER_FIELD or
 * UNUM_GROUPING_SEPARATOR_FIELD.
 */
bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) {
    switch (field) {
        case UNUM_INTEGER_FIELD:
        case UNUM_GROUPING_SEPARATOR_FIELD:
            return true;
        default:
            return false;
    }
}
/** Forwards to StringBuilderFieldUtils::isNumericField(). */
bool FormattedValueStringBuilderImpl::isNumericField(Field field) {
    const bool numeric = StringBuilderFieldUtils::isNumericField(field);
    return numeric;
}
/**
 * Returns the result of spanning backwards from `limit` over characters
 * contained in the DEFAULT_IGNORABLES set; indices are relative to the start
 * of the string.
 */
int32_t FormattedValueStringBuilderImpl::trimBack(int32_t limit) const {
    const char16_t* text = fString.getCharPtr() + fString.fZero;
    const int32_t trimmedLimit = unisets::get(unisets::DEFAULT_IGNORABLES)->spanBack(
        text, limit, USET_SPAN_CONTAINED);
    return trimmedLimit;
}
/**
 * Returns `start` advanced past the leading characters, beginning at `start`,
 * that are contained in the DEFAULT_IGNORABLES set; indices are relative to
 * the start of the string.
 */
int32_t FormattedValueStringBuilderImpl::trimFront(int32_t start) const {
    const char16_t* remaining = fString.getCharPtr() + fString.fZero + start;
    const int32_t remainingLength = fString.fLength - start;
    const int32_t skipped = unisets::get(unisets::DEFAULT_IGNORABLES)->span(
        remaining, remainingLength, USET_SPAN_CONTAINED);
    return start + skipped;
}

View file

@ -278,14 +278,14 @@
<ClCompile Include="number_patternstring.cpp" />
<ClCompile Include="number_rounding.cpp" />
<ClCompile Include="number_scientific.cpp" />
<ClCompile Include="number_stringbuilder.cpp" />
<ClCompile Include="formatted_string_builder.cpp" />
<ClCompile Include="number_utils.cpp" />
<ClCompile Include="number_mapper.cpp" />
<ClCompile Include="number_multiplier.cpp" />
<ClCompile Include="number_currencysymbols.cpp" />
<ClCompile Include="number_skeletons.cpp" />
<ClCompile Include="number_capi.cpp" />
<ClCompile Include="numparse_stringsegment.cpp" />
<ClCompile Include="string_segment.cpp" />
<ClCompile Include="numparse_parsednumber.cpp" />
<ClCompile Include="numparse_impl.cpp" />
<ClCompile Include="numparse_symbols.cpp" />
@ -541,7 +541,7 @@
<ClInclude Include="number_patternstring.h" />
<ClInclude Include="number_roundingutils.h" />
<ClInclude Include="number_scientific.h" />
<ClInclude Include="number_stringbuilder.h" />
<ClInclude Include="formatted_string_builder.h" />
<ClInclude Include="number_types.h" />
<ClInclude Include="number_utypes.h" />
<ClInclude Include="number_utils.h" />
@ -549,7 +549,7 @@
<ClInclude Include="number_multiplier.h" />
<ClInclude Include="number_currencysymbols.h" />
<ClInclude Include="number_skeletons.h" />
<ClInclude Include="numparse_stringsegment.h" />
<ClInclude Include="string_segment.h" />
<ClInclude Include="numparse_impl.h" />
<ClInclude Include="numparse_symbols.h" />
<ClInclude Include="numparse_decimal.h" />

View file

@ -585,7 +585,7 @@
<ClCompile Include="number_scientific.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="number_stringbuilder.cpp">
<ClCompile Include="formatted_string_builder.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="number_utils.cpp">
@ -606,7 +606,7 @@
<ClCompile Include="number_capi.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="numparse_stringsegment.cpp">
<ClCompile Include="string_segment.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="numparse_parsednumber.cpp">
@ -878,7 +878,7 @@
<ClInclude Include="number_scientific.h">
<Filter>formatting</Filter>
</ClInclude>
<ClInclude Include="number_stringbuilder.h">
<ClInclude Include="formatted_string_builder.h">
<Filter>formatting</Filter>
</ClInclude>
<ClInclude Include="number_types.h">
@ -902,7 +902,7 @@
<ClInclude Include="number_skeletons.h">
<Filter>formatting</Filter>
</ClInclude>
<ClInclude Include="numparse_stringsegment.h">
<ClInclude Include="string_segment.h">
<Filter>formatting</Filter>
</ClInclude>
<ClInclude Include="numparse_impl.h">

View file

@ -385,14 +385,14 @@
<ClCompile Include="number_patternstring.cpp" />
<ClCompile Include="number_rounding.cpp" />
<ClCompile Include="number_scientific.cpp" />
<ClCompile Include="number_stringbuilder.cpp" />
<ClCompile Include="formatted_string_builder.cpp" />
<ClCompile Include="number_utils.cpp" />
<ClCompile Include="number_mapper.cpp" />
<ClCompile Include="number_multiplier.cpp" />
<ClCompile Include="number_currencysymbols.cpp" />
<ClCompile Include="number_skeletons.cpp" />
<ClCompile Include="number_capi.cpp" />
<ClCompile Include="numparse_stringsegment.cpp" />
<ClCompile Include="string_segment.cpp" />
<ClCompile Include="numparse_parsednumber.cpp" />
<ClCompile Include="numparse_impl.cpp" />
<ClCompile Include="numparse_symbols.cpp" />
@ -646,7 +646,7 @@
<ClInclude Include="number_patternstring.h" />
<ClInclude Include="number_roundingutils.h" />
<ClInclude Include="number_scientific.h" />
<ClInclude Include="number_stringbuilder.h" />
<ClInclude Include="formatted_string_builder.h" />
<ClInclude Include="number_types.h" />
<ClInclude Include="number_utypes.h" />
<ClInclude Include="number_utils.h" />
@ -654,7 +654,7 @@
<ClInclude Include="number_multiplier.h" />
<ClInclude Include="number_currencysymbols.h" />
<ClInclude Include="number_skeletons.h" />
<ClInclude Include="numparse_stringsegment.h" />
<ClInclude Include="string_segment.h" />
<ClInclude Include="numparse_impl.h" />
<ClInclude Include="numparse_symbols.h" />
<ClInclude Include="numparse_decimal.h" />

View file

@ -156,7 +156,7 @@ Field AffixUtils::getFieldForType(AffixPatternType type) {
}
int32_t
AffixUtils::unescape(const UnicodeString &affixPattern, NumberStringBuilder &output, int32_t position,
AffixUtils::unescape(const UnicodeString &affixPattern, FormattedStringBuilder &output, int32_t position,
const SymbolProvider &provider, Field field, UErrorCode &status) {
int32_t length = 0;
AffixTag tag;

View file

@ -11,7 +11,7 @@
#include "number_types.h"
#include "unicode/stringpiece.h"
#include "unicode/unistr.h"
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN namespace number {
@ -134,16 +134,16 @@ class U_I18N_API AffixUtils {
/**
* Executes the unescape state machine. Replaces the unquoted characters "-", "+", "%", "‰", and
* "¤" with the corresponding symbols provided by the {@link SymbolProvider}, and inserts the
* result into the NumberStringBuilder at the requested location.
* result into the FormattedStringBuilder at the requested location.
*
* <p>Example input: "'-'¤x"; example output: "-$x"
*
* @param affixPattern The original string to be unescaped.
* @param output The NumberStringBuilder to mutate with the result.
* @param position The index into the NumberStringBuilder to insert the string.
* @param output The FormattedStringBuilder to mutate with the result.
* @param position The index into the FormattedStringBuilder to insert the string.
* @param provider An object to generate locale symbols.
*/
static int32_t unescape(const UnicodeString& affixPattern, NumberStringBuilder& output,
static int32_t unescape(const UnicodeString& affixPattern, FormattedStringBuilder& output,
int32_t position, const SymbolProvider& provider, Field field,
UErrorCode& status);

View file

@ -62,12 +62,12 @@ UnicodeString& LocalizedNumberFormatterAsFormat::format(const Formattable& obj,
// always return first occurrence:
pos.setBeginIndex(0);
pos.setEndIndex(0);
bool found = data.getStringRef().nextFieldPosition(pos, status);
bool found = data.nextFieldPosition(pos, status);
if (found && appendTo.length() != 0) {
pos.setBeginIndex(pos.getBeginIndex() + appendTo.length());
pos.setEndIndex(pos.getEndIndex() + appendTo.length());
}
appendTo.append(data.getStringRef().toTempUnicodeString());
appendTo.append(data.toTempString(status));
return appendTo;
}
@ -84,10 +84,10 @@ UnicodeString& LocalizedNumberFormatterAsFormat::format(const Formattable& obj,
if (U_FAILURE(status)) {
return appendTo;
}
appendTo.append(data.getStringRef().toTempUnicodeString());
appendTo.append(data.toTempString(status));
if (posIter != nullptr) {
FieldPositionIteratorHandler fpih(posIter, status);
data.getStringRef().getAllFieldPositions(fpih, status);
data.getAllFieldPositions(fpih, status);
}
return appendTo;
}

View file

@ -696,7 +696,7 @@ void LocalizedNumberFormatter::formatImpl(impl::UFormattedNumberData* results, U
void LocalizedNumberFormatter::getAffixImpl(bool isPrefix, bool isNegative, UnicodeString& result,
UErrorCode& status) const {
NumberStringBuilder string;
FormattedStringBuilder string;
auto signum = static_cast<int8_t>(isNegative ? -1 : 1);
// Always return affixes for plural form OTHER.
static const StandardPlural::Form plural = StandardPlural::OTHER;

View file

@ -72,7 +72,7 @@ NumberFormatterImpl::NumberFormatterImpl(const MacroProps& macros, UErrorCode& s
}
int32_t NumberFormatterImpl::formatStatic(const MacroProps& macros, DecimalQuantity& inValue,
NumberStringBuilder& outString, UErrorCode& status) {
FormattedStringBuilder& outString, UErrorCode& status) {
NumberFormatterImpl impl(macros, false, status);
MicroProps& micros = impl.preProcessUnsafe(inValue, status);
if (U_FAILURE(status)) { return 0; }
@ -83,7 +83,7 @@ int32_t NumberFormatterImpl::formatStatic(const MacroProps& macros, DecimalQuant
int32_t NumberFormatterImpl::getPrefixSuffixStatic(const MacroProps& macros, int8_t signum,
StandardPlural::Form plural,
NumberStringBuilder& outString, UErrorCode& status) {
FormattedStringBuilder& outString, UErrorCode& status) {
NumberFormatterImpl impl(macros, false, status);
return impl.getPrefixSuffixUnsafe(signum, plural, outString, status);
}
@ -93,7 +93,7 @@ int32_t NumberFormatterImpl::getPrefixSuffixStatic(const MacroProps& macros, int
// The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation.
// See MicroProps::processQuantity() for details.
int32_t NumberFormatterImpl::format(DecimalQuantity& inValue, NumberStringBuilder& outString,
int32_t NumberFormatterImpl::format(DecimalQuantity& inValue, FormattedStringBuilder& outString,
UErrorCode& status) const {
MicroProps micros;
preProcess(inValue, micros, status);
@ -130,7 +130,7 @@ MicroProps& NumberFormatterImpl::preProcessUnsafe(DecimalQuantity& inValue, UErr
}
int32_t NumberFormatterImpl::getPrefixSuffix(int8_t signum, StandardPlural::Form plural,
NumberStringBuilder& outString, UErrorCode& status) const {
FormattedStringBuilder& outString, UErrorCode& status) const {
if (U_FAILURE(status)) { return 0; }
// #13453: DecimalFormat wants the affixes from the pattern only (modMiddle, aka pattern modifier).
// Safe path: use fImmutablePatternModifier.
@ -141,7 +141,7 @@ int32_t NumberFormatterImpl::getPrefixSuffix(int8_t signum, StandardPlural::Form
}
int32_t NumberFormatterImpl::getPrefixSuffixUnsafe(int8_t signum, StandardPlural::Form plural,
NumberStringBuilder& outString, UErrorCode& status) {
FormattedStringBuilder& outString, UErrorCode& status) {
if (U_FAILURE(status)) { return 0; }
// #13453: DecimalFormat wants the affixes from the pattern only (modMiddle, aka pattern modifier).
// Unsafe path: use fPatternModifier.
@ -430,7 +430,7 @@ NumberFormatterImpl::resolvePluralRules(const PluralRules* rulesPtr, const Local
return fRules.getAlias();
}
int32_t NumberFormatterImpl::writeAffixes(const MicroProps& micros, NumberStringBuilder& string,
int32_t NumberFormatterImpl::writeAffixes(const MicroProps& micros, FormattedStringBuilder& string,
int32_t start, int32_t end, UErrorCode& status) {
// Always apply the inner modifier (which is "strong").
int32_t length = micros.modInner->apply(string, start, end, status);
@ -445,7 +445,7 @@ int32_t NumberFormatterImpl::writeAffixes(const MicroProps& micros, NumberString
}
int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuantity& quantity,
NumberStringBuilder& string, int32_t index,
FormattedStringBuilder& string, int32_t index,
UErrorCode& status) {
int32_t length = 0;
if (quantity.isInfinite()) {
@ -487,7 +487,7 @@ int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuanti
}
int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, DecimalQuantity& quantity,
NumberStringBuilder& string, int32_t index,
FormattedStringBuilder& string, int32_t index,
UErrorCode& status) {
int length = 0;
int integerCount = quantity.getUpperDisplayMagnitude() + 1;
@ -513,7 +513,7 @@ int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, Decima
}
int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps& micros, DecimalQuantity& quantity,
NumberStringBuilder& string, int32_t index,
FormattedStringBuilder& string, int32_t index,
UErrorCode& status) {
int length = 0;
int fractionCount = -quantity.getLowerDisplayMagnitude();

View file

@ -8,7 +8,7 @@
#define __NUMBER_FORMATIMPL_H__
#include "number_types.h"
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
#include "number_patternstring.h"
#include "number_utils.h"
#include "number_patternmodifier.h"
@ -35,7 +35,7 @@ class NumberFormatterImpl : public UMemory {
* Builds and evaluates an "unsafe" MicroPropsGenerator, which is cheaper but can be used only once.
*/
static int32_t
formatStatic(const MacroProps &macros, DecimalQuantity &inValue, NumberStringBuilder &outString,
formatStatic(const MacroProps &macros, DecimalQuantity &inValue, FormattedStringBuilder &outString,
UErrorCode &status);
/**
@ -45,13 +45,13 @@ class NumberFormatterImpl : public UMemory {
* the prefix length.
*/
static int32_t getPrefixSuffixStatic(const MacroProps& macros, int8_t signum,
StandardPlural::Form plural, NumberStringBuilder& outString,
StandardPlural::Form plural, FormattedStringBuilder& outString,
UErrorCode& status);
/**
* Evaluates the "safe" MicroPropsGenerator created by "fromMacros".
*/
int32_t format(DecimalQuantity& inValue, NumberStringBuilder& outString, UErrorCode& status) const;
int32_t format(DecimalQuantity& inValue, FormattedStringBuilder& outString, UErrorCode& status) const;
/**
* Like format(), but saves the result into an output MicroProps without additional processing.
@ -61,7 +61,7 @@ class NumberFormatterImpl : public UMemory {
/**
* Like getPrefixSuffixStatic() but uses the safe compiled object.
*/
int32_t getPrefixSuffix(int8_t signum, StandardPlural::Form plural, NumberStringBuilder& outString,
int32_t getPrefixSuffix(int8_t signum, StandardPlural::Form plural, FormattedStringBuilder& outString,
UErrorCode& status) const;
const MicroProps& getRawMicroProps() const {
@ -73,12 +73,12 @@ class NumberFormatterImpl : public UMemory {
* This method formats only the main number, not affixes.
*/
static int32_t writeNumber(const MicroProps& micros, DecimalQuantity& quantity,
NumberStringBuilder& string, int32_t index, UErrorCode& status);
FormattedStringBuilder& string, int32_t index, UErrorCode& status);
/**
* Adds the affixes. Intended to be called immediately after writeNumber.
*/
static int32_t writeAffixes(const MicroProps& micros, NumberStringBuilder& string, int32_t start,
static int32_t writeAffixes(const MicroProps& micros, FormattedStringBuilder& string, int32_t start,
int32_t end, UErrorCode& status);
private:
@ -110,7 +110,7 @@ class NumberFormatterImpl : public UMemory {
MicroProps& preProcessUnsafe(DecimalQuantity &inValue, UErrorCode &status);
int32_t getPrefixSuffixUnsafe(int8_t signum, StandardPlural::Form plural,
NumberStringBuilder& outString, UErrorCode& status);
FormattedStringBuilder& outString, UErrorCode& status);
/**
* If rulesPtr is non-null, return it. Otherwise, return a PluralRules owned by this object for the
@ -136,11 +136,11 @@ class NumberFormatterImpl : public UMemory {
macrosToMicroGenerator(const MacroProps &macros, bool safe, UErrorCode &status);
static int32_t
writeIntegerDigits(const MicroProps &micros, DecimalQuantity &quantity, NumberStringBuilder &string,
writeIntegerDigits(const MicroProps &micros, DecimalQuantity &quantity, FormattedStringBuilder &string,
int32_t index, UErrorCode &status);
static int32_t
writeFractionDigits(const MicroProps &micros, DecimalQuantity &quantity, NumberStringBuilder &string,
writeFractionDigits(const MicroProps &micros, DecimalQuantity &quantity, FormattedStringBuilder &string,
int32_t index, UErrorCode &status);
};

View file

@ -69,7 +69,7 @@ AdoptingModifierStore::~AdoptingModifierStore() {
}
int32_t ConstantAffixModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
UErrorCode &status) const {
// Insert the suffix first since inserting the prefix will change the rightIndex
int length = output.insert(rightIndex, fSuffix, fField, status);
@ -154,7 +154,7 @@ SimpleModifier::SimpleModifier()
: fField(UNUM_FIELD_COUNT), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
}
int32_t SimpleModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
UErrorCode &status) const {
return formatAsPrefixSuffix(output, leftIndex, rightIndex, status);
}
@ -203,7 +203,7 @@ bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
int32_t
SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex,
SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex,
UErrorCode &status) const {
if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
// There is no argument for the inner number; overwrite the entire segment with our string.
@ -227,7 +227,7 @@ SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startI
int32_t
SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, NumberStringBuilder& result,
SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
Field field, UErrorCode& status) {
const UnicodeString& compiledPattern = compiled.compiledPattern;
@ -284,7 +284,7 @@ SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, NumberStrin
}
int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
UErrorCode &status) const {
int32_t length = output.insert(leftIndex, fPrefix, status);
if (fOverwrite) {
@ -333,8 +333,8 @@ bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) c
}
CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix,
const NumberStringBuilder &suffix,
CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix,
const FormattedStringBuilder &suffix,
bool overwrite,
bool strong,
const DecimalFormatSymbols &symbols,
@ -374,7 +374,7 @@ CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStrin
}
}
int32_t CurrencySpacingEnabledModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
UErrorCode &status) const {
// Currency spacing logic
int length = 0;
@ -395,7 +395,7 @@ int32_t CurrencySpacingEnabledModifier::apply(NumberStringBuilder &output, int l
}
int32_t
CurrencySpacingEnabledModifier::applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart,
CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart,
int32_t prefixLen, int32_t suffixStart,
int32_t suffixLen,
const DecimalFormatSymbols &symbols,
@ -414,7 +414,7 @@ CurrencySpacingEnabledModifier::applyCurrencySpacing(NumberStringBuilder &output
}
int32_t
CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index,
CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index,
EAffix affix,
const DecimalFormatSymbols &symbols,
UErrorCode &status) {

View file

@ -12,7 +12,7 @@
#include "unicode/uniset.h"
#include "unicode/simpleformatter.h"
#include "standardplural.h"
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
#include "number_types.h"
U_NAMESPACE_BEGIN namespace number {
@ -28,7 +28,7 @@ class U_I18N_API ConstantAffixModifier : public Modifier, public UObject {
bool strong)
: fPrefix(prefix), fSuffix(suffix), fField(field), fStrong(strong) {}
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const U_OVERRIDE;
int32_t getPrefixLength() const U_OVERRIDE;
@ -64,7 +64,7 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory {
// Default constructor for LongNameHandler.h
SimpleModifier();
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const U_OVERRIDE;
int32_t getPrefixLength() const U_OVERRIDE;
@ -81,7 +81,7 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory {
/**
* TODO: This belongs in SimpleFormatterImpl. The only reason I haven't moved it there yet is because
* NumberStringBuilder is an internal class and SimpleFormatterImpl feels like it should not depend on it.
* FormattedStringBuilder is an internal class and SimpleFormatterImpl feels like it should not depend on it.
*
* <p>
* Formats a value that is already stored inside the StringBuilder <code>result</code> between the indices
@ -100,22 +100,22 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory {
* @return The number of characters (UTF-16 code units) that were added to the StringBuilder.
*/
int32_t
formatAsPrefixSuffix(NumberStringBuilder& result, int32_t startIndex, int32_t endIndex,
formatAsPrefixSuffix(FormattedStringBuilder& result, int32_t startIndex, int32_t endIndex,
UErrorCode& status) const;
/**
* TODO: Like above, this belongs with the rest of the SimpleFormatterImpl code.
* I put it here so that the SimpleFormatter uses in NumberStringBuilder are near each other.
* I put it here so that the SimpleFormatter uses in FormattedStringBuilder are near each other.
*
* <p>
* Applies the compiled two-argument pattern to the NumberStringBuilder.
* Applies the compiled two-argument pattern to the FormattedStringBuilder.
*
* <p>
* This method is optimized for the case where the prefix and suffix are often empty, such as
* in the range pattern like "{0}-{1}".
*/
static int32_t
formatTwoArgPattern(const SimpleFormatter& compiled, NumberStringBuilder& result,
formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
Field field, UErrorCode& status);
@ -131,13 +131,13 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory {
/**
* An implementation of {@link Modifier} that allows for multiple types of fields in the same modifier. Constructed
* based on the contents of two {@link NumberStringBuilder} instances (one for the prefix, one for the suffix).
* based on the contents of two {@link FormattedStringBuilder} instances (one for the prefix, one for the suffix).
*/
class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory {
public:
ConstantMultiFieldModifier(
const NumberStringBuilder &prefix,
const NumberStringBuilder &suffix,
const FormattedStringBuilder &prefix,
const FormattedStringBuilder &suffix,
bool overwrite,
bool strong,
const Modifier::Parameters parameters)
@ -148,8 +148,8 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory {
fParameters(parameters) {}
ConstantMultiFieldModifier(
const NumberStringBuilder &prefix,
const NumberStringBuilder &suffix,
const FormattedStringBuilder &prefix,
const FormattedStringBuilder &suffix,
bool overwrite,
bool strong)
: fPrefix(prefix),
@ -157,7 +157,7 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory {
fOverwrite(overwrite),
fStrong(strong) {}
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const U_OVERRIDE;
int32_t getPrefixLength() const U_OVERRIDE;
@ -173,10 +173,10 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory {
bool semanticallyEquivalent(const Modifier& other) const U_OVERRIDE;
protected:
// NOTE: In Java, these are stored as array pointers. In C++, the NumberStringBuilder is stored by
// NOTE: In Java, these are stored as array pointers. In C++, the FormattedStringBuilder is stored by
// value and is treated internally as immutable.
NumberStringBuilder fPrefix;
NumberStringBuilder fSuffix;
FormattedStringBuilder fPrefix;
FormattedStringBuilder fSuffix;
bool fOverwrite;
bool fStrong;
Modifier::Parameters fParameters;
@ -187,19 +187,19 @@ class U_I18N_API CurrencySpacingEnabledModifier : public ConstantMultiFieldModif
public:
/** Safe code path */
CurrencySpacingEnabledModifier(
const NumberStringBuilder &prefix,
const NumberStringBuilder &suffix,
const FormattedStringBuilder &prefix,
const FormattedStringBuilder &suffix,
bool overwrite,
bool strong,
const DecimalFormatSymbols &symbols,
UErrorCode &status);
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const U_OVERRIDE;
/** Unsafe code path */
static int32_t
applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart, int32_t prefixLen,
applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart, int32_t prefixLen,
int32_t suffixStart, int32_t suffixLen, const DecimalFormatSymbols &symbols,
UErrorCode &status);
@ -218,7 +218,7 @@ class U_I18N_API CurrencySpacingEnabledModifier : public ConstantMultiFieldModif
};
/** Unsafe code path */
static int32_t applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index, EAffix affix,
static int32_t applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index, EAffix affix,
const DecimalFormatSymbols &symbols, UErrorCode &status);
static UnicodeSet
@ -234,7 +234,7 @@ class U_I18N_API EmptyModifier : public Modifier, public UMemory {
public:
explicit EmptyModifier(bool isStrong) : fStrong(isStrong) {}
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const U_OVERRIDE {
(void)output;
(void)leftIndex;

View file

@ -20,8 +20,7 @@ UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedNumber)
UBool FormattedNumber::nextFieldPosition(FieldPosition& fieldPosition, UErrorCode& status) const {
UPRV_FORMATTED_VALUE_METHOD_GUARD(FALSE)
// NOTE: MSVC sometimes complains when implicitly converting between bool and UBool
return fData->getStringRef().nextFieldPosition(fieldPosition, status) ? TRUE : FALSE;
return fData->nextFieldPosition(fieldPosition, status);
}
void FormattedNumber::getAllFieldPositions(FieldPositionIterator& iterator, UErrorCode& status) const {
@ -32,7 +31,7 @@ void FormattedNumber::getAllFieldPositions(FieldPositionIterator& iterator, UErr
void FormattedNumber::getAllFieldPositionsImpl(FieldPositionIteratorHandler& fpih,
UErrorCode& status) const {
UPRV_FORMATTED_VALUE_METHOD_GUARD(UPRV_NOARG)
fData->getStringRef().getAllFieldPositions(fpih, status);
fData->getAllFieldPositions(fpih, status);
}
void FormattedNumber::getDecimalQuantity(impl::DecimalQuantity& output, UErrorCode& status) const {

View file

@ -7,7 +7,7 @@
#include "unicode/numberformatter.h"
#include "number_types.h"
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
#include "number_decimfmtprops.h"
using namespace icu;
@ -17,7 +17,7 @@ using namespace icu::number::impl;
namespace {
int32_t
addPaddingHelper(UChar32 paddingCp, int32_t requiredPadding, NumberStringBuilder &string, int32_t index,
addPaddingHelper(UChar32 paddingCp, int32_t requiredPadding, FormattedStringBuilder &string, int32_t index,
UErrorCode &status) {
for (int32_t i = 0; i < requiredPadding; i++) {
// TODO: If appending to the end, this will cause actual insertion operations. Improve.
@ -60,7 +60,7 @@ Padder Padder::forProperties(const DecimalFormatProperties& properties) {
}
int32_t Padder::padAndApply(const Modifier &mod1, const Modifier &mod2,
NumberStringBuilder &string, int32_t leftIndex, int32_t rightIndex,
FormattedStringBuilder &string, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const {
int32_t modLength = mod1.getCodePointCount() + mod2.getCodePointCount();
int32_t requiredPadding = fWidth - modLength - string.codePointCount();

View file

@ -108,8 +108,8 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator* paren
}
ConstantMultiFieldModifier* MutablePatternModifier::createConstantModifier(UErrorCode& status) {
NumberStringBuilder a;
NumberStringBuilder b;
FormattedStringBuilder a;
FormattedStringBuilder b;
insertPrefix(a, 0, status);
insertSuffix(b, 0, status);
if (fPatternInfo->hasCurrencySign()) {
@ -170,7 +170,7 @@ void MutablePatternModifier::processQuantity(DecimalQuantity& fq, MicroProps& mi
micros.modMiddle = this;
}
int32_t MutablePatternModifier::apply(NumberStringBuilder& output, int32_t leftIndex, int32_t rightIndex,
int32_t MutablePatternModifier::apply(FormattedStringBuilder& output, int32_t leftIndex, int32_t rightIndex,
UErrorCode& status) const {
// The unsafe code path performs self-mutation, so we need a const_cast.
// This method needs to be const because it overrides a const method in the parent class.
@ -248,13 +248,13 @@ bool MutablePatternModifier::semanticallyEquivalent(const Modifier& other) const
UPRV_UNREACHABLE;
}
int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder& sb, int position, UErrorCode& status) {
int32_t MutablePatternModifier::insertPrefix(FormattedStringBuilder& sb, int position, UErrorCode& status) {
prepareAffix(true);
int32_t length = AffixUtils::unescape(currentAffix, sb, position, *this, fField, status);
return length;
}
int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder& sb, int position, UErrorCode& status) {
int32_t MutablePatternModifier::insertSuffix(FormattedStringBuilder& sb, int position, UErrorCode& status) {
prepareAffix(false);
int32_t length = AffixUtils::unescape(currentAffix, sb, position, *this, fField, status);
return length;

View file

@ -184,7 +184,7 @@ class U_I18N_API MutablePatternModifier
void processQuantity(DecimalQuantity &, MicroProps &micros, UErrorCode &status) const U_OVERRIDE;
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const U_OVERRIDE;
int32_t getPrefixLength() const U_OVERRIDE;
@ -240,17 +240,17 @@ class U_I18N_API MutablePatternModifier
* CREATES A NEW HEAP OBJECT; THE CALLER GETS OWNERSHIP.
*
* @param a
* A working NumberStringBuilder object; passed from the outside to prevent the need to create many new
* A working FormattedStringBuilder object; passed from the outside to prevent the need to create many new
* instances if this method is called in a loop.
* @param b
* Another working NumberStringBuilder object.
* Another working FormattedStringBuilder object.
* @return The constant modifier object.
*/
ConstantMultiFieldModifier *createConstantModifier(UErrorCode &status);
int32_t insertPrefix(NumberStringBuilder &sb, int position, UErrorCode &status);
int32_t insertPrefix(FormattedStringBuilder &sb, int position, UErrorCode &status);
int32_t insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status);
int32_t insertSuffix(FormattedStringBuilder &sb, int position, UErrorCode &status);
void prepareAffix(bool isPrefix);
};

View file

@ -8,7 +8,7 @@
#include <cstdlib>
#include "number_scientific.h"
#include "number_utils.h"
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
#include "unicode/unum.h"
#include "number_microprops.h"
@ -36,7 +36,7 @@ void ScientificModifier::set(int32_t exponent, const ScientificHandler *handler)
fHandler = handler;
}
int32_t ScientificModifier::apply(NumberStringBuilder &output, int32_t /*leftIndex*/, int32_t rightIndex,
int32_t ScientificModifier::apply(FormattedStringBuilder &output, int32_t /*leftIndex*/, int32_t rightIndex,
UErrorCode &status) const {
// FIXME: Localized exponent separator location.
int i = rightIndex;

View file

@ -21,7 +21,7 @@ class U_I18N_API ScientificModifier : public UMemory, public Modifier {
void set(int32_t exponent, const ScientificHandler *handler);
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const U_OVERRIDE;
int32_t getPrefixLength() const U_OVERRIDE;

View file

@ -20,6 +20,7 @@
#include "unicode/numberformatter.h"
#include "uinvchar.h"
#include "charstr.h"
#include "string_segment.h"
using namespace icu;
using namespace icu::number;
@ -1217,7 +1218,7 @@ void blueprint_helpers::parseIntegerWidthOption(const StringSegment& segment, Ma
maxInt = 0;
}
for (; offset < segment.length(); offset++) {
if (segment.charAt(offset) == u'#') {
if (maxInt != -1 && segment.charAt(offset) == u'#') {
maxInt++;
} else {
break;

View file

@ -10,10 +10,10 @@
#include "number_types.h"
#include "numparse_types.h"
#include "unicode/ucharstrie.h"
#include "string_segment.h"
using icu::numparse::impl::StringSegment;
U_NAMESPACE_BEGIN namespace number {
U_NAMESPACE_BEGIN
namespace number {
namespace impl {
// Forward-declaration

View file

@ -17,17 +17,16 @@
#include "unicode/platform.h"
#include "unicode/uniset.h"
#include "standardplural.h"
#include "formatted_string_builder.h"
U_NAMESPACE_BEGIN namespace number {
U_NAMESPACE_BEGIN
namespace number {
namespace impl {
// Typedef several enums for brevity and for easier comparison to Java.
// For convenience and historical reasons, import the Field typedef to the namespace.
typedef FormattedStringBuilder::Field Field;
// Convention: bottom 4 bits for field, top 4 bits for field category.
// Field category 0 implies the number category so that the number field
// literals can be directly passed as a Field type.
// See the helper functions in "NumFieldUtils" in number_utils.h
typedef uint8_t Field;
// Typedef several enums for brevity and for easier comparison to Java.
typedef UNumberFormatRoundingMode RoundingMode;
@ -49,7 +48,6 @@ static constexpr char16_t kFallbackPaddingString[] = u" ";
class Modifier;
class MutablePatternModifier;
class DecimalQuantity;
class NumberStringBuilder;
class ModifierStore;
struct MicroProps;
@ -160,7 +158,7 @@ class U_I18N_API Modifier {
* formatted.
* @return The number of characters (UTF-16 code units) that were added to the string builder.
*/
virtual int32_t apply(NumberStringBuilder& output, int leftIndex, int rightIndex,
virtual int32_t apply(FormattedStringBuilder& output, int leftIndex, int rightIndex,
UErrorCode& status) const = 0;
/**

View file

@ -17,6 +17,7 @@
#include "number_roundingutils.h"
#include "decNumber.h"
#include "charstr.h"
#include "formatted_string_builder.h"
U_NAMESPACE_BEGIN
@ -32,52 +33,10 @@ enum CldrPatternStyle {
CLDR_PATTERN_STYLE_COUNT,
};
/**
* Helper functions for dealing with the Field typedef, which stores fields
* in a compressed format.
*/
class NumFieldUtils {
public:
struct CategoryFieldPair {
int32_t category;
int32_t field;
};
/** Compile-time function to construct a Field from a category and a field */
template <int32_t category, int32_t field>
static constexpr Field compress() {
static_assert(category != 0, "cannot use Undefined category in NumFieldUtils");
static_assert(category <= 0xf, "only 4 bits for category");
static_assert(field <= 0xf, "only 4 bits for field");
return static_cast<int8_t>((category << 4) | field);
}
/** Runtime inline function to unpack the category and field from the Field */
static inline CategoryFieldPair expand(Field field) {
if (field == UNUM_FIELD_COUNT) {
return {UFIELD_CATEGORY_UNDEFINED, 0};
}
CategoryFieldPair ret = {
(field >> 4),
(field & 0xf)
};
if (ret.category == 0) {
ret.category = UFIELD_CATEGORY_NUMBER;
}
return ret;
}
static inline bool isNumericField(Field field) {
int8_t category = field >> 4;
return category == 0 || category == UFIELD_CATEGORY_NUMBER;
}
};
// Namespace for naked functions
namespace utils {
inline int32_t insertDigitFromSymbols(NumberStringBuilder& output, int32_t index, int8_t digit,
inline int32_t insertDigitFromSymbols(FormattedStringBuilder& output, int32_t index, int8_t digit,
const DecimalFormatSymbols& symbols, Field field,
UErrorCode& status) {
if (symbols.getCodePointZero() != -1) {

View file

@ -10,7 +10,7 @@
#include "unicode/numberformatter.h"
#include "number_types.h"
#include "number_decimalquantity.h"
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
#include "formattedval_impl.h"
U_NAMESPACE_BEGIN namespace number {
@ -31,9 +31,9 @@ const DecimalQuantity* validateUFormattedNumberToDecimalQuantity(
* The DecimalQuantity is not currently being used by FormattedNumber, but at some point it could be used
* to add a toDecNumber() or similar method.
*/
class UFormattedNumberData : public FormattedValueNumberStringBuilderImpl {
class UFormattedNumberData : public FormattedValueStringBuilderImpl {
public:
UFormattedNumberData() : FormattedValueNumberStringBuilderImpl(0) {}
UFormattedNumberData() : FormattedValueStringBuilderImpl(0) {}
virtual ~UFormattedNumberData();
DecimalQuantity quantity;

View file

@ -13,6 +13,7 @@
#include "numparse_affixes.h"
#include "numparse_utils.h"
#include "number_utils.h"
#include "string_segment.h"
using namespace icu;
using namespace icu::numparse;

View file

@ -11,6 +11,7 @@
#include "numparse_types.h"
#include "numparse_compositions.h"
#include "string_segment.h"
#include "unicode/uniset.h"
using namespace icu;

View file

@ -14,6 +14,7 @@
#include "ucurrimp.h"
#include "unicode/errorcode.h"
#include "numparse_utils.h"
#include "string_segment.h"
using namespace icu;
using namespace icu::numparse;

View file

@ -16,6 +16,7 @@
#include "unicode/uchar.h"
#include "putilimp.h"
#include "number_decimalquantity.h"
#include "string_segment.h"
using namespace icu;
using namespace icu::numparse;

View file

@ -18,6 +18,7 @@
#include "unicode/localpointer.h"
#include "numparse_validators.h"
#include "number_multiplier.h"
#include "string_segment.h"
U_NAMESPACE_BEGIN

View file

@ -11,6 +11,7 @@
#include "numparse_types.h"
#include "number_decimalquantity.h"
#include "string_segment.h"
#include "putilimp.h"
#include <cmath>

View file

@ -12,6 +12,7 @@
#include "numparse_types.h"
#include "numparse_scientific.h"
#include "static_unicode_sets.h"
#include "string_segment.h"
using namespace icu;
using namespace icu::numparse;

View file

@ -1,24 +0,0 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#ifndef __NUMPARSE_STRINGSEGMENT_H__
#define __NUMPARSE_STRINGSEGMENT_H__
#include "numparse_types.h"
#include "number_types.h"
#include "unicode/unistr.h"
U_NAMESPACE_BEGIN
namespace numparse {
namespace impl {
} // namespace impl
} // namespace numparse
U_NAMESPACE_END
#endif //__NUMPARSE_STRINGSEGMENT_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -12,6 +12,7 @@
#include "numparse_types.h"
#include "numparse_symbols.h"
#include "numparse_utils.h"
#include "string_segment.h"
using namespace icu;
using namespace icu::numparse;

View file

@ -9,12 +9,13 @@
#include "unicode/uobject.h"
#include "number_decimalquantity.h"
#include "string_segment.h"
U_NAMESPACE_BEGIN namespace numparse {
U_NAMESPACE_BEGIN
namespace numparse {
namespace impl {
// Forward-declarations
class StringSegment;
class ParsedNumber;
typedef int32_t result_flags_t;
@ -169,115 +170,6 @@ class U_I18N_API ParsedNumber {
};
/**
* A mutable class allowing for a UnicodeString with a variable offset and length. The charAt and
* length methods operate relative to the current offset into the UnicodeString.
*
* The wrapped string is held by reference (see fStr below), not copied, so the UnicodeString passed
* to the constructor must outlive this StringSegment.
*
* @author sffc
*/
// Exported as U_I18N_API for tests
class U_I18N_API StringSegment : public UMemory {
public:
// NOTE(review): ignoreCase presumably initializes fFoldCase -- confirm in the implementation file.
StringSegment(const UnicodeString& str, bool ignoreCase);
int32_t getOffset() const;
void setOffset(int32_t start);
/**
* Equivalent to <code>setOffset(getOffset()+delta)</code>.
*
* <p>
* This method is usually called by a Matcher to register that a char was consumed. If the char is
* strong (it usually is, except for things like whitespace), follow this with a call to
* {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
*/
void adjustOffset(int32_t delta);
/**
* Adjusts the offset by the width of the current code point, either 1 or 2 chars.
*/
void adjustOffsetByCodePoint();
void setLength(int32_t length);
// NOTE(review): presumably undoes a previous setLength() -- confirm in the implementation file.
void resetLength();
int32_t length() const;
char16_t charAt(int32_t index) const;
UChar32 codePointAt(int32_t index) const;
UnicodeString toUnicodeString() const;
// NOTE(review): the name suggests a short-lived view of the underlying buffer rather than an
// independent copy -- confirm in the implementation file before retaining the result.
const UnicodeString toTempUnicodeString() const;
/**
* Returns the first code point in the string segment, or -1 if the string starts with an invalid
* code point.
*
* <p>
* <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles case
* folding logic, instead of this method.
*/
UChar32 getCodePoint() const;
/**
* Returns true if the first code point of this StringSegment equals the given code point.
*
* <p>
* This method will perform case folding if case folding is enabled for the parser.
*/
bool startsWith(UChar32 otherCp) const;
/**
* Returns true if the first code point of this StringSegment is in the given UnicodeSet.
*/
bool startsWith(const UnicodeSet& uniset) const;
/**
* Returns true if there is at least one code point of overlap between this StringSegment and the
* given UnicodeString.
*/
bool startsWith(const UnicodeString& other) const;
/**
* Returns the length of the prefix shared by this StringSegment and the given UnicodeString. For
* example, if this string segment is "aab", and the given string is "aac", this method returns 2,
* since the first 2 characters are the same.
*
* <p>
* This method only returns offsets along code point boundaries.
*
* <p>
* This method will perform case folding if case folding was enabled in the constructor.
*
* <p>
* IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check.
*/
int32_t getCommonPrefixLength(const UnicodeString& other);
/**
* Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
* enabled for the parser.
*/
int32_t getCaseSensitivePrefixLength(const UnicodeString& other);
bool operator==(const UnicodeString& other) const;
private:
// Reference member: the wrapped string is not owned or copied by this object.
const UnicodeString& fStr;
// NOTE(review): fStart/fEnd presumably delimit the current segment within fStr -- confirm.
int32_t fStart;
int32_t fEnd;
bool fFoldCase;
// Presumably the shared worker behind getCommonPrefixLength / getCaseSensitivePrefixLength,
// with foldCase selecting the comparison mode -- confirm in the implementation file.
int32_t getPrefixLengthInternal(const UnicodeString& other, bool foldCase);
static bool codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase);
};
/**
* The core interface implemented by all matchers used for number parsing.
*

View file

@ -382,7 +382,7 @@ UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedNumberRange)
UBool FormattedNumberRange::nextFieldPosition(FieldPosition& fieldPosition, UErrorCode& status) const {
UPRV_FORMATTED_VALUE_METHOD_GUARD(FALSE)
// NOTE: MSVC sometimes complains when implicitly converting between bool and UBool
return fData->getStringRef().nextFieldPosition(fieldPosition, status) ? TRUE : FALSE;
return fData->nextFieldPosition(fieldPosition, status);
}
void FormattedNumberRange::getAllFieldPositions(FieldPositionIterator& iterator, UErrorCode& status) const {
@ -393,7 +393,7 @@ void FormattedNumberRange::getAllFieldPositions(FieldPositionIterator& iterator,
void FormattedNumberRange::getAllFieldPositionsImpl(
FieldPositionIteratorHandler& fpih, UErrorCode& status) const {
UPRV_FORMATTED_VALUE_METHOD_GUARD(UPRV_NOARG)
fData->getStringRef().getAllFieldPositions(fpih, status);
fData->getAllFieldPositions(fpih, status);
}
UnicodeString FormattedNumberRange::getFirstDecimal(UErrorCode& status) const {

View file

@ -397,7 +397,7 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
break;
}
NumberStringBuilder& string = data.getStringRef();
FormattedStringBuilder& string = data.getStringRef();
int32_t lengthPrefix = 0;
int32_t length1 = 0;
int32_t lengthInfix = 0;

View file

@ -13,7 +13,7 @@
#include "number_types.h"
#include "number_decimalquantity.h"
#include "number_formatimpl.h"
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
#include "formattedval_impl.h"
U_NAMESPACE_BEGIN namespace number {
@ -29,9 +29,9 @@ namespace impl {
* Possible magic number: 0x46445200
* Reads in ASCII as "FDR" (FormatteDnumberRange with room at the end)
*/
class UFormattedNumberRangeData : public FormattedValueNumberStringBuilderImpl {
class UFormattedNumberRangeData : public FormattedValueStringBuilderImpl {
public:
UFormattedNumberRangeData() : FormattedValueNumberStringBuilderImpl(0) {}
UFormattedNumberRangeData() : FormattedValueStringBuilderImpl(0) {}
virtual ~UFormattedNumberRangeData();
DecimalQuantity quantity1;

View file

@ -26,7 +26,7 @@
#include "uassert.h"
#include "number_decimalquantity.h"
#include "number_utypes.h"
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
U_NAMESPACE_BEGIN
@ -180,7 +180,7 @@ void QuantityFormatter::formatAndSelect(
double quantity,
const NumberFormat& fmt,
const PluralRules& rules,
number::impl::NumberStringBuilder& output,
FormattedStringBuilder& output,
StandardPlural::Form& pluralForm,
UErrorCode& status) {
UnicodeString pluralKeyword;

View file

@ -26,12 +26,7 @@ class PluralRules;
class NumberFormat;
class Formattable;
class FieldPosition;
namespace number {
namespace impl {
class NumberStringBuilder;
}
}
class FormattedStringBuilder;
/**
* A plural aware formatter that is good for expressing a single quantity and
@ -129,7 +124,7 @@ public:
/**
* Formats a quantity and selects its plural form. The output is appended
* to a NumberStringBuilder in order to retain field information.
* to a FormattedStringBuilder in order to retain field information.
*
* @param quantity The number to format.
* @param fmt The formatter to use to format the number.
@ -144,7 +139,7 @@ public:
double quantity,
const NumberFormat& fmt,
const PluralRules& rules,
number::impl::NumberStringBuilder& output,
FormattedStringBuilder& output,
StandardPlural::Form& pluralForm,
UErrorCode& status);

View file

@ -43,7 +43,7 @@
#include "standardplural.h"
#include "unifiedcache.h"
#include "util.h"
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
#include "number_utypes.h"
#include "number_modifiers.h"
#include "formattedval_impl.h"
@ -725,14 +725,14 @@ const RelativeDateTimeCacheData *LocaleCacheKey<RelativeDateTimeCacheData>::crea
static constexpr number::impl::Field kRDTNumericField
= number::impl::NumFieldUtils::compress<UFIELD_CATEGORY_RELATIVE_DATETIME, UDAT_REL_NUMERIC_FIELD>();
= StringBuilderFieldUtils::compress<UFIELD_CATEGORY_RELATIVE_DATETIME, UDAT_REL_NUMERIC_FIELD>();
static constexpr number::impl::Field kRDTLiteralField
= number::impl::NumFieldUtils::compress<UFIELD_CATEGORY_RELATIVE_DATETIME, UDAT_REL_LITERAL_FIELD>();
= StringBuilderFieldUtils::compress<UFIELD_CATEGORY_RELATIVE_DATETIME, UDAT_REL_LITERAL_FIELD>();
class FormattedRelativeDateTimeData : public FormattedValueNumberStringBuilderImpl {
class FormattedRelativeDateTimeData : public FormattedValueStringBuilderImpl {
public:
FormattedRelativeDateTimeData() : FormattedValueNumberStringBuilderImpl(kRDTNumericField) {}
FormattedRelativeDateTimeData() : FormattedValueStringBuilderImpl(kRDTNumericField) {}
virtual ~FormattedRelativeDateTimeData();
};

View file

@ -10,14 +10,12 @@
#define UNISTR_FROM_STRING_EXPLICIT
#include "numparse_types.h"
#include "numparse_stringsegment.h"
#include "string_segment.h"
#include "putilimp.h"
#include "unicode/utf16.h"
#include "unicode/uniset.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
U_NAMESPACE_BEGIN
StringSegment::StringSegment(const UnicodeString& str, bool ignoreCase)
@ -143,4 +141,5 @@ bool StringSegment::operator==(const UnicodeString& other) const {
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -0,0 +1,134 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#ifndef __NUMPARSE_STRINGSEGMENT_H__
#define __NUMPARSE_STRINGSEGMENT_H__
#include "unicode/unistr.h"
#include "unicode/uniset.h"
U_NAMESPACE_BEGIN
/**
* A mutable UnicodeString wrapper with a variable offset and length and
* support for case folding. The charAt and length methods operate relative
* to the current offset into the UnicodeString.
*
* Intended to be useful for parsing.
*
* CAUTION: Since this class is mutable, it must not be used anywhere that an
* immutable object is required, like in a cache or as the key of a hash map.
*
* CAUTION: The wrapped string is held by reference (see fStr below), not
* copied, so the UnicodeString passed to the constructor must outlive this
* StringSegment.
*
* @author sffc (Shane Carr)
*/
// Exported as U_I18N_API for tests
class U_I18N_API StringSegment : public UMemory {
public:
// NOTE(review): ignoreCase presumably initializes fFoldCase -- confirm in string_segment.cpp.
StringSegment(const UnicodeString& str, bool ignoreCase);
int32_t getOffset() const;
void setOffset(int32_t start);
/**
* Equivalent to <code>setOffset(getOffset()+delta)</code>.
*
* <p>
* This method is usually called by a Matcher to register that a char was consumed. If the char is
* strong (it usually is, except for things like whitespace), follow this with a call to
* {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
*/
void adjustOffset(int32_t delta);
/**
* Adjusts the offset by the width of the current code point, either 1 or 2 chars.
*/
void adjustOffsetByCodePoint();
void setLength(int32_t length);
// NOTE(review): presumably undoes a previous setLength() -- confirm in string_segment.cpp.
void resetLength();
int32_t length() const;
char16_t charAt(int32_t index) const;
UChar32 codePointAt(int32_t index) const;
UnicodeString toUnicodeString() const;
// NOTE(review): the name suggests a short-lived view of the underlying buffer rather than an
// independent copy -- confirm in string_segment.cpp before retaining the result.
const UnicodeString toTempUnicodeString() const;
/**
* Returns the first code point in the string segment, or -1 if the string starts with an invalid
* code point.
*
* <p>
* <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles case
* folding logic, instead of this method.
*/
UChar32 getCodePoint() const;
/**
* Returns true if the first code point of this StringSegment equals the given code point.
*
* <p>
* This method will perform case folding if case folding was enabled in the constructor.
*/
bool startsWith(UChar32 otherCp) const;
/**
* Returns true if the first code point of this StringSegment is in the given UnicodeSet.
*/
bool startsWith(const UnicodeSet& uniset) const;
/**
* Returns true if there is at least one code point of overlap between this StringSegment and the
* given UnicodeString.
*/
bool startsWith(const UnicodeString& other) const;
/**
* Returns the length of the prefix shared by this StringSegment and the given UnicodeString. For
* example, if this string segment is "aab", and the given string is "aac", this method returns 2,
* since the first 2 characters are the same.
*
* <p>
* This method only returns offsets along code point boundaries.
*
* <p>
* This method will perform case folding if case folding was enabled in the constructor.
*
* <p>
* IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check.
*/
int32_t getCommonPrefixLength(const UnicodeString& other);
/**
* Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding was
* enabled in the constructor.
*/
int32_t getCaseSensitivePrefixLength(const UnicodeString& other);
bool operator==(const UnicodeString& other) const;
private:
// Reference member: the wrapped string is not owned or copied by this object.
const UnicodeString& fStr;
// NOTE(review): fStart/fEnd presumably delimit the current segment within fStr -- confirm.
int32_t fStart;
int32_t fEnd;
bool fFoldCase;
// Presumably the shared worker behind getCommonPrefixLength / getCaseSensitivePrefixLength,
// with foldCase selecting the comparison mode -- confirm in string_segment.cpp.
int32_t getPrefixLengthInternal(const UnicodeString& other, bool foldCase);
static bool codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase);
};
U_NAMESPACE_END
#endif //__NUMPARSE_STRINGSEGMENT_H__
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -85,6 +85,7 @@ U_NAMESPACE_BEGIN
// Forward declarations:
class IFixedDecimal;
class FieldPositionIteratorHandler;
class FormattedStringBuilder;
namespace numparse {
namespace impl {
@ -142,7 +143,6 @@ class MultiplierProducer;
class RoundingImpl;
class ScientificHandler;
class Modifier;
class NumberStringBuilder;
class AffixPatternProvider;
class NumberPropertyMapper;
struct DecimalFormatProperties;
@ -1343,7 +1343,7 @@ class U_I18N_API Padder : public UMemory {
}
int32_t padAndApply(const impl::Modifier &mod1, const impl::Modifier &mod2,
impl::NumberStringBuilder &string, int32_t leftIndex, int32_t rightIndex,
FormattedStringBuilder &string, int32_t leftIndex, int32_t rightIndex,
UErrorCode &status) const;
// To allow MacroProps/MicroProps to initialize empty instances:

View file

@ -24,7 +24,7 @@
* \file
* \brief C API: StringSearch
*
* C Apis for an engine that provides language-sensitive text searching based
* C APIs for an engine that provides language-sensitive text searching based
* on the comparison rules defined in a <tt>UCollator</tt> data struct,
* see <tt>ucol.h</tt>. This ensures that language eccentricity can be
* handled, e.g. for the German collator, characters &szlig; and SS will be matched
@ -55,7 +55,7 @@
* <p>
* This search has APIs similar to that of other text iteration mechanisms
* such as the break iterators in <tt>ubrk.h</tt>. Using these
* APIs, it is easy to scan through text looking for all occurances of
* APIs, it is easy to scan through text looking for all occurrences of
* a given pattern. This search iterator allows changing of direction by
* calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>.
* Though a direction change can occur without calling <tt>reset</tt> first,
@ -130,7 +130,7 @@
* pos = usearch_next(search, &status))
* {
* printf("Found match at %d pos, length is %d\n", pos,
* usearch_getMatchLength(search));
* usearch_getMatchedLength(search));
* }
* }
*
@ -479,7 +479,7 @@ U_STABLE int32_t U_EXPORT2 usearch_getMatchedLength(
* possible. If the buffer fits the matched text exactly, a null-termination
* is not possible, then a U_STRING_NOT_TERMINATED_ERROR set in status.
* Pre-flighting can be either done with length = 0 or the API
* <tt>usearch_getMatchLength</tt>.
* <tt>usearch_getMatchedLength</tt>.
* @param strsrch search iterator data struct
* @param result UChar buffer to store the matched string
* @param resultCapacity length of the result buffer
@ -766,7 +766,7 @@ U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
#ifndef U_HIDE_INTERNAL_API
/**
* Simple forward search for the pattern, starting at a specified index,
* and using using a default set search options.
* and using a default set of search options.
*
* This is an experimental function, and is not an official part of the
* ICU API.
@ -783,7 +783,7 @@ U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
* are part of a combining sequence, as described below.
*
* A match will not include a partial combining sequence. Combining
* character sequences are considered to be inseperable units,
* character sequences are considered to be inseparable units,
* and either match the pattern completely, or are considered to not match
* at all. Thus, for example, an A followed by a combining accent mark will
* not be found when searching for a plain (unaccented) A. (unless
@ -792,7 +792,7 @@ U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
* When beginning a search, the initial starting position, startIdx,
* is assumed to be an acceptable match boundary with respect to
* combining characters. A combining sequence that spans across the
* starting point will not supress a match beginning at startIdx.
* starting point will not suppress a match beginning at startIdx.
*
* Characters that expand to multiple collation elements
* (German sharp-S becoming 'ss', or the composed forms of accented
@ -843,7 +843,7 @@ U_INTERNAL UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
* are part of a combining sequence, as described below.
*
* A match will not include a partial combining sequence. Combining
* character sequences are considered to be inseperable units,
* character sequences are considered to be inseparable units,
* and either match the pattern completely, or are considered to not match
* at all. Thus, for example, an A followed by a combining accent mark will
* not be found when searching for a plain (unaccented) A. (unless
@ -852,7 +852,7 @@ U_INTERNAL UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
* When beginning a search, the initial starting position, startIdx,
* is assumed to be an acceptable match boundary with respect to
* combining characters. A combining sequence that spans across the
* starting point will not supress a match beginning at startIdx.
* starting point will not suppress a match beginning at startIdx.
*
* Characters that expand to multiple collation elements
* (German sharp-S becoming 'ss', or the composed forms of accented

View file

@ -1351,7 +1351,7 @@ inline int getUnblockedAccentIndex(UChar *accents, int32_t *accentsindex)
* @param destinationlength target array size, returning the appended length
* @param source1 null-terminated first array
* @param source2 second array
* @param source2length length of seond array
* @param source2length length of second array
* @param source3 null-terminated third array
* @param status error status if any
* @return new destination array, destination if there was no new allocation
@ -1560,7 +1560,7 @@ inline void cleanUpSafeText(const UStringSearch *strsrch, UChar *safetext,
/**
* Take the rearranged end accents and tries matching. If match failed at
* a seperate preceding set of accents (seperated from the rearranged on by
* a separate preceding set of accents (separated from the rearranged on by
* at least a base character) then we rearrange the preceding accents and
* tries matching again.
* We allow skipping of the ends of the accent set if the ces do not match.
@ -2220,7 +2220,7 @@ int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
/**
* Take the rearranged start accents and tries matching. If match failed at
* a seperate following set of accents (seperated from the rearranged on by
* a separate following set of accents (separated from the rearranged on by
* at least a base character) then we rearrange the preceding accents and
* tries matching again.
* We allow skipping of the ends of the accent set if the ces do not match.
@ -3852,7 +3852,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
#endif
// Input parameter sanity check.
// TODO: should input indicies clip to the text length
// TODO: should input indices clip to the text length
// in the same way that UText does.
if(strsrch->pattern.cesLength == 0 ||
startIdx < 0 ||
@ -4014,7 +4014,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
// Check for the start of the match being within a Collation Element Expansion,
// meaning that the first char of the match is only partially matched.
// With exapnsions, the first CE will report the index of the source
// With expansions, the first CE will report the index of the source
// character, and all subsequent (expansions) CEs will report the source index of the
// _following_ character.
int32_t secondIx = firstCEI->highIndex;

View file

@ -932,7 +932,9 @@ group: double_conversion
platform
group: number_representation
number_decimalquantity.o number_stringbuilder.o numparse_stringsegment.o number_utils.o
number_decimalquantity.o string_segment.o number_utils.o
# TODO(ICU-20429) Move formatted_string_builder to its own unit.
formatted_string_builder.o
deps
decnumber double_conversion
# for trimming whitespace around fields

View file

@ -64,10 +64,10 @@ scientificnumberformattertest.o datadrivennumberformattestsuite.o \
numberformattesttuple.o pluralmaptest.o \
numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \
numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
numbertest_stringbuilder.o numbertest_stringsegment.o \
string_segment_test.o \
numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o \
static_unisets_test.o numfmtdatadriventest.o numbertest_range.o erarulestest.o \
formattedvaluetest.o
formattedvaluetest.o formatted_string_builder_test.o
DEPS = $(OBJECTS:.o=.d)

View file

@ -1695,6 +1695,49 @@ void DateIntervalFormatTest::testFormattedDateInterval() {
UPRV_LENGTHOF(expectedFieldPositions));
}
{
const char16_t* message = u"FormattedDateInterval identical dates test: no span field";
const char16_t* expectedString = u"July 20, 2018";
LocalPointer<Calendar> input1(Calendar::createInstance("en-GB", status));
input1->set(2018, 6, 20);
FormattedDateInterval result = fmt->formatToValue(*input1, *input1, status);
static const UFieldPositionWithCategory expectedFieldPositions[] = {
// field, begin index, end index
{UFIELD_CATEGORY_DATE, UDAT_MONTH_FIELD, 0, 4},
{UFIELD_CATEGORY_DATE, UDAT_DATE_FIELD, 5, 7},
{UFIELD_CATEGORY_DATE, UDAT_YEAR_FIELD, 9, 13}};
checkMixedFormattedValue(
message,
result,
expectedString,
expectedFieldPositions,
UPRV_LENGTHOF(expectedFieldPositions));
}
// Test sample code
{
LocalPointer<Calendar> input1(Calendar::createInstance("en-GB", status));
LocalPointer<Calendar> input2(Calendar::createInstance("en-GB", status));
input1->set(2018, 6, 20);
input2->set(2018, 7, 3);
// Let fmt be a DateIntervalFormat for locale en-US and skeleton dMMMMy
// Let input1 be July 20, 2018 and input2 be August 3, 2018:
FormattedDateInterval result = fmt->formatToValue(*input1, *input2, status);
assertEquals("Expected output from format",
u"July 20 \u2013 August 3, 2018", result.toString(status));
ConstrainedFieldPosition cfpos;
cfpos.constrainField(UFIELD_CATEGORY_DATE_INTERVAL_SPAN, 0);
if (result.nextPosition(cfpos, status)) {
assertEquals("Expect start index", 0, cfpos.getStart());
assertEquals("Expect end index", 7, cfpos.getLimit());
} else {
// No such span: can happen if input dates are equal.
}
assertFalse("No more than one occurrence of the field",
result.nextPosition(cfpos, status));
}
// To test the fallback pattern behavior, make a custom DateIntervalInfo.
DateIntervalInfo dtitvinf(status);
dtitvinf.setFallbackIntervalPattern("<< {1} --- {0} >>", status);

View file

@ -6,7 +6,26 @@
#if !UCONFIG_NO_FORMATTING
#include "putilimp.h"
#include "numbertest.h"
#include "intltest.h"
#include "formatted_string_builder.h"
#include "formattedval_impl.h"
/**
* IntlTest suite exercising FormattedStringBuilder.
*
* Each public test* method is one test case; runIndexedTest() is the IntlTest
* entry point that dispatches to them.
*/
class FormattedStringBuilderTest : public IntlTest {
public:
void testInsertAppendUnicodeString();
void testSplice();
void testInsertAppendCodePoint();
void testCopy();
void testFields();
void testUnlimitedCapacity();
void testCodePoints();
// Test dispatcher; the definition registers the cases above via the TESTCASE_AUTO macros.
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
private:
// Asserts that a reference UnicodeString and a FormattedStringBuilder agree; the definition
// compares at least their lengths and code point counts.
void assertEqualsImpl(const UnicodeString &a, const FormattedStringBuilder &b);
};
static const char16_t *EXAMPLE_STRINGS[] = {
u"",
@ -17,9 +36,9 @@ static const char16_t *EXAMPLE_STRINGS[] = {
u"with combining characters like 🇦🇧🇨🇩",
u"A very very very very very very very very very very long string to force heap"};
void NumberStringBuilderTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
void FormattedStringBuilderTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
if (exec) {
logln("TestSuite NumberStringBuilderTest: ");
logln("TestSuite FormattedStringBuilderTest: ");
}
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testInsertAppendUnicodeString);
@ -32,14 +51,14 @@ void NumberStringBuilderTest::runIndexedTest(int32_t index, UBool exec, const ch
TESTCASE_AUTO_END;
}
void NumberStringBuilderTest::testInsertAppendUnicodeString() {
void FormattedStringBuilderTest::testInsertAppendUnicodeString() {
UErrorCode status = U_ZERO_ERROR;
UnicodeString sb1;
NumberStringBuilder sb2;
FormattedStringBuilder sb2;
for (const char16_t* strPtr : EXAMPLE_STRINGS) {
UnicodeString str(strPtr);
NumberStringBuilder sb3;
FormattedStringBuilder sb3;
sb1.append(str);
// Note: UNUM_FIELD_COUNT is like passing null in Java
sb2.append(str, UNUM_FIELD_COUNT, status);
@ -50,7 +69,7 @@ void NumberStringBuilderTest::testInsertAppendUnicodeString() {
assertEqualsImpl(str, sb3);
UnicodeString sb4;
NumberStringBuilder sb5;
FormattedStringBuilder sb5;
sb4.append(u"😇");
sb4.append(str);
sb4.append(u"xx");
@ -68,7 +87,7 @@ void NumberStringBuilderTest::testInsertAppendUnicodeString() {
assertEqualsImpl(sb4, sb5);
UnicodeString sb4cp(sb4);
NumberStringBuilder sb5cp(sb5);
FormattedStringBuilder sb5cp(sb5);
sb4.append(sb4cp);
sb5.append(sb5cp, status);
assertSuccess("Appending again to sb5", status);
@ -76,7 +95,7 @@ void NumberStringBuilderTest::testInsertAppendUnicodeString() {
}
}
void NumberStringBuilderTest::testSplice() {
void FormattedStringBuilderTest::testSplice() {
static const struct TestCase {
const char16_t* input;
const int32_t startThis;
@ -94,7 +113,7 @@ void NumberStringBuilderTest::testSplice() {
UErrorCode status = U_ZERO_ERROR;
UnicodeString sb1;
NumberStringBuilder sb2;
FormattedStringBuilder sb2;
for (auto cas : cases) {
for (const char16_t* replacementPtr : EXAMPLE_STRINGS) {
UnicodeString replacement(replacementPtr);
@ -125,14 +144,14 @@ void NumberStringBuilderTest::testSplice() {
}
}
void NumberStringBuilderTest::testInsertAppendCodePoint() {
void FormattedStringBuilderTest::testInsertAppendCodePoint() {
static const UChar32 cases[] = {
0, 1, 60, 127, 128, 0x7fff, 0x8000, 0xffff, 0x10000, 0x1f000, 0x10ffff};
UErrorCode status = U_ZERO_ERROR;
UnicodeString sb1;
NumberStringBuilder sb2;
FormattedStringBuilder sb2;
for (UChar32 cas : cases) {
NumberStringBuilder sb3;
FormattedStringBuilder sb3;
sb1.append(cas);
sb2.appendCodePoint(cas, UNUM_FIELD_COUNT, status);
assertSuccess("Appending to sb2", status);
@ -147,7 +166,7 @@ void NumberStringBuilderTest::testInsertAppendCodePoint() {
sb3.charAt(0));
UnicodeString sb4;
NumberStringBuilder sb5;
FormattedStringBuilder sb5;
sb4.append(u"😇xx");
sb4.insert(2, cas);
sb5.append(u"😇xx", UNUM_FIELD_COUNT, status);
@ -158,13 +177,13 @@ void NumberStringBuilderTest::testInsertAppendCodePoint() {
}
}
void NumberStringBuilderTest::testCopy() {
void FormattedStringBuilderTest::testCopy() {
UErrorCode status = U_ZERO_ERROR;
for (UnicodeString str : EXAMPLE_STRINGS) {
NumberStringBuilder sb1;
FormattedStringBuilder sb1;
sb1.append(str, UNUM_FIELD_COUNT, status);
assertSuccess("Appending to sb1 first time", status);
NumberStringBuilder sb2(sb1);
FormattedStringBuilder sb2(sb1);
assertTrue("Content should equal itself", sb1.contentEquals(sb2));
sb1.append("12345", UNUM_FIELD_COUNT, status);
@ -173,25 +192,28 @@ void NumberStringBuilderTest::testCopy() {
}
}
void NumberStringBuilderTest::testFields() {
void FormattedStringBuilderTest::testFields() {
UErrorCode status = U_ZERO_ERROR;
// Note: This is a C++11 for loop that calls the UnicodeString constructor on each iteration.
for (UnicodeString str : EXAMPLE_STRINGS) {
NumberStringBuilder sb;
FormattedValueStringBuilderImpl sbi(0);
FormattedStringBuilder& sb = sbi.getStringRef();
sb.append(str, UNUM_FIELD_COUNT, status);
assertSuccess("Appending to sb", status);
sb.append(str, UNUM_CURRENCY_FIELD, status);
assertSuccess("Appending to sb", status);
assertEquals("Reference string copied twice", str.length() * 2, sb.length());
for (int32_t i = 0; i < str.length(); i++) {
assertEquals("Null field first", (Field) UNUM_FIELD_COUNT, sb.fieldAt(i));
assertEquals("Currency field second", (Field) UNUM_CURRENCY_FIELD, sb.fieldAt(i + str.length()));
assertEquals("Null field first",
(FormattedStringBuilder::Field) UNUM_FIELD_COUNT, sb.fieldAt(i));
assertEquals("Currency field second",
(FormattedStringBuilder::Field) UNUM_CURRENCY_FIELD, sb.fieldAt(i + str.length()));
}
// Very basic FieldPosition test. More robust tests happen in NumberFormatTest.
// Let NumberFormatTest also take care of FieldPositionIterator material.
FieldPosition fp(UNUM_CURRENCY_FIELD);
sb.nextFieldPosition(fp, status);
sbi.nextFieldPosition(fp, status);
assertSuccess("Populating the FieldPosition", status);
assertEquals("Currency start position", str.length(), fp.getBeginIndex());
assertEquals("Currency end position", str.length() * 2, fp.getEndIndex());
@ -200,17 +222,17 @@ void NumberStringBuilderTest::testFields() {
sb.insertCodePoint(2, 100, UNUM_INTEGER_FIELD, status);
assertSuccess("Inserting code point into sb", status);
assertEquals("New length", str.length() * 2 + 1, sb.length());
assertEquals("Integer field", (Field) UNUM_INTEGER_FIELD, sb.fieldAt(2));
assertEquals("Integer field", (FormattedStringBuilder::Field) UNUM_INTEGER_FIELD, sb.fieldAt(2));
}
NumberStringBuilder old(sb);
FormattedStringBuilder old(sb);
sb.append(old, status);
assertSuccess("Appending to myself", status);
int32_t numNull = 0;
int32_t numCurr = 0;
int32_t numInt = 0;
for (int32_t i = 0; i < sb.length(); i++) {
Field field = sb.fieldAt(i);
FormattedStringBuilder::Field field = sb.fieldAt(i);
assertEquals("Field should equal location in old", old.fieldAt(i % old.length()), field);
if (field == UNUM_FIELD_COUNT) {
numNull++;
@ -228,9 +250,9 @@ void NumberStringBuilderTest::testFields() {
}
}
void NumberStringBuilderTest::testUnlimitedCapacity() {
void FormattedStringBuilderTest::testUnlimitedCapacity() {
UErrorCode status = U_ZERO_ERROR;
NumberStringBuilder builder;
FormattedStringBuilder builder;
// The builder should never fail upon repeated appends.
for (int i = 0; i < 1000; i++) {
UnicodeString message("Iteration #");
@ -242,9 +264,9 @@ void NumberStringBuilderTest::testUnlimitedCapacity() {
}
}
void NumberStringBuilderTest::testCodePoints() {
void FormattedStringBuilderTest::testCodePoints() {
UErrorCode status = U_ZERO_ERROR;
NumberStringBuilder nsb;
FormattedStringBuilder nsb;
assertEquals("First is -1 on empty string", -1, nsb.getFirstCodePoint());
assertEquals("Last is -1 on empty string", -1, nsb.getLastCodePoint());
assertEquals("Length is 0 on empty string", 0, nsb.codePointCount());
@ -268,7 +290,7 @@ void NumberStringBuilderTest::testCodePoints() {
assertEquals("Code point count is 2", 2, nsb.codePointCount());
}
void NumberStringBuilderTest::assertEqualsImpl(const UnicodeString &a, const NumberStringBuilder &b) {
void FormattedStringBuilderTest::assertEqualsImpl(const UnicodeString &a, const FormattedStringBuilder &b) {
// TODO: Why won't this compile without the IntlTest:: qualifier?
IntlTest::assertEquals("Lengths should be the same", a.length(), b.length());
IntlTest::assertEquals("Code point counts should be the same", a.countChar32(), b.codePointCount());
@ -285,4 +307,9 @@ void NumberStringBuilderTest::assertEqualsImpl(const UnicodeString &a, const Num
}
}
// Factory for the FormattedStringBuilderTest suite.
// Returns a newly heap-allocated test object; the caller owns it and is
// responsible for deleting it (the format test driver wraps the result in a
// LocalPointer<IntlTest>).
extern IntlTest *createFormattedStringBuilderTest() {
return new FormattedStringBuilderTest();
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -232,6 +232,7 @@
</ClCompile>
<ClCompile Include="dtptngts.cpp" />
<ClCompile Include="fldset.cpp" />
<ClCompile Include="formatted_string_builder_test.cpp" />
<ClCompile Include="genderinfotest.cpp" />
<ClCompile Include="incaltst.cpp" />
<ClCompile Include="itformat.cpp" />
@ -251,8 +252,7 @@
<ClCompile Include="numbertest_modifiers.cpp" />
<ClCompile Include="numbertest_patternmodifier.cpp" />
<ClCompile Include="numbertest_patternstring.cpp" />
<ClCompile Include="numbertest_stringbuilder.cpp" />
<ClCompile Include="numbertest_stringsegment.cpp" />
<ClCompile Include="string_segment_test.cpp" />
<ClCompile Include="numbertest_parse.cpp" />
<ClCompile Include="numbertest_doubleconversion.cpp" />
<ClCompile Include="numbertest_skeletons.cpp" />

View file

@ -217,6 +217,9 @@
<ClCompile Include="fldset.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="formatted_string_builder_test.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="genderinfotest.cpp">
<Filter>formatting</Filter>
</ClCompile>
@ -274,10 +277,7 @@
<ClCompile Include="numbertest_patternstring.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="numbertest_stringbuilder.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="numbertest_stringsegment.cpp">
<ClCompile Include="string_segment_test.cpp">
<Filter>formatting</Filter>
</ClCompile>
<ClCompile Include="numbertest_parse.cpp">

View file

@ -72,6 +72,8 @@ extern IntlTest *createMeasureFormatTest();
extern IntlTest *createNumberFormatSpecificationTest();
extern IntlTest *createScientificNumberFormatterTest();
extern IntlTest *createFormattedValueTest();
extern IntlTest *createFormattedStringBuilderTest();
extern IntlTest *createStringSegmentTest();
#define TESTCLASS(id, TestClass) \
@ -227,6 +229,24 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
callTest(*test, par);
}
break;
case 54:
name = "FormattedStringBuilderTest";
if (exec) {
logln("FormattedStringBuilderTest test---");
logln((UnicodeString)"");
LocalPointer<IntlTest> test(createFormattedStringBuilderTest());
callTest(*test, par);
}
break;
case 55:
name = "StringSegmentTest";
if (exec) {
logln("StringSegmentTest test---");
logln((UnicodeString)"");
LocalPointer<IntlTest> test(createStringSegmentTest());
callTest(*test, par);
}
break;
default: name = ""; break; //needed to end loop
}
if (exec) {

View file

@ -6,11 +6,11 @@
#if !UCONFIG_NO_FORMATTING
#pragma once
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
#include "intltest.h"
#include "itformat.h"
#include "number_affixutils.h"
#include "numparse_stringsegment.h"
#include "string_segment.h"
#include "numrange_impl.h"
#include "unicode/locid.h"
#include "unicode/numberformatter.h"
@ -71,6 +71,7 @@ class NumberFormatterApiTest : public IntlTestWithFieldPosition {
void decimal();
void scale();
void locale();
void skeletonUserGuideExamples();
void formatTypes();
void fieldPositionLogic();
void fieldPositionCoverage();
@ -173,7 +174,7 @@ class ModifiersTest : public IntlTest {
UnicodeString expectedChars, UnicodeString expectedFields,
UErrorCode &status);
void assertModifierEquals(const Modifier &mod, NumberStringBuilder &sb, int32_t expectedPrefixLength,
void assertModifierEquals(const Modifier &mod, FormattedStringBuilder &sb, int32_t expectedPrefixLength,
bool expectedStrong, UnicodeString expectedChars,
UnicodeString expectedFields, UErrorCode &status);
};
@ -203,33 +204,6 @@ class PatternStringTest : public IntlTest {
private:
};
class NumberStringBuilderTest : public IntlTest {
public:
void testInsertAppendUnicodeString();
void testSplice();
void testInsertAppendCodePoint();
void testCopy();
void testFields();
void testUnlimitedCapacity();
void testCodePoints();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
private:
void assertEqualsImpl(const UnicodeString &a, const NumberStringBuilder &b);
};
class StringSegmentTest : public IntlTest {
public:
void testOffset();
void testLength();
void testCharAt();
void testGetCodePoint();
void testCommonPrefixLength();
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
};
class NumberParserTest : public IntlTest {
public:
void testBasic();
@ -339,12 +313,10 @@ class NumberTest : public IntlTest {
TESTCLASS(3, ModifiersTest);
TESTCLASS(4, PatternModifierTest);
TESTCLASS(5, PatternStringTest);
TESTCLASS(6, NumberStringBuilderTest);
TESTCLASS(7, DoubleConversionTest);
TESTCLASS(8, StringSegmentTest);
TESTCLASS(9, NumberParserTest);
TESTCLASS(10, NumberSkeletonTest);
TESTCLASS(11, NumberRangeFormatterTest);
TESTCLASS(6, DoubleConversionTest);
TESTCLASS(7, NumberParserTest);
TESTCLASS(8, NumberSkeletonTest);
TESTCLASS(9, NumberRangeFormatterTest);
default: name = ""; break; // needed to end loop
}
}

View file

@ -217,7 +217,7 @@ void AffixUtilsTest::testUnescapeWithSymbolProvider() {
NumericSymbolProvider provider;
UErrorCode status = U_ZERO_ERROR;
NumberStringBuilder sb;
FormattedStringBuilder sb;
for (auto& cas : cases) {
UnicodeString input(cas[0]);
UnicodeString expected(cas[1]);
@ -239,7 +239,7 @@ void AffixUtilsTest::testUnescapeWithSymbolProvider() {
UnicodeString AffixUtilsTest::unescapeWithDefaults(const SymbolProvider &defaultProvider,
UnicodeString input, UErrorCode &status) {
NumberStringBuilder nsb;
FormattedStringBuilder nsb;
int32_t length = AffixUtils::unescape(input, nsb, 0, defaultProvider, UNUM_FIELD_COUNT, status);
assertEquals("Return value of unescape", nsb.length(), length);
return nsb.toUnicodeString();

View file

@ -89,6 +89,7 @@ void NumberFormatterApiTest::runIndexedTest(int32_t index, UBool exec, const cha
TESTCASE_AUTO(decimal);
TESTCASE_AUTO(scale);
TESTCASE_AUTO(locale);
TESTCASE_AUTO(skeletonUserGuideExamples);
TESTCASE_AUTO(formatTypes);
TESTCASE_AUTO(fieldPositionLogic);
TESTCASE_AUTO(fieldPositionCoverage);
@ -2232,6 +2233,47 @@ void NumberFormatterApiTest::locale() {
assertEquals("Locale withLocale()", u"1\u202f234", actual);
}
void NumberFormatterApiTest::skeletonUserGuideExamples() {
IcuTestErrorCode status(*this, "skeletonUserGuideExamples");
// Test the skeleton examples in userguide/format_parse/numbers/skeletons.md
struct TestCase {
const char16_t* skeleton;
double input;
const char16_t* expected;
} cases[] = {
{u"percent", 25, u"25%"},
{u".00", 25, u"25.00"},
{u"percent .00", 25, u"25.00%"},
{u"scale/100", 0.3, u"30"},
{u"percent scale/100", 0.3, u"30%"},
{u"measure-unit/length-meter", 5, u"5 m"},
{u"measure-unit/length-meter unit-width-full-name", 5, u"5 meters"},
{u"currency/CAD", 10, u"CA$10.00"},
{u"currency/CAD unit-width-narrow", 10, u"$10.00"},
{u"compact-short", 5000, u"5K"},
{u"compact-long", 5000, u"5 thousand"},
{u"compact-short currency/CAD", 5000, u"CA$5K"},
{u"", 5000, u"5,000"},
{u"group-min2", 5000, u"5000"},
{u"group-min2", 15000, u"15,000"},
{u"sign-always", 60, u"+60"},
{u"sign-always", 0, u"+0"},
{u"sign-except-zero", 60, u"+60"},
{u"sign-except-zero", 0, u"0"},
{u"sign-accounting currency/CAD", -40, u"(CA$40.00)"}
};
for (const auto& cas : cases) {
status.setScope(cas.skeleton);
FormattedNumber actual = NumberFormatter::forSkeleton(cas.skeleton, status)
.locale("en-US")
.formatDouble(cas.input, status);
assertEquals(cas.skeleton, cas.expected, actual.toTempString(status));
status.errIfFailureAndReset();
}
}
void NumberFormatterApiTest::formatTypes() {
UErrorCode status = U_ZERO_ERROR;
LocalizedNumberFormatter formatter = NumberFormatter::withLocale(Locale::getEnglish());

View file

@ -7,7 +7,7 @@
#include "putilimp.h"
#include "intltest.h"
#include "number_stringbuilder.h"
#include "formatted_string_builder.h"
#include "number_modifiers.h"
#include "numbertest.h"
@ -36,8 +36,8 @@ void ModifiersTest::testConstantAffixModifier() {
void ModifiersTest::testConstantMultiFieldModifier() {
UErrorCode status = U_ZERO_ERROR;
NumberStringBuilder prefix;
NumberStringBuilder suffix;
FormattedStringBuilder prefix;
FormattedStringBuilder suffix;
ConstantMultiFieldModifier mod1(prefix, suffix, false, true);
assertModifierEquals(mod1, 0, true, u"|", u"n", status);
assertSuccess("Spot 1", status);
@ -87,7 +87,7 @@ void ModifiersTest::testSimpleModifier() {
// Test strange insertion positions
for (int32_t j = 0; j < NUM_OUTPUTS; j++) {
NumberStringBuilder output;
FormattedStringBuilder output;
output.append(outputs[j].baseString, UNUM_FIELD_COUNT, status);
mod.apply(output, outputs[j].leftIndex, outputs[j].rightIndex, status);
UnicodeString expected = expecteds[j][i];
@ -105,8 +105,8 @@ void ModifiersTest::testCurrencySpacingEnabledModifier() {
return;
}
NumberStringBuilder prefix;
NumberStringBuilder suffix;
FormattedStringBuilder prefix;
FormattedStringBuilder suffix;
CurrencySpacingEnabledModifier mod1(prefix, suffix, false, true, symbols, status);
assertSuccess("Spot 2", status);
assertModifierEquals(mod1, 0, true, u"|", u"n", status);
@ -120,15 +120,15 @@ void ModifiersTest::testCurrencySpacingEnabledModifier() {
assertSuccess("Spot 6", status);
// Test the default currency spacing rules
NumberStringBuilder sb;
FormattedStringBuilder sb;
sb.append("123", UNUM_INTEGER_FIELD, status);
assertSuccess("Spot 7", status);
NumberStringBuilder sb1(sb);
FormattedStringBuilder sb1(sb);
assertModifierEquals(mod2, sb1, 3, true, u"USD\u00A0123", u"$$$niii", status);
assertSuccess("Spot 8", status);
// Compare with the unsafe code path
NumberStringBuilder sb2(sb);
FormattedStringBuilder sb2(sb);
sb2.insert(0, "USD", UNUM_CURRENCY_FIELD, status);
assertSuccess("Spot 9", status);
CurrencySpacingEnabledModifier::applyCurrencySpacing(sb2, 0, 3, 6, 0, symbols, status);
@ -149,14 +149,14 @@ void ModifiersTest::testCurrencySpacingEnabledModifier() {
void ModifiersTest::assertModifierEquals(const Modifier &mod, int32_t expectedPrefixLength,
bool expectedStrong, UnicodeString expectedChars,
UnicodeString expectedFields, UErrorCode &status) {
NumberStringBuilder sb;
FormattedStringBuilder sb;
sb.appendCodePoint('|', UNUM_FIELD_COUNT, status);
assertModifierEquals(
mod, sb, expectedPrefixLength, expectedStrong, expectedChars, expectedFields, status);
}
void ModifiersTest::assertModifierEquals(const Modifier &mod, NumberStringBuilder &sb,
void ModifiersTest::assertModifierEquals(const Modifier &mod, FormattedStringBuilder &sb,
int32_t expectedPrefixLength, bool expectedStrong,
UnicodeString expectedChars, UnicodeString expectedFields,
UErrorCode &status) {
@ -171,7 +171,7 @@ void ModifiersTest::assertModifierEquals(const Modifier &mod, NumberStringBuilde
}
UnicodeString debugString;
debugString.append(u"<NumberStringBuilder [");
debugString.append(u"<FormattedStringBuilder [");
debugString.append(expectedChars);
debugString.append(u"] [");
debugString.append(expectedFields);

View file

@ -99,7 +99,7 @@ void PatternModifierTest::testPatternWithNoPlaceholder() {
mod.setNumberProperties(1, StandardPlural::Form::COUNT);
// Unsafe Code Path
NumberStringBuilder nsb;
FormattedStringBuilder nsb;
nsb.append(u"x123y", UNUM_FIELD_COUNT, status);
assertSuccess("Spot 3", status);
mod.apply(nsb, 1, 4, status);
@ -141,21 +141,21 @@ void PatternModifierTest::testMutableEqualsImmutable() {
DecimalQuantity fq;
fq.setToInt(1);
NumberStringBuilder nsb1;
FormattedStringBuilder nsb1;
MicroProps micros1;
mod.addToChain(&micros1);
mod.processQuantity(fq, micros1, status);
micros1.modMiddle->apply(nsb1, 0, 0, status);
assertSuccess("Spot 3", status);
NumberStringBuilder nsb2;
FormattedStringBuilder nsb2;
MicroProps micros2;
LocalPointer<ImmutablePatternModifier> immutable(mod.createImmutable(status));
immutable->applyToMicros(micros2, fq, status);
micros2.modMiddle->apply(nsb2, 0, 0, status);
assertSuccess("Spot 4", status);
NumberStringBuilder nsb3;
FormattedStringBuilder nsb3;
MicroProps micros3;
mod.addToChain(&micros3);
mod.setPatternAttributes(UNUM_SIGN_ALWAYS, false);
@ -168,14 +168,14 @@ void PatternModifierTest::testMutableEqualsImmutable() {
}
UnicodeString PatternModifierTest::getPrefix(const MutablePatternModifier &mod, UErrorCode &status) {
NumberStringBuilder nsb;
FormattedStringBuilder nsb;
mod.apply(nsb, 0, 0, status);
int32_t prefixLength = mod.getPrefixLength();
return UnicodeString(nsb.toUnicodeString(), 0, prefixLength);
}
UnicodeString PatternModifierTest::getSuffix(const MutablePatternModifier &mod, UErrorCode &status) {
NumberStringBuilder nsb;
FormattedStringBuilder nsb;
mod.apply(nsb, 0, 0, status);
int32_t prefixLength = mod.getPrefixLength();
return UnicodeString(nsb.toUnicodeString(), prefixLength, nsb.length() - prefixLength);

View file

@ -149,6 +149,8 @@ void NumberSkeletonTest::invalidTokens() {
u"integer-width/xxx",
u"integer-width/0+",
u"integer-width/+0#",
u"integer-width/+#",
u"integer-width/+#0",
u"scientific/foo"};
expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));

View file

@ -5,8 +5,19 @@
#if !UCONFIG_NO_FORMATTING
#include "numbertest.h"
#include "numparse_stringsegment.h"
#include "string_segment.h"
#include "intltest.h"
// IntlTest suite for StringSegment (see string_segment.h).
// The class is declared in this source file; other translation units obtain
// an instance through createStringSegmentTest().
class StringSegmentTest : public IntlTest {
public:
// Individual test cases; each name indicates the StringSegment feature it covers.
void testOffset();
void testLength();
void testCharAt();
void testGetCodePoint();
void testCommonPrefixLength();
// IntlTest dispatch hook. Presumably maps `index` to the test cases above
// and reports their names through `name` (body not shown here -- confirm).
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
};
static const char16_t* SAMPLE_STRING = u"📻 radio 📻";
@ -101,4 +112,9 @@ void StringSegmentTest::testCommonPrefixLength() {
assertEquals("", 0, segment.getCommonPrefixLength(u"foo"));
}
// Factory for the StringSegmentTest suite.
// Returns a newly heap-allocated test object; the caller owns it and is
// responsible for deleting it.
extern IntlTest *createStringSegmentTest() {
return new StringSegmentTest();
}
#endif

View file

@ -14,9 +14,9 @@ top_builddir = ../..
## All the flags and other definitions are included here.
include $(top_builddir)/icudefs.mk
MIDDLE_SO_TARGET=
OUTPUTFILE=pkgdata.inc
MIDDLE_SO_TARGET=
PKGDATA_TRAILING_SPACE=" "
all : clean
@echo GENCCODE_ASSEMBLY_TYPE=$(GENCCODE_ASSEMBLY) >> $(OUTPUTFILE)

View file

@ -205,10 +205,10 @@ main(int argc,
"\t-c or --copyright include copyright notice\n");
fprintf(stderr,
"\t-e or --encoding encoding of source files\n"
"\t-d of --destdir destination directory, followed by the path, defaults to %s\n"
"\t-s or --sourcedir source directory for files followed by path, defaults to %s\n"
"\t-d or --destdir destination directory, followed by the path, defaults to '%s'\n"
"\t-s or --sourcedir source directory for files followed by path, defaults to '%s'\n"
"\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
"\t followed by path, defaults to %s\n",
"\t followed by path, defaults to '%s'\n",
u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory());
fprintf(stderr,
"\t-j or --write-java write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"

View file

@ -274,11 +274,11 @@ expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenV
}
}
static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment,
int32_t &stringLength, UErrorCode *status)
{
struct UString *tokenValue;
char *result;
uint32_t count;
expect(state, TOK_STRING, &tokenValue, comment, line, status);
@ -287,14 +287,13 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin
return NULL;
}
count = u_strlen(tokenValue->fChars);
if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
if(!uprv_isInvariantUString(tokenValue->fChars, tokenValue->fLength)) {
*status = U_INVALID_FORMAT_ERROR;
error(*line, "invariant characters required for table keys, binary data, etc.");
return NULL;
}
result = static_cast<char *>(uprv_malloc(count+1));
result = static_cast<char *>(uprv_malloc(tokenValue->fLength+1));
if (result == NULL)
{
@ -302,7 +301,8 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin
return NULL;
}
u_UCharsToChars(tokenValue->fChars, result, count+1);
u_UCharsToChars(tokenValue->fChars, result, tokenValue->fLength+1);
stringLength = tokenValue->fLength;
return result;
}
@ -1371,7 +1371,6 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
int32_t value;
UBool readToken = FALSE;
char *stopstring;
uint32_t len;
struct UString memberComments;
IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
@ -1404,7 +1403,8 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
return result;
}
string = getInvariantString(state, NULL, NULL, status);
int32_t stringLength;
string = getInvariantString(state, NULL, NULL, stringLength, status);
if (U_FAILURE(*status))
{
@ -1414,9 +1414,9 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
/* For handling illegal char in the Intvector */
value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
len=(uint32_t)(stopstring-string);
int32_t len = (int32_t)(stopstring-string);
if(len==uprv_strlen(string))
if(len==stringLength)
{
result->add(value, *status);
uprv_free(string);
@ -1454,7 +1454,8 @@ static struct SResource *
parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
{
uint32_t line;
LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
int32_t stringLength;
LocalMemory<char> string(getInvariantString(state, &line, NULL, stringLength, status));
if (string.isNull() || U_FAILURE(*status))
{
return NULL;
@ -1470,46 +1471,45 @@ parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UStri
printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
if (count > 0){
if((count % 2)==0){
LocalMemory<uint8_t> value;
if (value.allocateInsteadAndCopy(count) == NULL)
{
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
LocalMemory<uint8_t> value;
int32_t count = 0;
if (stringLength > 0 && value.allocateInsteadAndCopy(stringLength) == NULL)
{
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
char toConv[3] = {'\0', '\0', '\0'};
for (uint32_t i = 0; i < count; i += 2)
{
toConv[0] = string[i];
toConv[1] = string[i + 1];
char *stopstring;
value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
uint32_t len=(uint32_t)(stopstring-toConv);
if(len!=2)
{
*status=U_INVALID_CHAR_FOUND;
return NULL;
}
}
return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
char toConv[3] = {'\0', '\0', '\0'};
for (int32_t i = 0; i < stringLength;)
{
// Skip spaces (which may have been line endings).
char c0 = string[i++];
if (c0 == ' ') { continue; }
if (i == stringLength) {
*status=U_INVALID_CHAR_FOUND;
error(line, "Encountered invalid binary value (odd number of hex digits)");
return NULL;
}
else
toConv[0] = c0;
toConv[1] = string[i++];
char *stopstring;
value[count++] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
uint32_t len=(uint32_t)(stopstring-toConv);
if(len!=2)
{
*status = U_INVALID_CHAR_FOUND;
error(line, "Encountered invalid binary value (length is odd)");
*status=U_INVALID_CHAR_FOUND;
error(line, "Encountered invalid binary value (not all pairs of hex digits)");
return NULL;
}
}
else
{
if (count == 0) {
warning(startline, "Encountered empty binary value");
return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
} else {
return bin_open(state->bundle, tag, count, value.getAlias(), NULL, comment, status);
}
}
@ -1520,9 +1520,9 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
int32_t value;
char *string;
char *stopstring;
uint32_t len;
string = getInvariantString(state, NULL, NULL, status);
int32_t stringLength;
string = getInvariantString(state, NULL, NULL, stringLength, status);
if (string == NULL || U_FAILURE(*status))
{
@ -1541,7 +1541,7 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
if (uprv_strlen(string) <= 0)
if (stringLength == 0)
{
warning(startline, "Encountered empty integer. Default value is 0.");
}
@ -1549,8 +1549,8 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
/* Allow integer support for hexadecimal, octal digit and decimal*/
/* and handle illegal char in the integer*/
value = uprv_strtoul(string, &stopstring, 0);
len=(uint32_t)(stopstring-string);
if(len==uprv_strlen(string))
int32_t len = (int32_t)(stopstring-string);
if(len==stringLength)
{
result = int_open(state->bundle, tag, value, comment, status);
}
@ -1567,7 +1567,8 @@ static struct SResource *
parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
{
uint32_t line;
LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
int32_t stringLength;
LocalMemory<char> filename(getInvariantString(state, &line, NULL, stringLength, status));
if (U_FAILURE(*status))
{
return NULL;
@ -1628,12 +1629,11 @@ parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UStr
UCHARBUF *ucbuf;
char *fullname = NULL;
int32_t count = 0;
const char* cp = NULL;
const UChar* uBuffer = NULL;
filename = getInvariantString(state, &line, NULL, status);
count = (int32_t)uprv_strlen(filename);
int32_t stringLength;
filename = getInvariantString(state, &line, NULL, stringLength, status);
if (U_FAILURE(*status))
{
@ -1652,7 +1652,7 @@ parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UStr
printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
}
fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
fullname = (char *) uprv_malloc(state->inputdirLength + stringLength + 2);
/* test for NULL */
if(fullname == NULL)
{

View file

@ -504,7 +504,6 @@ main(int argc, char* argv[]) {
if (o.files != NULL) {
pkg_deleteList(o.files);
}
return result;
}
@ -544,6 +543,7 @@ normal_command_mode:
int result = system(cmd);
if (result != 0) {
fprintf(stderr, "-- return status = %d\n", result);
result = 1; // system() result code is platform specific.
}
if (cmd != cmdBuffer && cmd != command) {

View file

@ -368,6 +368,7 @@
<pathelement location="${icu4j.regiondata.jar}"/>
<pathelement location="${icu4j.translit.jar}"/>
<pathelement location="${icu4j.test-framework.jar}"/>
<pathelement location="${icu4j.tools.jar}"/>
<pathelement location="${icu4j.core-tests.jar}"/>
<pathelement location="${icu4j.collate-tests.jar}"/>
<pathelement location="${icu4j.charset-tests.jar}"/>
@ -570,7 +571,7 @@
</icu-junit>
</target>
<target name="packagingCheck" depends="info, core, packaging-tests" description="Run packaging tests">
<target name="packagingCheck" depends="info, core, langdata, regiondata, packaging-tests" description="Run packaging tests">
<antcall target="_packagingCheckNoLangData"/>
<antcall target="_packagingCheckNoRegionData"/>
<antcall target="_packagingCheckNoLangNorRegionData"/>
@ -1201,7 +1202,7 @@
</ant>
</target>
<target name="core-tests" depends="core, test-framework" description="Build core tests">
<target name="core-tests" depends="core, test-framework, tools" description="Build core tests">
<ant dir="${icu4j.core-tests.dir}" inheritAll="false">
<reference refid="junit.jars"/>
</ant>
@ -1249,7 +1250,7 @@
<ant dir="${icu4j.build-tools.dir}" inheritAll="false"/>
</target>
<target name="tools" depends="core, core-tests, collate, translit, translit-tests" description="Build tool classes">
<target name="tools" depends="core, collate, translit" description="Build tool classes">
<ant dir="${icu4j.tools.dir}" inheritAll="false"/>
</target>

View file

@ -28,7 +28,7 @@ class CharsetUTF7 extends CharsetICU {
public CharsetUTF7(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
super(icuCanonicalName, javaCanonicalName, aliases);
maxBytesPerChar=4; /* max 3 bytes per code unit from UTF-7 (base64) */
maxBytesPerChar=5; /* max 3 bytes per code unit from UTF-7 (base64) plus SIN SOUT */
minBytesPerChar=1;
maxCharsPerByte=1;

View file

@ -1,22 +1,17 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number;
package com.ibm.icu.impl;
import java.text.AttributedCharacterIterator;
import java.text.AttributedString;
import java.text.FieldPosition;
import java.text.Format.Field;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.text.ConstrainedFieldPosition;
// NumberFormat is imported only for the toDebugString() implementation.
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UnicodeSet;
/**
* A StringBuilder optimized for number formatting. It implements the following key features beyond a
* A StringBuilder optimized for formatting. It implements the following key features beyond a
* normal JDK StringBuilder:
*
* <ol>
@ -24,33 +19,37 @@ import com.ibm.icu.text.UnicodeSet;
* <li>Keeps track of Fields in an efficient manner.
* <li>String operations are fast-pathed to code point operations when possible.
* </ol>
*
* See also FormattedValueStringBuilderImpl.
*
* @author sffc (Shane Carr)
*/
public class NumberStringBuilder implements CharSequence {
public class FormattedStringBuilder implements CharSequence {
/** A constant, empty NumberStringBuilder. Do NOT call mutative operations on this. */
public static final NumberStringBuilder EMPTY = new NumberStringBuilder();
/** A constant, empty FormattedStringBuilder. Do NOT call mutative operations on this. */
public static final FormattedStringBuilder EMPTY = new FormattedStringBuilder();
private char[] chars;
private Field[] fields;
private int zero;
private int length;
char[] chars;
Field[] fields;
int zero;
int length;
public NumberStringBuilder() {
public FormattedStringBuilder() {
this(40);
}
public NumberStringBuilder(int capacity) {
public FormattedStringBuilder(int capacity) {
chars = new char[capacity];
fields = new Field[capacity];
zero = capacity / 2;
length = 0;
}
public NumberStringBuilder(NumberStringBuilder source) {
public FormattedStringBuilder(FormattedStringBuilder source) {
copyFrom(source);
}
public void copyFrom(NumberStringBuilder source) {
public void copyFrom(FormattedStringBuilder source) {
chars = Arrays.copyOf(source.chars, source.chars.length);
fields = Arrays.copyOf(source.fields, source.fields.length);
zero = source.zero;
@ -101,7 +100,7 @@ public class NumberStringBuilder implements CharSequence {
return Character.codePointBefore(chars, zero + index, zero);
}
public NumberStringBuilder clear() {
public FormattedStringBuilder clear() {
zero = getCapacity() / 2;
length = 0;
return this;
@ -237,20 +236,20 @@ public class NumberStringBuilder implements CharSequence {
}
/**
* Appends the contents of another {@link NumberStringBuilder} to the end of this instance.
* Appends the contents of another {@link FormattedStringBuilder} to the end of this instance.
*
* @return The number of chars added, which is the length of the other {@link NumberStringBuilder}.
* @return The number of chars added, which is the length of the other {@link FormattedStringBuilder}.
*/
public int append(NumberStringBuilder other) {
public int append(FormattedStringBuilder other) {
return insert(length, other);
}
/**
* Inserts the contents of another {@link NumberStringBuilder} into this instance at the given index.
* Inserts the contents of another {@link FormattedStringBuilder} into this instance at the given index.
*
* @return The number of chars added, which is the length of the other {@link NumberStringBuilder}.
* @return The number of chars added, which is the length of the other {@link FormattedStringBuilder}.
*/
public int insert(int index, NumberStringBuilder other) {
public int insert(int index, FormattedStringBuilder other) {
if (this == other) {
throw new IllegalArgumentException("Cannot call insert/append on myself");
}
@ -365,14 +364,14 @@ public class NumberStringBuilder implements CharSequence {
return chars.length;
}
/** Note: this returns a NumberStringBuilder. Do not return publicly. */
/** Note: this returns a FormattedStringBuilder. Do not return publicly. */
@Override
@Deprecated
public CharSequence subSequence(int start, int end) {
assert start >= 0;
assert end <= length;
assert end >= start;
NumberStringBuilder other = new NumberStringBuilder(this);
FormattedStringBuilder other = new FormattedStringBuilder(this);
other.zero = zero + start;
other.length = end - start;
return other;
@ -420,20 +419,22 @@ public class NumberStringBuilder implements CharSequence {
*
* <p>
* For example, if the string is "-123.45", the debug string will be something like
* "&lt;NumberStringBuilder [-123.45] [-iii.ff]&gt;"
* "&lt;FormattedStringBuilder [-123.45] [-iii.ff]&gt;"
*
* @return A string for debugging purposes.
*/
public String toDebugString() {
StringBuilder sb = new StringBuilder();
sb.append("<NumberStringBuilder [");
sb.append("<FormattedStringBuilder [");
sb.append(this.toString());
sb.append("] [");
for (int i = zero; i < zero + length; i++) {
if (fields[i] == null) {
sb.append('n');
} else {
} else if (fieldToDebugChar.containsKey(fields[i])) {
sb.append(fieldToDebugChar.get(fields[i]));
} else {
sb.append('?');
}
}
sb.append("]>");
@ -475,7 +476,7 @@ public class NumberStringBuilder implements CharSequence {
* The instance to compare.
* @return Whether the contents of this instance is currently equal to the given instance.
*/
public boolean contentEquals(NumberStringBuilder other) {
public boolean contentEquals(FormattedStringBuilder other) {
if (length != other.length)
return false;
for (int i = 0; i < length; i++) {
@ -495,170 +496,4 @@ public class NumberStringBuilder implements CharSequence {
public boolean equals(Object other) {
// Equality is deliberately unsupported on this mutable builder: the method
// always throws instead of comparing. Use contentEquals(...) to compare contents.
throw new UnsupportedOperationException("Don't call #hashCode() or #equals() on a mutable.");
}
/**
 * Advances a JDK {@link FieldPosition} to the next span of its field in this string.
 *
 * <p>The field is taken from {@code fp.getFieldAttribute()}; when that is null, the legacy
 * integer IDs {@code NumberFormat.INTEGER_FIELD} and {@code NumberFormat.FRACTION_FIELD} from
 * {@code fp.getField()} are honored instead. The search is delegated to
 * {@link #nextPosition}, seeded with the begin/end indices currently stored in {@code fp}.
 *
 * @param fp
 *            The field position to read the field from and to update on success.
 * @return true if a span was found and written into {@code fp}; false otherwise. When no
 *         FRACTION span is found and {@code fp.getEndIndex()} is 0, {@code fp} is still
 *         updated to an empty range located just after the integer/decimal-separator run.
 * @throws IllegalArgumentException
 *             if the field attribute is not a {@code com.ibm.icu.text.NumberFormat.Field}.
 */
public boolean nextFieldPosition(FieldPosition fp) {
    java.text.Format.Field target = fp.getFieldAttribute();
    if (target == null) {
        // Backwards compatibility: read from fp.getField()
        final int legacyId = fp.getField();
        if (legacyId == NumberFormat.INTEGER_FIELD) {
            target = NumberFormat.Field.INTEGER;
        } else if (legacyId == NumberFormat.FRACTION_FIELD) {
            target = NumberFormat.Field.FRACTION;
        } else {
            // No field is set
            return false;
        }
    }

    final boolean isNumberField = target instanceof NumberFormat.Field;
    if (!isNumberField) {
        throw new IllegalArgumentException(
                "You must pass an instance of com.ibm.icu.text.NumberFormat.Field as your FieldPosition attribute. You passed: "
                        + target.getClass().toString());
    }

    // Search for the requested field only, resuming from where fp currently points.
    final ConstrainedFieldPosition search = new ConstrainedFieldPosition();
    search.constrainField(target);
    search.setState(target, null, fp.getBeginIndex(), fp.getEndIndex());
    final boolean found = nextPosition(search, null);
    if (found) {
        fp.setBeginIndex(search.getStart());
        fp.setEndIndex(search.getLimit());
        return true;
    }

    // Special case: fraction should start after integer if fraction is not present
    if (target != NumberFormat.Field.FRACTION || fp.getEndIndex() != 0) {
        return false;
    }
    final int limit = zero + length;
    int cursor = zero;
    boolean seenNumeric = false;
    while (cursor < limit) {
        final Field current = fields[cursor];
        final boolean numericPart =
                isIntOrGroup(current) || current == NumberFormat.Field.DECIMAL_SEPARATOR;
        if (numericPart) {
            seenNumeric = true;
        } else if (seenNumeric) {
            // First position after the integer/decimal-separator run.
            break;
        }
        cursor++;
    }
    final int emptyFractionIndex = cursor - zero;
    fp.setBeginIndex(emptyFractionIndex);
    fp.setEndIndex(emptyFractionIndex);
    return false;
}
/**
 * Builds an {@link AttributedCharacterIterator} over the current string contents, with one
 * attribute per span reported by {@link #nextPosition}.
 *
 * @param numericField
 *            Passed through unchanged to {@link #nextPosition}; may be null.
 * @return An iterator over the string in which each reported span carries its field as both
 *         the attribute key and the attribute value.
 */
public AttributedCharacterIterator toCharacterIterator(Field numericField) {
    final AttributedString attributed = new AttributedString(toString());
    final ConstrainedFieldPosition cursor = new ConstrainedFieldPosition();
    while (nextPosition(cursor, numericField)) {
        // Backwards compatibility: field value = field
        final Field attribute = cursor.getField();
        final int spanStart = cursor.getStart();
        final int spanLimit = cursor.getLimit();
        attributed.addAttribute(attribute, attribute, spanStart, spanLimit);
    }
    return attributed.getIterator();
}
/**
 * Private-use Field subclass providing a sentinel value. Its single instance, {@link #END},
 * is substituted for the field at the index one past the end of the string while scanning
 * in nextPosition, so that a field running up to the end of the string is still closed.
 */
static class NullField extends Field {
private static final long serialVersionUID = 1L;
// Sentinel standing in for "the field after the last char".
static final NullField END = new NullField("end");
// Private: the only instance is END.
private NullField(String name) {
super(name);
}
}
/**
* Implementation of nextPosition consistent with the contract of FormattedValue.
*
* <p>Scanning resumes at the limit currently stored in cfpos. When a matching field is
* found, cfpos is updated through setState and true is returned. When the end of the
* string is reached without a match, setState is never called and false is returned.
*
* @param cfpos
* The argument passed to the public API.
* @param numericField
* Optional. If non-null, apply this field to the entire numeric portion of the string.
* @return See FormattedValue#nextPosition.
*/
public boolean nextPosition(ConstrainedFieldPosition cfpos, Field numericField) {
// Start index (relative to zero) and type of the run currently being scanned;
// currField == null means that no run is open.
int fieldStart = -1;
Field currField = null;
// The loop deliberately visits index zero + length as well: at that index the
// NullField.END sentinel closes a run that extends to the end of the string.
for (int i = zero + cfpos.getLimit(); i <= zero + length; i++) {
Field _field = (i < zero + length) ? fields[i] : NullField.END;
// Case 1: currently scanning a field.
if (currField != null) {
if (currField != _field) {
// The run ends just before i.
int end = i - zero;
// Grouping separators can be whitespace; don't throw them out!
if (currField != NumberFormat.Field.GROUPING_SEPARATOR) {
end = trimBack(end);
}
if (end <= fieldStart) {
// Entire field position is ignorable; skip.
fieldStart = -1;
currField = null;
i--; // look at this index again
continue;
}
int start = fieldStart;
if (currField != NumberFormat.Field.GROUPING_SEPARATOR) {
start = trimFront(start);
}
cfpos.setState(currField, null, start, end);
return true;
}
// Still inside the same run; keep scanning.
continue;
}
// Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
if (cfpos.matchesField(NumberFormat.Field.INTEGER, null)
&& i > zero
// don't return the same field twice in a row:
&& i - zero > cfpos.getLimit()
&& isIntOrGroup(fields[i - 1])
&& !isIntOrGroup(_field)) {
// Walk backwards to the first index of the contiguous INTEGER/GROUPING_SEPARATOR run.
int j = i - 1;
for (; j >= zero && isIntOrGroup(fields[j]); j--) {}
cfpos.setState(NumberFormat.Field.INTEGER, null, j - zero + 1, i - zero);
return true;
}
// Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
if (numericField != null
&& cfpos.matchesField(numericField, null)
&& i > zero
// don't return the same field twice in a row:
&& (i - zero > cfpos.getLimit() || cfpos.getField() != numericField)
&& isNumericField(fields[i - 1])
&& !isNumericField(_field)) {
// Walk backwards to the first index of the contiguous numeric run.
int j = i - 1;
for (; j >= zero && isNumericField(fields[j]); j--) {}
cfpos.setState(numericField, null, j - zero + 1, i - zero);
return true;
}
// Special case: skip over INTEGER; will be coalesced later.
if (_field == NumberFormat.Field.INTEGER) {
_field = null;
}
// Case 2: no field starting at this position.
if (_field == null || _field == NullField.END) {
continue;
}
// Case 3: check for field starting at this position
if (cfpos.matchesField(_field, null)) {
fieldStart = i - zero;
currField = _field;
}
}
// Any run still open at the sentinel index was closed (or discarded) by Case 1 above.
assert currField == null;
return false;
}
/** Returns true if the field is the INTEGER field or the GROUPING_SEPARATOR field. */
private static boolean isIntOrGroup(Field field) {
    if (field == NumberFormat.Field.INTEGER) {
        return true;
    }
    return field == NumberFormat.Field.GROUPING_SEPARATOR;
}
/**
 * Returns true if the field counts as part of the numeric portion of the string:
 * either no field at all (null) or any NumberFormat.Field.
 */
private static boolean isNumericField(Field field) {
    if (field == null) {
        return true;
    }
    return field instanceof NumberFormat.Field;
}
/**
 * Moves {@code limit} backwards over trailing characters contained in the
 * DEFAULT_IGNORABLES set and returns the resulting index.
 */
private int trimBack(int limit) {
    UnicodeSet ignorables = StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES);
    return ignorables.spanBack(this, limit, UnicodeSet.SpanCondition.CONTAINED);
}
/**
 * Moves {@code start} forwards over leading characters contained in the
 * DEFAULT_IGNORABLES set and returns the resulting index.
 */
private int trimFront(int start) {
    UnicodeSet ignorables = StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES);
    return ignorables.span(this, start, UnicodeSet.SpanCondition.CONTAINED);
}
}

View file

@ -0,0 +1,193 @@
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl;
import java.text.AttributedCharacterIterator;
import java.text.AttributedString;
import java.text.FieldPosition;
import java.text.Format.Field;
import com.ibm.icu.text.ConstrainedFieldPosition;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UnicodeSet;
/**
* Implementation of FormattedValue based on FormattedStringBuilder.
*
* The implementation currently revolves around numbers and number fields.
* However, it can be generalized in the future when there is a need.
*
* In C++, this implements FormattedValue. In Java, it is a stateless
* collection of static functions to avoid having to use nested objects.
* Every function takes the FormattedStringBuilder to operate on as its
* first argument ("self") and reads its zero, length and fields members.
*
* @author sffc (Shane Carr)
*/
public class FormattedValueStringBuilderImpl {
/**
* Legacy FieldPosition lookup: finds the next occurrence of the field requested by fp
* in self and stores its begin/end indices back into fp.
*
* @param self
* The string builder to search.
* @param fp
* The field position to read the requested field from and to update.
* @return true if the field was found and fp was updated; false otherwise.
* @throws IllegalArgumentException
* if the field attribute of fp is not a NumberFormat.Field.
*/
public static boolean nextFieldPosition(FormattedStringBuilder self, FieldPosition fp) {
java.text.Format.Field rawField = fp.getFieldAttribute();
if (rawField == null) {
// Backwards compatibility: read from fp.getField()
if (fp.getField() == NumberFormat.INTEGER_FIELD) {
rawField = NumberFormat.Field.INTEGER;
} else if (fp.getField() == NumberFormat.FRACTION_FIELD) {
rawField = NumberFormat.Field.FRACTION;
} else {
// No field is set
return false;
}
}
if (!(rawField instanceof NumberFormat.Field)) {
throw new IllegalArgumentException(
"You must pass an instance of com.ibm.icu.text.NumberFormat.Field as your FieldPosition attribute. You passed: "
+ rawField.getClass().toString());
}
// Delegate the search to nextPosition, seeded with the caller's current indices.
ConstrainedFieldPosition cfpos = new ConstrainedFieldPosition();
cfpos.constrainField(rawField);
cfpos.setState(rawField, null, fp.getBeginIndex(), fp.getEndIndex());
if (nextPosition(self, cfpos, null)) {
fp.setBeginIndex(cfpos.getStart());
fp.setEndIndex(cfpos.getLimit());
return true;
}
// Special case: fraction should start after integer if fraction is not present
if (rawField == NumberFormat.Field.FRACTION && fp.getEndIndex() == 0) {
// Find the index just past the first run of integer, grouping and decimal
// separator characters (or the end of the string if there is no such run).
boolean inside = false;
int i = self.zero;
for (; i < self.zero + self.length; i++) {
if (isIntOrGroup(self.fields[i]) || self.fields[i] == NumberFormat.Field.DECIMAL_SEPARATOR) {
inside = true;
} else if (inside) {
break;
}
}
fp.setBeginIndex(i - self.zero);
fp.setEndIndex(i - self.zero);
}
return false;
}
/**
* Builds an attributed view of self in which every field reported by nextPosition
* is added as an attribute over its [start, limit) range.
*
* @param self
* The string builder to convert.
* @param numericField
* Optional. Passed through to nextPosition.
* @return An iterator over the attributed string.
*/
public static AttributedCharacterIterator toCharacterIterator(FormattedStringBuilder self, Field numericField) {
ConstrainedFieldPosition cfpos = new ConstrainedFieldPosition();
AttributedString as = new AttributedString(self.toString());
while (nextPosition(self, cfpos, numericField)) {
// Backwards compatibility: field value = field
as.addAttribute(cfpos.getField(), cfpos.getField(), cfpos.getStart(), cfpos.getLimit());
}
return as.getIterator();
}
/**
* Field subclass whose only instance, END, is used by nextPosition as a sentinel
* for the index one past the last character.
*/
static class NullField extends Field {
private static final long serialVersionUID = 1L;
static final NullField END = new NullField("end");
private NullField(String name) {
super(name);
}
}
/**
* Implementation of nextPosition consistent with the contract of FormattedValue.
*
* <p>Scanning resumes at the limit currently stored in cfpos. When a matching field is
* found, cfpos is updated through setState and true is returned. When the end of the
* string is reached without a match, setState is never called and false is returned.
*
* @param self
* The string builder to search.
* @param cfpos
* The argument passed to the public API.
* @param numericField
* Optional. If non-null, apply this field to the entire numeric portion of the string.
* @return See FormattedValue#nextPosition.
*/
public static boolean nextPosition(FormattedStringBuilder self, ConstrainedFieldPosition cfpos, Field numericField) {
// Start index (relative to self.zero) and type of the run currently being scanned;
// currField == null means that no run is open.
int fieldStart = -1;
Field currField = null;
// The loop deliberately visits index zero + length as well: at that index the
// NullField.END sentinel closes a run that extends to the end of the string.
for (int i = self.zero + cfpos.getLimit(); i <= self.zero + self.length; i++) {
Field _field = (i < self.zero + self.length) ? self.fields[i] : NullField.END;
// Case 1: currently scanning a field.
if (currField != null) {
if (currField != _field) {
// The run ends just before i.
int end = i - self.zero;
// Grouping separators can be whitespace; don't throw them out!
if (currField != NumberFormat.Field.GROUPING_SEPARATOR) {
end = trimBack(self, end);
}
if (end <= fieldStart) {
// Entire field position is ignorable; skip.
fieldStart = -1;
currField = null;
i--; // look at this index again
continue;
}
int start = fieldStart;
if (currField != NumberFormat.Field.GROUPING_SEPARATOR) {
start = trimFront(self, start);
}
cfpos.setState(currField, null, start, end);
return true;
}
// Still inside the same run; keep scanning.
continue;
}
// Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
if (cfpos.matchesField(NumberFormat.Field.INTEGER, null)
&& i > self.zero
// don't return the same field twice in a row:
&& i - self.zero > cfpos.getLimit()
&& isIntOrGroup(self.fields[i - 1])
&& !isIntOrGroup(_field)) {
// Walk backwards to the first index of the contiguous INTEGER/GROUPING_SEPARATOR run.
int j = i - 1;
for (; j >= self.zero && isIntOrGroup(self.fields[j]); j--) {}
cfpos.setState(NumberFormat.Field.INTEGER, null, j - self.zero + 1, i - self.zero);
return true;
}
// Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
if (numericField != null
&& cfpos.matchesField(numericField, null)
&& i > self.zero
// don't return the same field twice in a row:
&& (i - self.zero > cfpos.getLimit() || cfpos.getField() != numericField)
&& isNumericField(self.fields[i - 1])
&& !isNumericField(_field)) {
// Walk backwards to the first index of the contiguous numeric run.
int j = i - 1;
for (; j >= self.zero && isNumericField(self.fields[j]); j--) {}
cfpos.setState(numericField, null, j - self.zero + 1, i - self.zero);
return true;
}
// Special case: skip over INTEGER; will be coalesced later.
if (_field == NumberFormat.Field.INTEGER) {
_field = null;
}
// Case 2: no field starting at this position.
if (_field == null || _field == NullField.END) {
continue;
}
// Case 3: check for field starting at this position
if (cfpos.matchesField(_field, null)) {
fieldStart = i - self.zero;
currField = _field;
}
}
// Any run still open at the sentinel index was closed (or discarded) by Case 1 above.
assert currField == null;
return false;
}
/** Returns true if the field is the INTEGER field or the GROUPING_SEPARATOR field. */
private static boolean isIntOrGroup(Field field) {
return field == NumberFormat.Field.INTEGER || field == NumberFormat.Field.GROUPING_SEPARATOR;
}
/**
* Returns true if the field counts as part of the numeric portion of the string:
* either no field at all (null) or any NumberFormat.Field.
*/
private static boolean isNumericField(Field field) {
return field == null || NumberFormat.Field.class.isAssignableFrom(field.getClass());
}
/**
* Moves limit backwards over trailing characters of self that are contained in the
* DEFAULT_IGNORABLES set and returns the resulting index.
*/
private static int trimBack(FormattedStringBuilder self, int limit) {
return StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES)
.spanBack(self, limit, UnicodeSet.SpanCondition.CONTAINED);
}
/**
* Moves start forwards over leading characters of self that are contained in the
* DEFAULT_IGNORABLES set and returns the resulting index.
*/
private static int trimFront(FormattedStringBuilder self, int start) {
return StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES)
.span(self, start, UnicodeSet.SpanCondition.CONTAINED);
}
}

View file

@ -35,17 +35,15 @@ public final class IDNA2003 {
private static final StringPrep namePrep = StringPrep.getInstance(StringPrep.RFC3491_NAMEPREP);
/**
 * Tests whether {@code src} begins with the ACE prefix, comparing each character after
 * ASCII lower-casing.
 *
 * @param src the label to inspect
 * @return true if src is at least as long as ACE_PREFIX and its leading characters
 *         match it; false otherwise
 */
private static boolean startsWithPrefix(StringBuffer src){
    if(src.length() < ACE_PREFIX.length){
        return false;
    }
    for(int i=0; i<ACE_PREFIX.length;i++){
        if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
            // First mismatch decides the result; no need to look further.
            return false;
        }
    }
    return true;
}
private static char toASCIILower(char ch){
@ -168,6 +166,7 @@ public final class IDNA2003 {
while((ch = src.next())!= UCharacterIterator.DONE){
if(ch> 0x7f){
srcIsASCII = false;
break;
}
}
int failPos = -1;

View file

@ -6,14 +6,16 @@ import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UnicodeSet;
/**
* A mutable String wrapper with a variable offset and length and support for case folding.
* <p>
* The charAt, length, and subSequence methods all operate relative to the fixed offset into the String.
* <p>
* CAUTION: Since this class is mutable, it must not be used anywhere that an immutable object is
* required, like in a cache or as the key of a hash map.
* A mutable String wrapper with a variable offset and length and
* support for case folding. The charAt, length, and subSequence methods all
* operate relative to the fixed offset into the String.
*
* @author sffc
* Intended to be useful for parsing.
*
* CAUTION: Since this class is mutable, it must not be used anywhere that an
* immutable object is required, like in a cache or as the key of a hash map.
*
* @author sffc (Shane Carr)
*/
public class StringSegment implements CharSequence {
private final String str;

View file

@ -4,16 +4,18 @@ package com.ibm.icu.impl.locale;
import java.util.Objects;
final class LSR {
static final int REGION_INDEX_LIMIT = 1000 + 26 * 26;
public final class LSR {
public static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
final String language;
final String script;
final String region;
public static final boolean DEBUG_OUTPUT = false;
public final String language;
public final String script;
public final String region;
/** Index for region, negative if ill-formed. @see indexForRegion */
final int regionIndex;
LSR(String language, String script, String region) {
public LSR(String language, String script, String region) {
this.language = language;
this.script = script;
this.region = region;
@ -21,27 +23,27 @@ final class LSR {
}
/**
* Returns a non-negative index for a well-formed region code.
* Returns a positive index (>0) for a well-formed region code.
* Do not rely on a particular region->index mapping; it may change.
* Returns -1 for ill-formed strings.
* Returns 0 for ill-formed strings.
*/
static final int indexForRegion(String region) {
public static final int indexForRegion(String region) {
if (region.length() == 2) {
int a = region.charAt(0) - 'A';
if (a < 0 || 25 < a) { return -1; }
if (a < 0 || 25 < a) { return 0; }
int b = region.charAt(1) - 'A';
if (b < 0 || 25 < b) { return -1; }
return 26 * a + b + 1000;
if (b < 0 || 25 < b) { return 0; }
return 26 * a + b + 1001;
} else if (region.length() == 3) {
int a = region.charAt(0) - '0';
if (a < 0 || 9 < a) { return -1; }
if (a < 0 || 9 < a) { return 0; }
int b = region.charAt(1) - '0';
if (b < 0 || 9 < b) { return -1; }
if (b < 0 || 9 < b) { return 0; }
int c = region.charAt(2) - '0';
if (c < 0 || 9 < c) { return -1; }
return (10 * a + b) * 10 + c;
if (c < 0 || 9 < c) { return 0; }
return (10 * a + b) * 10 + c + 1;
}
return -1;
return 0;
}
@Override

View file

@ -324,7 +324,11 @@ public class LanguageTag {
if (_variants.isEmpty()) {
_variants = new ArrayList<String>(3);
}
_variants.add(s);
// Ignore repeated variant
s = s.toUpperCase();
if (!_variants.contains(s)) {
_variants.add(s);
}
sts._parseLength = itr.currentEnd();
itr.next();
}
@ -343,7 +347,7 @@ public class LanguageTag {
String s = itr.current();
if (isExtensionSingleton(s)) {
int start = itr.currentStart();
String singleton = s;
String singleton = s.toLowerCase();
StringBuilder sb = new StringBuilder(singleton);
itr.next();
@ -367,7 +371,14 @@ public class LanguageTag {
if (_extensions.size() == 0) {
_extensions = new ArrayList<String>(4);
}
_extensions.add(sb.toString());
// Ignore the extension if it is already in _extensions.
boolean alreadyHas = false;
for (String extension : _extensions) {
alreadyHas |= extension.charAt(0) == sb.charAt(0);
}
if (!alreadyHas) {
_extensions.add(sb.toString());
}
found = true;
} else {
break;

View file

@ -2,11 +2,20 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
import java.util.LinkedHashMap;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.Set;
import java.util.TreeMap;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
import com.ibm.icu.util.ULocale;
/**
@ -14,9 +23,21 @@ import com.ibm.icu.util.ULocale;
* Mostly but not only the data for mapping locales to their maximized forms.
*/
public class LocaleDistance {
/** Distance value bit flag, set by the builder. */
public static final int DISTANCE_SKIP_SCRIPT = 0x80;
/** Distance value bit flag, set by trieNext(). */
private static final int DISTANCE_IS_FINAL = 0x100;
private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT =
DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
// Indexes into array of distances.
public static final int IX_DEF_LANG_DISTANCE = 0;
public static final int IX_DEF_SCRIPT_DISTANCE = 1;
public static final int IX_DEF_REGION_DISTANCE = 2;
public static final int IX_MIN_REGION_DISTANCE = 3;
public static final int IX_LIMIT = 4;
private static final int ABOVE_THRESHOLD = 100;
private static final boolean DEBUG_OUTPUT = false;
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
// The trie maps each dlang+slang+dscript+sscript+dregion+sregion
// (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
@ -28,7 +49,7 @@ public class LocaleDistance {
* Maps each region to zero or more single-character partitions.
*/
private final byte[] regionToPartitionsIndex;
private final String[][] partitionArrays;
private final String[] partitionArrays;
/**
* Used to get the paradigm region for a cluster, if there is one.
@ -38,49 +59,127 @@ public class LocaleDistance {
private final int defaultLanguageDistance;
private final int defaultScriptDistance;
private final int defaultRegionDistance;
private final int minRegionDistance;
private final int defaultDemotionPerDesiredLocale;
// TODO: Load prebuilt data from a resource bundle
// to avoid the dependency on the builder code.
// VisibleForTesting
public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build();
/**
 * Plain container for the locale-distance data: the serialized trie, the
 * region-to-partitions index, the partition strings, the paradigm LSRs and the
 * vector of default distances (indexed by the IX_* constants).
 */
public static final class Data {
    public byte[] trie;
    public byte[] regionToPartitionsIndex;
    public String[] partitionArrays;
    public Set<LSR> paradigmLSRs;
    public int[] distances;

    public Data(byte[] trie,
            byte[] regionToPartitionsIndex, String[] partitionArrays,
            Set<LSR> paradigmLSRs, int[] distances) {
        this.trie = trie;
        this.regionToPartitionsIndex = regionToPartitionsIndex;
        this.partitionArrays = partitionArrays;
        this.paradigmLSRs = paradigmLSRs;
        this.distances = distances;
    }

    /**
     * Looks up {@code key} in {@code table}, storing the result in {@code value}.
     *
     * @return the same {@code value} object, now set to the item found
     * @throws MissingResourceException if the table has no such key
     */
    private static UResource.Value getValue(UResource.Table table,
            String key, UResource.Value value) {
        if (!table.findValue(key, value)) {
            throw new MissingResourceException(
                    "langInfo.res missing data", "", "match/" + key);
        }
        return value;
    }

    /**
     * Loads the data from the "match" table of the langInfo resource bundle.
     *
     * @return the loaded data
     * @throws MissingResourceException if a required item is missing, or if the
     *         regionToPartitions binary or the distances vector is too short
     */
    // VisibleForTesting
    public static Data load() throws MissingResourceException {
        ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
                ICUData.ICU_BASE_NAME, "langInfo",
                ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
        UResource.Value value = langInfo.getValueWithFallback("match");
        UResource.Table matchTable = value.getTable();

        ByteBuffer buffer = getValue(matchTable, "trie", value).getBinary();
        byte[] trie = new byte[buffer.remaining()];
        buffer.get(trie);

        buffer = getValue(matchTable, "regionToPartitions", value).getBinary();
        byte[] regionToPartitions = new byte[buffer.remaining()];
        buffer.get(regionToPartitions);
        if (regionToPartitions.length < LSR.REGION_INDEX_LIMIT) {
            throw new MissingResourceException(
                    "langInfo.res binary data too short", "", "match/regionToPartitions");
        }

        String[] partitions = getValue(matchTable, "partitions", value).getStringArray();

        // The paradigms item is optional; it is a flat list of
        // (language, script, region) triples.
        Set<LSR> paradigmLSRs;
        if (matchTable.findValue("paradigms", value)) {
            String[] paradigms = value.getStringArray();
            paradigmLSRs = new HashSet<>(paradigms.length / 3);
            for (int i = 0; i < paradigms.length; i += 3) {
                paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2]));
            }
        } else {
            paradigmLSRs = Collections.emptySet();
        }

        int[] distances = getValue(matchTable, "distances", value).getIntVector();
        if (distances.length < IX_LIMIT) {
            throw new MissingResourceException(
                    "langInfo.res intvector too short", "", "match/distances");
        }

        return new Data(trie, regionToPartitions, partitions, paradigmLSRs, distances);
    }

    /**
     * Compares all five data members by content.
     *
     * @return false for null or for an object of a different class
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) { return true; }
        // Null check first: equals(null) must return false rather than throw.
        if (other == null || !getClass().equals(other.getClass())) { return false; }
        Data od = (Data)other;
        return Arrays.equals(trie, od.trie) &&
                Arrays.equals(regionToPartitionsIndex, od.regionToPartitionsIndex) &&
                Arrays.equals(partitionArrays, od.partitionArrays) &&
                paradigmLSRs.equals(od.paradigmLSRs) &&
                Arrays.equals(distances, od.distances);
    }

    /**
     * Content-based hash code over the same members that equals() compares, so that
     * equal objects have equal hash codes. The fields are mutable: do not modify an
     * instance while it is used as a hash key.
     */
    @Override
    public int hashCode() {
        int result = Arrays.hashCode(trie);
        result = 31 * result + Arrays.hashCode(regionToPartitionsIndex);
        result = 31 * result + Arrays.hashCode(partitionArrays);
        result = 31 * result + paradigmLSRs.hashCode();
        result = 31 * result + Arrays.hashCode(distances);
        return result;
    }
}
// VisibleForTesting
public static final LocaleDistance INSTANCE = new LocaleDistance(Data.load());
/**
 * Creates the matcher data from a loaded {@link Data} object: wraps the serialized
 * trie, takes over the region/partition tables and reads the default distances from
 * the distances vector.
 */
private LocaleDistance(Data data) {
    this.trie = new BytesTrie(data.trie, 0);
    this.regionToPartitionsIndex = data.regionToPartitionsIndex;
    this.partitionArrays = data.partitionArrays;
    this.paradigmLSRs = data.paradigmLSRs;
    defaultLanguageDistance = data.distances[IX_DEF_LANG_DISTANCE];
    defaultScriptDistance = data.distances[IX_DEF_SCRIPT_DISTANCE];
    defaultRegionDistance = data.distances[IX_DEF_REGION_DISTANCE];
    this.minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];

    // The default demotion is the distance between en-Latn-US and en-Latn-GB.
    // This lookup needs all of the fields above to be initialized already.
    LSR en = new LSR("en", "Latn", "US");
    LSR enGB = new LSR("en", "Latn", "GB");
    defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, new LSR[] { enGB },
            50, FavorSubtag.LANGUAGE) & 0xff;

    if (DEBUG_OUTPUT) {
        System.out.println("*** locale distance");
        System.out.println("defaultLanguageDistance=" + defaultLanguageDistance);
        System.out.println("defaultScriptDistance=" + defaultScriptDistance);
        System.out.println("defaultRegionDistance=" + defaultRegionDistance);
        testOnlyPrintDistanceTable();
    }
}
// VisibleForTesting
public int testOnlyDistance(ULocale desired, ULocale supported,
int threshold, DistanceOption distanceOption) {
int threshold, FavorSubtag favorSubtag) {
LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
return getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
threshold, distanceOption) & 0xff;
threshold, favorSubtag) & 0xff;
}
public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST}
// NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight
// than regions, so they might be considered the "normal" case.
/**
* Finds the supported LSR with the smallest distance from the desired one.
* Equivalent LSR subtags must be normalized into a canonical form.
@ -89,14 +188,13 @@ public class LocaleDistance {
* (negative if none has a distance below the threshold),
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
*/
int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
int threshold, DistanceOption distanceOption) {
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
int threshold, FavorSubtag favorSubtag) {
BytesTrie iter = new BytesTrie(trie);
// Look up the desired language only once for all supported LSRs.
// Its "distance" is either a match point value of 0, or a non-match negative value.
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
// Set wantValue=true so that iter reads & skips the match point value.
int desLangDistance = trieNext(iter, desired.language, true, true);
int desLangDistance = trieNext(iter, desired.language, false);
long desLangState = desLangDistance >= 0 && supportedLsrs.length > 1 ? iter.getState64() : 0;
// Index of the supported LSR with the lowest distance.
int bestIndex = -1;
@ -105,26 +203,31 @@ public class LocaleDistance {
boolean star = false;
int distance = desLangDistance;
if (distance >= 0) {
assert (distance & DISTANCE_IS_FINAL) == 0;
if (slIndex != 0) {
iter.resetToState64(desLangState);
}
distance = trieNext(iter, supported.language, true, true);
distance = trieNext(iter, supported.language, true);
}
// Note: The data builder verifies that there are no rules with "any" (*) language and
// real (non *) script or region subtags.
// This means that if the lookup for either language fails we can use
// the default distances without further lookups.
if (distance < 0) { // <*, *>
int flags;
if (distance >= 0) {
flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
} else { // <*, *>
if (desired.language.equals(supported.language)) {
distance = 0;
} else {
distance = defaultLanguageDistance;
}
flags = 0;
star = true;
}
assert 0 <= distance && distance <= 100;
boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST;
if (scriptFirst) {
if (favorSubtag == FavorSubtag.SCRIPT) {
distance >>= 2;
}
if (distance >= threshold) {
@ -132,18 +235,17 @@ public class LocaleDistance {
}
int scriptDistance;
if (star) {
if (star || flags != 0) {
if (desired.script.equals(supported.script)) {
scriptDistance = 0;
} else {
scriptDistance = defaultScriptDistance;
}
} else {
scriptDistance = getDesSuppDistance(iter, iter.getState64(),
desired.script, supported.script, false);
}
if (scriptFirst) {
scriptDistance >>= 1;
scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
desired.script, supported.script);
flags = scriptDistance & DISTANCE_IS_FINAL;
scriptDistance &= ~DISTANCE_IS_FINAL;
}
distance += scriptDistance;
if (distance >= threshold) {
@ -152,27 +254,24 @@ public class LocaleDistance {
if (desired.region.equals(supported.region)) {
// regionDistance = 0
} else if (star) {
} else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
distance += defaultRegionDistance;
} else {
long startState = iter.getState64();
int remainingThreshold = threshold - distance;
if (minRegionDistance >= remainingThreshold) {
continue;
}
// From here on we know the regions are not equal.
// Map each region to zero or more partitions. (zero = one empty string)
// Map each region to zero or more partitions. (zero = one non-matching string)
// (Each array of single-character partition strings is encoded as one string.)
// If either side has more than one, then we find the maximum distance.
// This could be optimized by adding some more structure, but probably not worth it.
final String[] desiredPartitions = partitionsForRegion(desired);
final String[] supportedPartitions = partitionsForRegion(supported);
int regionDistance;
if (desiredPartitions.length > 1 || supportedPartitions.length > 1) {
regionDistance = getRegionPartitionsDistance(iter, startState,
desiredPartitions, supportedPartitions, threshold - distance);
} else {
regionDistance = getDesSuppDistance(iter, startState,
desiredPartitions[0], supportedPartitions[0], true);
}
distance += regionDistance;
distance += getRegionPartitionsDistance(
iter, iter.getState64(),
partitionsForRegion(desired),
partitionsForRegion(supported),
remainingThreshold);
}
if (distance < threshold) {
if (distance == 0) {
@ -185,105 +284,144 @@ public class LocaleDistance {
return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
}
/**
* Returns the largest distance found over all (desired, supported) partition pairs,
* or the first pair distance that reaches the threshold.
*
* @param iter trie iterator; reset to startState before every lookup except the first
* @param startState trie state from which each pair lookup starts
* @param desiredPartitions partition strings of the desired region
* @param supportedPartitions partition strings of the supported region
* @param threshold distance at which the search stops early
*/
private int getRegionPartitionsDistance(BytesTrie iter, long startState,
String[] desiredPartitions, String[] supportedPartitions, int threshold) {
// -1 doubles as the "no lookup done yet" marker.
int regionDistance = -1;
for (String dp : desiredPartitions) {
for (String sp : supportedPartitions) {
if (regionDistance >= 0) { // no need to reset in first iteration
iter.resetToState64(startState);
}
int d = getDesSuppDistance(iter, startState, dp, sp, true);
if (regionDistance < d) {
if (d >= threshold) {
// Already at or over the threshold; the exact maximum no longer matters.
return d;
}
regionDistance = d;
}
}
}
assert regionDistance >= 0;
return regionDistance;
}
// Modified from
// DistanceTable#getDistance(desired, supported, Output distanceTable, starEquals).
private static final int getDesSuppDistance(BytesTrie iter, long startState,
String desired, String supported, boolean finalSubtag) {
private static final int getDesSuppScriptDistance(BytesTrie iter, long startState,
String desired, String supported) {
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
int distance = trieNext(iter, desired, false, true);
int distance = trieNext(iter, desired, false);
if (distance >= 0) {
distance = trieNext(iter, supported, true, !finalSubtag);
distance = trieNext(iter, supported, true);
}
if (distance < 0) {
BytesTrie.Result result = iter.resetToState64(startState).next('*'); // <*, *>
assert finalSubtag ? result.hasValue() : result == BytesTrie.Result.INTERMEDIATE_VALUE;
if (!finalSubtag && desired.equals(supported)) {
distance = 0; // same language or script
assert result.hasValue();
if (desired.equals(supported)) {
distance = 0; // same script
} else {
distance = iter.getValue();
assert distance >= 0;
}
if (result == BytesTrie.Result.FINAL_VALUE) {
distance |= DISTANCE_IS_FINAL;
}
}
return distance;
}
private static final int trieNext(BytesTrie iter, String s, boolean wantValue, boolean wantNext) {
/**
* Returns the largest region distance found over all (desired, supported) partition
* pairs, or the first distance that reaches the threshold.
*
* Each character of a partitions string is looked up on its own, with bit 7 set.
* The fallback distance from getFallbackRegionDistance() is fetched at most once
* per call.
*
* @param iter trie iterator positioned at startState on entry
* @param startState trie state to return to for each desired partition and for the fallback
* @param desiredPartitions partition characters of the desired region
* @param supportedPartitions partition characters of the supported region
* @param threshold distance at which the search stops early
*/
private static final int getRegionPartitionsDistance(BytesTrie iter, long startState,
String desiredPartitions, String supportedPartitions, int threshold) {
int desLength = desiredPartitions.length();
int suppLength = supportedPartitions.length();
// Fast path: exactly one partition on each side needs a single pair lookup.
if (desLength == 1 && suppLength == 1) {
BytesTrie.Result result = iter.next(desiredPartitions.charAt(0) | 0x80);
if (result.hasNext()) {
result = iter.next(supportedPartitions.charAt(0) | 0x80);
if (result.hasValue()) {
return iter.getValue();
}
}
return getFallbackRegionDistance(iter, startState);
}
int regionDistance = 0;
// Fall back to * only once, not for each pair of partition strings.
boolean star = false;
for (int di = 0;;) {
// Look up each desired-partition string only once,
// not for each (desired, supported) pair.
BytesTrie.Result result = iter.next(desiredPartitions.charAt(di++) | 0x80);
if (result.hasNext()) {
// Remember the state after the desired partition only if it will be needed again.
long desState = suppLength > 1 ? iter.getState64() : 0;
for (int si = 0;;) {
result = iter.next(supportedPartitions.charAt(si++) | 0x80);
int d;
if (result.hasValue()) {
d = iter.getValue();
} else if (star) {
// The fallback distance has already been folded into regionDistance.
d = 0;
} else {
d = getFallbackRegionDistance(iter, startState);
star = true;
}
if (d >= threshold) {
return d;
} else if (regionDistance < d) {
regionDistance = d;
}
if (si < suppLength) {
iter.resetToState64(desState);
} else {
break;
}
}
} else if (!star) {
// No entry for this desired partition: use the fallback distance once.
int d = getFallbackRegionDistance(iter, startState);
if (d >= threshold) {
return d;
} else if (regionDistance < d) {
regionDistance = d;
}
star = true;
}
if (di < desLength) {
iter.resetToState64(startState);
} else {
break;
}
}
return regionDistance;
}
/**
 * Resets {@code iter} to {@code startState} and reads the value stored under '*',
 * i.e. the distance for the (ANY, ANY) pair.
 *
 * @return the fallback distance (asserted to be non-negative)
 */
private static final int getFallbackRegionDistance(BytesTrie iter, long startState) {
    final BytesTrie.Result starResult = iter.resetToState64(startState).next('*');  // <*, *>
    assert starResult.hasValue();
    final int fallback = iter.getValue();
    assert fallback >= 0;
    return fallback;
}
private static final int trieNext(BytesTrie iter, String s, boolean wantValue) {
if (s.isEmpty()) {
return -1; // no empty subtags in the distance data
}
BytesTrie.Result result;
int end = s.length() - 1;
for (int i = 0;; ++i) {
for (int i = 0, end = s.length() - 1;; ++i) {
int c = s.charAt(i);
assert c <= 0x7f;
if (i < end) {
result = iter.next(c);
if (!result.hasNext()) {
if (!iter.next(c).hasNext()) {
return -1;
}
} else {
// last character of this subtag
result = iter.next(c | 0x80);
break;
BytesTrie.Result result = iter.next(c | 0x80);
if (wantValue) {
if (result.hasValue()) {
int value = iter.getValue();
if (result == BytesTrie.Result.FINAL_VALUE) {
value |= DISTANCE_IS_FINAL;
}
return value;
}
} else {
if (result.hasNext()) {
return 0;
}
}
return -1;
}
}
if (wantValue) {
if (wantNext) {
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
return iter.getValue();
}
} else {
if (result.hasValue()) {
return iter.getValue();
}
}
} else {
if (wantNext) {
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
return 0;
}
} else {
if (result.hasValue()) {
return 0;
}
}
}
return -1;
}
/** Returns the string form of the distance table map from testOnlyGetDistanceTable(). */
@Override
public String toString() {
    return testOnlyGetDistanceTable().toString();
}
private String[] partitionsForRegion(LSR lsr) {
// ill-formed region -> one empty string
int pIndex = lsr.regionIndex >= 0 ? regionToPartitionsIndex[lsr.regionIndex] : 0;
private String partitionsForRegion(LSR lsr) {
// ill-formed region -> one non-matching string
int pIndex = regionToPartitionsIndex[lsr.regionIndex];
return partitionArrays[pIndex];
}
boolean isParadigmLSR(LSR lsr) {
public boolean isParadigmLSR(LSR lsr) {
return paradigmLSRs.contains(lsr);
}
@ -296,48 +434,50 @@ public class LocaleDistance {
return defaultRegionDistance;
}
/** Returns the default demotion per desired locale stored in this object. */
public int getDefaultDemotionPerDesiredLocale() {
    return this.defaultDemotionPerDesiredLocale;
}
// TODO: When we build data offline,
// write test code to compare the loaded table with the builder output.
// Fail if different, with instructions for how to update the data file.
// VisibleForTesting
public Map<String, Integer> testOnlyGetDistanceTable(boolean skipIntermediateMatchPoints) {
Map<String, Integer> map = new LinkedHashMap<>();
public Map<String, Integer> testOnlyGetDistanceTable() {
Map<String, Integer> map = new TreeMap<>();
StringBuilder sb = new StringBuilder();
for (BytesTrie.Entry entry : trie) {
sb.setLength(0);
int numSubtags = 0;
int length = entry.bytesLength();
for (int i = 0; i < length; ++i) {
byte b = entry.byteAt(i);
if (b == '*') {
// One * represents a (desired, supported) = (ANY, ANY) pair.
sb.append("*-*-");
numSubtags += 2;
} else {
if (b >= 0) {
sb.append((char) b);
} else { // end of subtag
sb.append((char) (b & 0x7f)).append('-');
++numSubtags;
}
}
}
assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
if (!skipIntermediateMatchPoints || (numSubtags & 1) == 0) {
sb.setLength(sb.length() - 1);
String s = sb.toString();
if (!skipIntermediateMatchPoints && s.endsWith("*-*")) {
// Re-insert single-ANY match points to show consistent structure
// for the test code.
map.put(s.substring(0, s.length() - 2), 0);
}
map.put(s, entry.value);
}
sb.setLength(sb.length() - 1);
map.put(sb.toString(), entry.value);
}
return map;
}
// VisibleForTesting
public void testOnlyPrintDistanceTable() {
for (Map.Entry<String, Integer> mapping : testOnlyGetDistanceTable(true).entrySet()) {
System.out.println(mapping);
for (Map.Entry<String, Integer> mapping : testOnlyGetDistanceTable().entrySet()) {
String suffix = "";
int value = mapping.getValue();
if ((value & DISTANCE_SKIP_SCRIPT) != 0) {
value &= ~DISTANCE_SKIP_SCRIPT;
suffix = " skip script";
}
System.out.println(mapping.getKey() + '=' + value + suffix);
}
}
}

View file

@ -2,11 +2,18 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.MissingResourceException;
import java.util.TreeMap;
import com.ibm.icu.impl.ICUData;
import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.UResource;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.ULocale;
@ -15,27 +22,93 @@ public final class XLikelySubtags {
private static final String PSEUDO_BIDI_PREFIX = "+"; // -XB, -PSBIDI
private static final String PSEUDO_CRACKED_PREFIX = ","; // -XC, -PSCRACK
private static final boolean DEBUG_OUTPUT = false;
public static final int SKIP_SCRIPT = 1;
// TODO: Load prebuilt data from a resource bundle
// to avoid the dependency on the builder code.
static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
static final class Data {
private final Map<String, String> languageAliases;
private final Map<String, String> regionAliases;
private final BytesTrie trie;
private final LSR[] lsrs;
// VisibleForTesting
public static final class Data {
public final Map<String, String> languageAliases;
public final Map<String, String> regionAliases;
public final byte[] trie;
public final LSR[] lsrs;
Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
BytesTrie trie, LSR[] lsrs) {
public Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
byte[] trie, LSR[] lsrs) {
this.languageAliases = languageAliases;
this.regionAliases = regionAliases;
this.trie = trie;
this.lsrs = lsrs;
}
/**
 * Looks up a required item of the "likely" table.
 *
 * @param table the table to search
 * @param key the key of the required item
 * @param value the reusable value object that findValue() fills in
 * @return {@code value}, now set to the item found under {@code key}
 * @throws MissingResourceException if the table has no item for {@code key}
 */
private static UResource.Value getValue(UResource.Table table,
        String key, UResource.Value value) {
    if (table.findValue(key, value)) {
        return value;
    }
    throw new MissingResourceException(
            "langInfo.res missing data", "", "likely/" + key);
}
// VisibleForTesting
/**
 * Loads the likely-subtags data from the "likely" table of the langInfo resource bundle.
 *
 * <p>The "languageAliases" and "regionAliases" items are optional and default to empty maps;
 * the "trie" and "lsrs" items are required.
 *
 * @return the loaded data
 * @throws MissingResourceException if a required item is missing
 */
public static Data load() throws MissingResourceException {
    ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
            ICUData.ICU_BASE_NAME, "langInfo",
            ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
    UResource.Value value = langInfo.getValueWithFallback("likely");
    UResource.Table likelyTable = value.getTable();

    Map<String, String> languageAliases = readAliasPairs(likelyTable, "languageAliases", value);
    Map<String, String> regionAliases = readAliasPairs(likelyTable, "regionAliases", value);

    // Copy the serialized trie out of the resource buffer into its own array.
    ByteBuffer trieBuffer = getValue(likelyTable, "trie", value).getBinary();
    byte[] trieBytes = new byte[trieBuffer.remaining()];
    trieBuffer.get(trieBytes);

    // The LSRs are stored as a flat array of (language, script, region) triples.
    String[] subtags = getValue(likelyTable, "lsrs", value).getStringArray();
    LSR[] lsrs = new LSR[subtags.length / 3];
    for (int src = 0, dest = 0; src < subtags.length; src += 3, ++dest) {
        lsrs[dest] = new LSR(subtags[src], subtags[src + 1], subtags[src + 2]);
    }

    return new Data(languageAliases, regionAliases, trieBytes, lsrs);
}

/**
 * Reads an optional flat array of (alias, canonical) string pairs into a map.
 * Returns an empty map if the table has no item for the key.
 */
private static Map<String, String> readAliasPairs(UResource.Table table,
        String key, UResource.Value value) {
    if (!table.findValue(key, value)) {
        return Collections.emptyMap();
    }
    String[] pairs = value.getStringArray();
    Map<String, String> aliases = new HashMap<>(pairs.length / 2);
    for (int i = 0; i < pairs.length; i += 2) {
        aliases.put(pairs[i], pairs[i + 1]);
    }
    return aliases;
}
/**
 * Compares all four data items: both alias maps, the trie bytes, and the LSR array.
 *
 * @param other the object to compare with; may be null
 * @return true if {@code other} is a Data object of the same class with equal contents
 */
@Override
public boolean equals(Object other) {
    if (this == other) { return true; }
    // equals(null) must return false rather than throw.
    if (other == null || !getClass().equals(other.getClass())) { return false; }
    Data od = (Data)other;
    return
            languageAliases.equals(od.languageAliases) &&
            regionAliases.equals(od.regionAliases) &&
            Arrays.equals(trie, od.trie) &&
            Arrays.equals(lsrs, od.lsrs);
}

/**
 * Hash code over the same four items that equals() compares,
 * so that equal Data objects have equal hash codes.
 */
@Override
public int hashCode() {
    int hash = languageAliases.hashCode();
    hash = 31 * hash + regionAliases.hashCode();
    hash = 31 * hash + Arrays.hashCode(trie);
    hash = 31 * hash + Arrays.hashCode(lsrs);
    return hash;
}
}
// VisibleForTesting
public static final XLikelySubtags INSTANCE = new XLikelySubtags(Data.load());
private final Map<String, String> languageAliases;
private final Map<String, String> regionAliases;
@ -46,30 +119,35 @@ public final class XLikelySubtags {
private final long trieUndState;
private final long trieUndZzzzState;
private final int defaultLsrIndex;
private final long[] trieFirstLetterStates = new long[26];
private final LSR[] lsrs;
private XLikelySubtags(XLikelySubtags.Data data) {
languageAliases = data.languageAliases;
regionAliases = data.regionAliases;
trie = data.trie;
trie = new BytesTrie(data.trie, 0);
lsrs = data.lsrs;
// Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
BytesTrie.Result result = trie.next('*');
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
int value = trie.getValue();
assert value == 0;
assert result.hasNext();
trieUndState = trie.getState64();
result = trie.next('*');
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
value = trie.getValue();
assert value == 0;
assert result.hasNext();
trieUndZzzzState = trie.getState64();
result = trie.next('*');
assert result.hasValue();
defaultLsrIndex = trie.getValue();
trie.reset();
for (char c = 'a'; c <= 'z'; ++c) {
result = trie.next(c);
if (result == BytesTrie.Result.NO_VALUE) {
trieFirstLetterStates[c - 'a'] = trie.getState64();
}
trie.reset();
}
if (DEBUG_OUTPUT) {
System.out.println("*** likely subtags");
for (Map.Entry<String, LSR> mapping : getTable().entrySet()) {
@ -78,24 +156,53 @@ public final class XLikelySubtags {
}
}
/**
 * Implementation of LocaleMatcher.canonicalize(ULocale).
 *
 * <p>Replaces the language and/or region with its alias mapping, if there is one.
 * When neither subtag has a mapping, the input locale object itself is returned.
 * Otherwise a new locale is built from the language, script, and region only.
 */
public ULocale canonicalize(ULocale locale) {
    String language = locale.getLanguage();
    String languageReplacement = languageAliases.get(language);
    String region = locale.getCountry();
    String regionReplacement = regionAliases.get(region);
    if (languageReplacement == null && regionReplacement == null) {
        return locale;
    }
    if (languageReplacement != null) {
        language = languageReplacement;
    }
    String script = locale.getScript();
    if (regionReplacement != null) {
        region = regionReplacement;
    }
    return new ULocale(language, script, region);
}
/**
 * Maps an alias to its canonical form.
 *
 * @param aliases map from alias to canonical subtag
 * @param alias the subtag to look up
 * @return the mapped value, or {@code alias} itself if the lookup yields null
 */
private static String getCanonical(Map<String, String> aliases, String alias) {
    String mapped = aliases.get(alias);
    if (mapped != null) {
        return mapped;
    }
    return alias;
}
LSR makeMaximizedLsrFrom(ULocale locale) {
// VisibleForTesting
public LSR makeMaximizedLsrFrom(ULocale locale) {
String name = locale.getName();
if (name.startsWith("@x=")) {
// Private use language tag x-subtag-subtag...
return new LSR(name, "", "");
}
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
locale.getVariant());
}
/**
 * Builds the maximized LSR for a java.util.Locale.
 *
 * <p>A locale whose language tag starts with "x-" (private use language tag
 * x-subtag-subtag...) is not maximized: the whole tag becomes the language
 * of the returned LSR, with empty script and region.
 */
public LSR makeMaximizedLsrFrom(Locale locale) {
    String languageTag = locale.toLanguageTag();
    return languageTag.startsWith("x-")
            ? new LSR(languageTag, "", "")
            : makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
                    locale.getVariant());
}
private LSR makeMaximizedLsr(String language, String script, String region, String variant) {
// Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
// They should match only themselves,
// not other locales with what looks like the same language and script subtags.
String language = locale.getLanguage();
String script = locale.getScript();
String region = locale.getCountry();
if (region.length() == 2 && region.charAt(0) == 'X') {
switch (region.charAt(1)) {
case 'A':
@ -112,7 +219,6 @@ public final class XLikelySubtags {
}
}
String variant = locale.getVariant();
if (variant.startsWith("PS")) {
switch (variant) {
case "PSACCENT":
@ -130,7 +236,7 @@ public final class XLikelySubtags {
}
language = getCanonical(languageAliases, language);
// script is ok
// (We have no script mappings.)
region = getCanonical(regionAliases, region);
return INSTANCE.maximize(language, script, region);
}
@ -139,14 +245,31 @@ public final class XLikelySubtags {
* Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
*/
private LSR maximize(String language, String script, String region) {
int retainOldMask = 0;
BytesTrie iter = new BytesTrie(trie);
// language lookup
if (language.equals("und")) {
language = "";
}
if (script.equals("Zzzz")) {
script = "";
}
if (region.equals("ZZ")) {
region = "";
}
if (!script.isEmpty() && !region.isEmpty() && !language.isEmpty()) {
return new LSR(language, script, region); // already maximized
}
int retainOldMask = 0;
BytesTrie iter = new BytesTrie(trie);
long state;
int value = trieNext(iter, language, false);
int value;
// Small optimization: Array lookup for first language letter.
int c0;
if (language.length() >= 2 && 0 <= (c0 = language.charAt(0) - 'a') && c0 <= 25 &&
(state = trieFirstLetterStates[c0]) != 0) {
value = trieNext(iter.resetToState64(state), language, 1);
} else {
value = trieNext(iter, language, 0);
}
if (value >= 0) {
if (!language.isEmpty()) {
retainOldMask |= 4;
@ -157,45 +280,54 @@ public final class XLikelySubtags {
iter.resetToState64(trieUndState); // "und" ("*")
state = 0;
}
// script lookup
if (script.equals("Zzzz")) {
script = "";
}
value = trieNext(iter, script, false);
if (value >= 0) {
if (value > 0) {
// Intermediate or final value from just language.
if (value == SKIP_SCRIPT) {
value = 0;
}
if (!script.isEmpty()) {
retainOldMask |= 2;
}
state = iter.getState64();
} else {
retainOldMask |= 2;
if (state == 0) {
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
} else {
iter.resetToState64(state);
value = trieNext(iter, "", false);
assert value == 0;
value = trieNext(iter, script, 0);
if (value >= 0) {
if (!script.isEmpty()) {
retainOldMask |= 2;
}
state = iter.getState64();
} else {
retainOldMask |= 2;
if (state == 0) {
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
} else {
iter.resetToState64(state);
value = trieNext(iter, "", 0);
assert value >= 0;
state = iter.getState64();
}
}
}
// region lookup
if (region.equals("ZZ")) {
region = "";
}
value = trieNext(iter, region, true);
if (value >= 0) {
if (value > 0) {
// Final value from just language or language+script.
if (!region.isEmpty()) {
retainOldMask |= 1;
}
} else {
retainOldMask |= 1;
if (state == 0) {
value = defaultLsrIndex;
value = trieNext(iter, region, 0);
if (value >= 0) {
if (!region.isEmpty()) {
retainOldMask |= 1;
}
} else {
iter.resetToState64(state);
value = trieNext(iter, "", true);
if (value < 0) { // TODO: should never happen?! just assert value >= 0?
return null;
retainOldMask |= 1;
if (state == 0) {
value = defaultLsrIndex;
} else {
iter.resetToState64(state);
value = trieNext(iter, "", 0);
assert value > 0;
}
}
}
@ -220,34 +352,34 @@ public final class XLikelySubtags {
return new LSR(language, script, region);
}
private static final int trieNext(BytesTrie iter, String s, boolean finalSubtag) {
private static final int trieNext(BytesTrie iter, String s, int i) {
BytesTrie.Result result;
if (s.isEmpty()) {
result = iter.next('*');
} else {
int end = s.length() - 1;
for (int i = 0;; ++i) {
result = iter.next(s.charAt(i));
for (;; ++i) {
int c = s.charAt(i);
if (i < end) {
if (!result.hasNext()) {
if (!iter.next(c).hasNext()) {
return -1;
}
} else {
// last character of this subtag
result = iter.next(c | 0x80);
break;
}
}
}
if (!finalSubtag) {
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
return 0; // value should be 0, don't care
}
} else {
if (result.hasValue()) {
return iter.getValue();
}
switch (result) {
case NO_MATCH: return -1;
case NO_VALUE: return 0;
case INTERMEDIATE_VALUE:
assert iter.getValue() == SKIP_SCRIPT;
return SKIP_SCRIPT;
case FINAL_VALUE: return iter.getValue();
default: return -1;
}
return -1;
}
LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn,
@ -263,11 +395,16 @@ public final class XLikelySubtags {
// value00 = lookup(result.language, "", "")
BytesTrie iter = new BytesTrie(trie);
int value = trieNext(iter, result.language, false);
int value = trieNext(iter, result.language, 0);
assert value >= 0;
value = trieNext(iter, "", false);
assert value >= 0;
value = trieNext(iter, "", true);
if (value == 0) {
value = trieNext(iter, "", 0);
assert value >= 0;
if (value == 0) {
value = trieNext(iter, "", 0);
}
}
assert value > 0;
LSR value00 = lsrs[value];
boolean favorRegionOk = false;
if (result.script.equals(value00.script)) { //script is default
@ -292,26 +429,24 @@ public final class XLikelySubtags {
}
private Map<String, LSR> getTable() {
Map<String, LSR> map = new LinkedHashMap<>();
Set<String> prefixes = new HashSet<>();
Map<String, LSR> map = new TreeMap<>();
StringBuilder sb = new StringBuilder();
for (BytesTrie.Entry entry : trie) {
sb.setLength(0);
int length = entry.bytesLength();
for (int i = 0; i < length;) {
byte b = entry.byteAt(i++);
sb.append((char) b);
if (i < length && prefixes.contains(sb.toString())) {
sb.append('-');
if (b == '*') {
sb.append("*-");
} else if (b >= 0) {
sb.append((char) b);
} else { // end of subtag
sb.append((char) (b & 0x7f)).append('-');
}
}
String s = sb.toString();
if (entry.value == 0) {
// intermediate match point
prefixes.add(s);
} else {
map.put(s, lsrs[entry.value]);
}
assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
sb.setLength(sb.length() - 1);
map.put(sb.toString(), lsrs[entry.value]);
}
return map;
}

View file

@ -1,681 +0,0 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.locale;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
import com.ibm.icu.util.LocalePriorityList;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
/**
* Immutable class that picks best match between user's desired locales and application's supported locales.
* @author markdavis
*/
public final class XLocaleMatcher {
private static final LSR UND_LSR = new LSR("und","","");
private static final ULocale UND_LOCALE = new ULocale("und");
private static final Iterator<ULocale> NULL_ITERATOR = null;
// Activates debugging output to stderr with details of GetBestMatch.
private static final boolean TRACE_MATCHER = false;
/**
 * Growable list of int indexes, optimized for one or two elements:
 * the first two are stored in plain fields, and a List is allocated
 * only when a third index is added.
 */
private static final class Indexes {
    // Some indexes without further object creation and auto-boxing.
    int first, second = -1;
    // We could turn the List into an int array + length and manage its growth.
    List<Integer> remaining;

    /** Creates a list that contains just firstIndex. */
    Indexes(int firstIndex) {
        first = firstIndex;
    }

    /** Appends an index. A negative second slot marks it as still unused. */
    void add(int i) {
        if (second < 0) {
            second = i;
            return;
        }
        if (remaining == null) {
            remaining = new ArrayList<>();
        }
        remaining.add(i);
    }

    int getFirst() { return first; }

    /** Returns the i-th index, or -1 when i >= length. */
    int get(int i) {
        switch (i) {
        case 0:
            return first;
        case 1:
            return second;
        default:
            break;
        }
        if (remaining == null) {
            return -1;
        }
        int overflowPos = i - 2;
        return overflowPos < remaining.size() ? remaining.get(overflowPos) : -1;
    }
}
// TODO: Make public, and add public methods that return it.
/**
 * Outcome of one matching operation: the desired and supported locales that were
 * paired up, the supported locale as a java.util.Locale, and the indexes of both.
 */
private static final class Result {
    ULocale desiredLocale;
    ULocale supportedLocale;
    // Locale desiredJavaLocale;
    Locale supportedJavaLocale;
    int desiredIndex;
    @SuppressWarnings("unused") // unused until public, for other wrappers
    int supportedIndex;

    private Result(ULocale desired, ULocale supported,
            /* Locale jdesired, */ Locale jsupported,
            int desIndex, int suppIndex) {
        this.desiredLocale = desired;
        this.supportedLocale = supported;
        // this.desiredJavaLocale = jdesired;
        this.supportedJavaLocale = jsupported;
        this.desiredIndex = desIndex;
        this.supportedIndex = suppIndex;
    }
}
// normally the default values, but can be set via constructor
private final int thresholdDistance;
private final int demotionPerAdditionalDesiredLocale;
private final DistanceOption distanceOption;
// built based on application's supported languages in constructor
private final ULocale[] supportedLocales;
private final Locale[] supportedJavaLocales;
private final Map<ULocale, Integer> supportedToIndex;
private final Map<LSR, Indexes> supportedLsrToIndexes;
// Array versions of the supportedLsrToIndexes keys and values.
// The distance lookup loops over the supportedLsrs and returns the index of the best match.
private final LSR[] supportedLsrs;
private final Indexes[] supportedIndexes;
private final ULocale defaultLocale;
private final Locale defaultJavaLocale;
private final int defaultLocaleIndex;
/**
 * Collects the parameters for an XLocaleMatcher.
 * All setters return this Builder so that calls can be chained; build() creates the matcher.
 */
public static class Builder {
    /**
     * Supported locales. A Set, to avoid duplicates.
     * Maintains iteration order for consistent matching behavior (first best match wins).
     * Remains null until supported locales are set or added.
     */
    private Set<ULocale> supportedLocales;
    private int thresholdDistance = -1;
    private int demotionPerAdditionalDesiredLocale = -1;
    private ULocale defaultLocale;
    private DistanceOption distanceOption;

    /**
     * Replaces the supported locales with the ones parsed from a language priority list string.
     *
     * @param locales the languagePriorityList to set
     * @return this Builder object
     */
    public Builder setSupportedLocales(String locales) {
        return setSupportedLocales(LocalePriorityList.add(locales).build());
    }

    /**
     * Replaces the supported locales, keeping the iteration order of the input.
     *
     * @param locales the supported locales
     * @return this Builder object
     */
    public Builder setSupportedLocales(Iterable<ULocale> locales) {
        supportedLocales = new LinkedHashSet<>(); // maintain order
        for (ULocale locale : locales) {
            supportedLocales.add(locale);
        }
        return this;
    }

    /**
     * Replaces the supported locales, keeping the iteration order of the input.
     *
     * @param locales the supported locales
     * @return this Builder object
     */
    public Builder setSupportedLocales(Collection<ULocale> locales) {
        supportedLocales = new LinkedHashSet<>(locales); // maintain order
        return this;
    }

    /**
     * Replaces the supported locales with the given java.util.Locale objects,
     * converted to ULocale, keeping the iteration order of the input.
     *
     * @param locales the supported locales
     * @return this Builder object
     */
    public Builder setSupportedJavaLocales(Collection<Locale> locales) {
        supportedLocales = new LinkedHashSet<>(locales.size()); // maintain order
        for (Locale locale : locales) {
            supportedLocales.add(ULocale.forLocale(locale));
        }
        return this;
    }

    /**
     * Appends one supported locale to the ones already set.
     *
     * @param locale the supported locale to add
     * @return this Builder object
     */
    public Builder addSupportedLocale(ULocale locale) {
        if (supportedLocales == null) {
            supportedLocales = new LinkedHashSet<>();
        }
        supportedLocales.add(locale);
        return this;
    }

    /**
     * Appends one supported locale, converted to ULocale, to the ones already set.
     *
     * @param locale the supported locale to add
     * @return this Builder object
     */
    public Builder addSupportedLocale(Locale locale) {
        return addSupportedLocale(ULocale.forLocale(locale));
    }

    /**
     * Sets the threshold distance. Values above 100 are clamped to 100.
     *
     * @param thresholdDistance the thresholdDistance to set, with -1 = default
     * @return this Builder object
     */
    public Builder setThresholdDistance(int thresholdDistance) {
        if (thresholdDistance > 100) {
            thresholdDistance = 100;
        }
        this.thresholdDistance = thresholdDistance;
        return this;
    }

    /**
     * @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default
     * @return this Builder object
     */
    public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) {
        this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale;
        return this;
    }

    /**
     * Set the default language, with null = default = first supported language
     * @param defaultLocale the default language
     * @return this Builder object
     */
    public Builder setDefaultLanguage(ULocale defaultLocale) {
        this.defaultLocale = defaultLocale;
        return this;
    }

    /**
     * Sets the distance option, which is passed through to the distance computation.
     * An option under which language differences are smaller than script differences
     * is used in situations (such as maps) where it is better to fall back to
     * the same script than a similar language.
     * @param distanceOption the distance option
     * @return this Builder object
     */
    public Builder setDistanceOption(DistanceOption distanceOption) {
        this.distanceOption = distanceOption;
        return this;
    }

    /**
     * Creates a matcher from the parameters collected so far.
     *
     * @return the new XLocaleMatcher
     */
    public XLocaleMatcher build() {
        return new XLocaleMatcher(this);
    }

    /**
     * Returns a debug string that lists only the parameters that have been set.
     */
    @Override
    public String toString() {
        StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
        // supportedLocales stays null until locales are set or added.
        if (supportedLocales != null && !supportedLocales.isEmpty()) {
            s.append(" supported={").append(supportedLocales.toString()).append('}');
        }
        if (defaultLocale != null) {
            s.append(" default=").append(defaultLocale.toString());
        }
        if (distanceOption != null) {
            s.append(" distance=").append(distanceOption.toString());
        }
        if (thresholdDistance >= 0) {
            s.append(String.format(" threshold=%d", thresholdDistance));
        }
        if (demotionPerAdditionalDesiredLocale >= 0) {
            s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
        }
        return s.append('}').toString();
    }
}
/**
* Creates a new builder used in chaining parameters for building a Locale Matcher.
* @return a new Builder object
*/
public static Builder builder() {
return new Builder();
}
/**
* Convenience constructor: creates a matcher with default parameters
* for the supported locales parsed from a language priority list string.
*/
public XLocaleMatcher(String supportedLocales) {
this(builder().setSupportedLocales(supportedLocales));
}
/**
* Convenience constructor: creates a matcher with default parameters
* for the supported locales in the given priority list.
*/
public XLocaleMatcher(LocalePriorityList supportedLocales) {
this(builder().setSupportedLocales(supportedLocales));
}
/**
* Convenience constructor: creates a matcher with default parameters
* for the given set of supported locales.
*/
public XLocaleMatcher(Set<ULocale> supportedLocales) {
this(builder().setSupportedLocales(supportedLocales));
}
/**
 * Creates a locale matcher with the given Builder parameters.
 *
 * <p>A builder on which no supported locales were ever set or added is treated
 * like a builder with an empty set of supported locales.
 * Negative threshold and demotion values in the builder select the defaults
 * from LocaleDistance.INSTANCE.
 */
private XLocaleMatcher(Builder builder) {
    thresholdDistance = builder.thresholdDistance < 0 ?
            LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
    // The builder leaves its set null until locales are set or added;
    // handle that like an empty set rather than failing with a NullPointerException.
    Set<ULocale> builderLocales = builder.supportedLocales != null ?
            builder.supportedLocales : new LinkedHashSet<ULocale>();
    // Store the supported locales in input order,
    // so that when different types are used (e.g., java.util.Locale)
    // we can return those by parallel index.
    int supportedLocalesLength = builderLocales.size();
    supportedLocales = new ULocale[supportedLocalesLength];
    supportedJavaLocales = new Locale[supportedLocalesLength];
    supportedToIndex = new HashMap<>(supportedLocalesLength);
    // We need an unordered map from LSR to first supported locale with that LSR,
    // and an ordered list of (LSR, Indexes).
    // We use a LinkedHashMap for both,
    // and insert the supported locales in the following order:
    // 1. First supported locale.
    // 2. Priority locales in builder order.
    // 3. Remaining locales in builder order.
    supportedLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
    Map<LSR, Indexes> otherLsrToIndexes = null;
    LSR firstLSR = null;
    int i = 0;
    for (ULocale locale : builderLocales) {
        supportedLocales[i] = locale;
        supportedJavaLocales[i] = locale.toLocale();
        // supportedToIndex.putIfAbsent(locale, i)
        Integer oldIndex = supportedToIndex.get(locale);
        if (oldIndex == null) {
            supportedToIndex.put(locale, i);
        }
        LSR lsr = getMaximalLsrOrUnd(locale);
        if (i == 0) {
            firstLSR = lsr;
            supportedLsrToIndexes.put(lsr, new Indexes(0));
        } else if (lsr.equals(firstLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
            addIndex(supportedLsrToIndexes, lsr, i);
        } else {
            if (otherLsrToIndexes == null) {
                otherLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
            }
            addIndex(otherLsrToIndexes, lsr, i);
        }
        ++i;
    }
    if (otherLsrToIndexes != null) {
        supportedLsrToIndexes.putAll(otherLsrToIndexes);
    }
    // Parallel arrays of the map's keys and values, in the map's iteration order.
    int numSuppLsrs = supportedLsrToIndexes.size();
    supportedLsrs = supportedLsrToIndexes.keySet().toArray(new LSR[numSuppLsrs]);
    supportedIndexes = supportedLsrToIndexes.values().toArray(new Indexes[numSuppLsrs]);

    // The default is the explicitly set locale, else the first supported one, else null.
    // An explicitly set default has no index into the supported locales (-1).
    ULocale def;
    Locale jdef = null;
    int idef = -1;
    if (builder.defaultLocale != null) {
        def = builder.defaultLocale;
    } else if (supportedLocalesLength > 0) {
        def = supportedLocales[0];  // first language
        jdef = supportedJavaLocales[0];
        idef = 0;
    } else {
        def = null;
    }
    if (jdef == null && def != null) {
        jdef = def.toLocale();
    }
    defaultLocale = def;
    defaultJavaLocale = jdef;
    defaultLocaleIndex = idef;
    demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ?
            LocaleDistance.INSTANCE.getDefaultRegionDistance() + 1 :
            builder.demotionPerAdditionalDesiredLocale;
    distanceOption = builder.distanceOption;
}
/**
 * Records index i for the given LSR: appends it to the LSR's existing Indexes,
 * or creates a new Indexes entry if the map has none for this LSR yet.
 */
private static final void addIndex(Map<LSR, Indexes> lsrToIndexes, LSR lsr, int i) {
    Indexes existing = lsrToIndexes.get(lsr);
    if (existing != null) {
        existing.add(i);
        return;
    }
    lsrToIndexes.put(lsr, new Indexes(i));
}
/**
 * Returns the shared UND_LSR for the "und" locale,
 * otherwise the maximized LSR computed by XLikelySubtags.
 */
private static final LSR getMaximalLsrOrUnd(ULocale locale) {
    return locale.equals(UND_LOCALE)
            ? UND_LSR
            : XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
}
/**
* Convenience method: returns the best supported locale for a single desired locale.
*/
public ULocale getBestMatch(ULocale ulocale) {
return getBestMatch(ulocale, NULL_ITERATOR).supportedLocale;
}
/**
* Convenience method: parses the string into a locale priority list
* and returns the best supported locale for it.
*/
public ULocale getBestMatch(String languageList) {
return getBestMatch(LocalePriorityList.add(languageList).build(), null);
}
/**
* Convenience method: returns the best supported locale for the desired locales,
* which are given in order of preference.
*/
public ULocale getBestMatch(ULocale... locales) {
return getBestMatch(Arrays.asList(locales), null);
}
/**
* Convenience method: returns the best supported locale for the desired locales,
* without reporting which desired locale matched.
*/
public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
return getBestMatch(desiredLocales, null);
}
/**
 * Get the best match between the desired languages and supported languages
 * @param desiredLocales Typically the supplied user's languages, in order of preference, with best first.
 * @param outputBestDesired The one of the desired languages that matched best (can be null).
 * Set to null if the best match was not below the threshold distance,
 * or if there are no desired languages.
 * @return the best match; the default locale if there are no desired languages.
 */
public ULocale getBestMatch(Iterable<ULocale> desiredLocales, Output<ULocale> outputBestDesired) {
    Iterator<ULocale> remaining = desiredLocales.iterator();
    if (remaining.hasNext()) {
        // Split off the first desired locale; the iterator supplies the rest.
        ULocale firstDesired = remaining.next();
        return getBestMatch(firstDesired, remaining, outputBestDesired);
    }
    if (outputBestDesired != null) {
        outputBestDesired.value = null;
    }
    if (TRACE_MATCHER) {
        System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
    }
    return defaultLocale;
}
/**
 * Finds the best match and reports the matching desired locale through the output parameter.
 *
 * @param desiredLocale First desired locale.
 * @param remainingIter Remaining desired locales, null or empty if none.
 * @param outputBestDesired If not null,
 *    will be set to the desired locale that matches the best supported one.
 * @return the best supported locale.
 */
private ULocale getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter,
        Output<ULocale> outputBestDesired) {
    Result best = getBestMatch(desiredLocale, remainingIter);
    if (outputBestDesired == null) {
        return best.supportedLocale;
    }
    outputBestDesired.value = best.desiredLocale;
    return best.supportedLocale;
}
/**
* Core matching loop: walks the desired locales in order and returns the best pairing
* of a desired locale with a supported locale.
*
* <p>An exact locale match or an exact maximized-LSR match returns immediately.
* Otherwise the supported LSR with the smallest distance is looked up via LocaleDistance,
* and the accepted distance shrinks by demotionPerAdditionalDesiredLocale
* for each further desired locale.
*
* @param desiredLocale First desired locale.
* @param remainingIter Remaining desired locales, null or empty if none.
* @return the match; if nothing matched, a Result with the default locale,
* a null desired locale and desired index -1.
*/
private Result getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter) {
int desiredIndex = 0;
int bestDesiredIndex = -1;
ULocale bestDesiredLocale = null;
int bestSupportedLsrIndex = 0;
// bestDistance is the distance that a later desired locale must beat.
// It starts at the threshold and is demoted on every iteration.
for (int bestDistance = thresholdDistance; bestDistance > 0;
bestDistance -= demotionPerAdditionalDesiredLocale) {
// Quick check for exact locale match.
Integer supportedIndex = supportedToIndex.get(desiredLocale);
if (supportedIndex != null) {
if (TRACE_MATCHER) {
System.err.printf("Returning %s: desired=supported\n", desiredLocale);
}
int suppIndex = supportedIndex;
return new Result(desiredLocale, supportedLocales[suppIndex],
supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
}
// Quick check for exact maximized LSR.
LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
Indexes indexes = supportedLsrToIndexes.get(desiredLSR);
if (indexes != null) {
// If this is a supported LSR, return the first locale.
// We already know the exact locale isn't there.
int suppIndex = indexes.getFirst();
ULocale result = supportedLocales[suppIndex];
if (TRACE_MATCHER) {
System.err.printf("Returning %s: desiredLSR=supportedLSR\n", result);
}
return new Result(desiredLocale, result,
supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
}
// A non-negative result is unpacked below as: distance in the low 8 bits,
// index into supportedLsrs in the bits above.
// A negative result is treated as "no acceptable match for this desired locale".
int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
desiredLSR, supportedLsrs, bestDistance, distanceOption);
if (bestIndexAndDistance >= 0) {
bestDistance = bestIndexAndDistance & 0xff;
bestDesiredIndex = desiredIndex;
bestDesiredLocale = desiredLocale;
bestSupportedLsrIndex = bestIndexAndDistance >> 8;
if (bestDistance == 0) {
// Distance 0 cannot be improved upon.
break;
}
}
if (remainingIter == null || !remainingIter.hasNext()) {
break;
}
desiredLocale = remainingIter.next();
++desiredIndex;
}
if (bestDesiredIndex < 0) {
if (TRACE_MATCHER) {
System.err.printf("Returning default %s: no good match\n", defaultLocale);
}
return new Result(null, defaultLocale, defaultJavaLocale, -1, defaultLocaleIndex);
}
// Pick exact match if there is one.
// The length of the list is normally 1.
Indexes bestSupportedIndexes = supportedIndexes[bestSupportedLsrIndex];
int suppIndex;
// Indexes.get() returns -1 past the end, which terminates this loop.
for (int i = 0; (suppIndex = bestSupportedIndexes.get(i)) >= 0; ++i) {
ULocale locale = supportedLocales[suppIndex];
if (bestDesiredLocale.equals(locale)) {
if (TRACE_MATCHER) {
System.err.printf("Returning %s: desired=best matching supported language\n",
bestDesiredLocale);
}
return new Result(bestDesiredLocale, locale,
supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
}
}
// Otherwise return the first of the supported languages that share the best-matching LSR.
suppIndex = bestSupportedIndexes.getFirst();
ULocale result = supportedLocales[suppIndex];
if (TRACE_MATCHER) {
System.err.printf("Returning %s: first best matching supported language\n", result);
}
return new Result(bestDesiredLocale, result,
supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
}
/**
* Get the best match between a single desired language and the supported languages.
* @param desiredLocale the supplied user's language.
* @param outputBestDesired If not null, receives the desired language that matched best.
* Set to null if the best match was not below the threshold distance.
* @return the best match.
*/
public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
// A null iterator means that there are no further desired locales.
return getBestMatch(desiredLocale, null, outputBestDesired);
}
/**
 * Converts Locales to ULocales on the fly.
 * Remembers every Locale it has handed out, in iteration order,
 * so that the original object can be fetched again by its index.
 */
private static final class LocalesWrapper implements Iterator<ULocale> {
    private Iterator<Locale> locales;
    // Cache locales to avoid conversion of the result.
    private Locale first, second;
    private List<Locale> remaining;

    LocalesWrapper(Iterator<Locale> locales) {
        this.locales = locales;
    }

    @Override
    public boolean hasNext() {
        return locales.hasNext();
    }

    /** Fetches the next Locale, stores it, and returns it as a ULocale. */
    @Override
    public ULocale next() {
        Locale current = locales.next();
        if (first == null) {
            first = current;
        } else if (second == null) {
            second = current;
        } else {
            // The list is allocated only for the third and later locales.
            if (remaining == null) {
                remaining = new ArrayList<>();
            }
            remaining.add(current);
        }
        return ULocale.forLocale(current);
    }

    /** Returns the i-th Locale that next() has fetched so far. */
    Locale getJavaLocale(int i) {
        switch (i) {
        case 0:
            return first;
        case 1:
            return second;
        default:
            // TODO: test code coverage
            return remaining.get(i - 2);
        }
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
/**
 * Get the best match between the desired and supported languages,
 * working with java.util.Locale objects.
 *
 * @param desiredLocales the user's desired locales, in order of preference.
 * @param outputBestDesired If not null, receives the desired locale that matched best
 *    (the same object that the input supplied), or null if there are no desired
 *    locales or none of them matched.
 * @return the best supported locale; the default locale if there are no desired locales.
 */
public Locale getBestJavaMatch(Iterable<Locale> desiredLocales, Output<Locale> outputBestDesired) {
    Iterator<Locale> desiredIter = desiredLocales.iterator();
    if (!desiredIter.hasNext()) {
        if (outputBestDesired != null) {
            outputBestDesired.value = null;
        }
        if (TRACE_MATCHER) {
            System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
        }
        return defaultJavaLocale;
    }
    LocalesWrapper wrapper = new LocalesWrapper(desiredIter);
    ULocale desiredLocale = wrapper.next();
    // Pass the wrapper as the iterator over the remaining desired locales.
    // Passing no iterator here would consider only the first desired locale.
    Result result = getBestMatch(desiredLocale, wrapper);
    if (outputBestDesired != null) {
        // The wrapper has cached every Locale it converted, in iteration order,
        // so the desired index maps straight back to the caller's object.
        outputBestDesired.value = result.desiredIndex >= 0 ?
                wrapper.getJavaLocale(result.desiredIndex) : null;
    }
    return result.supportedJavaLocale;
}
/**
 * Finds the best supported match for a single desired Java locale.
 *
 * @param desiredLocale the desired locale
 * @param outputBestDesired if not null, receives desiredLocale when the match result
 *        has a non-negative desired index, otherwise null
 * @return the supported Java locale of the match result
 */
public Locale getBestJavaMatch(Locale desiredLocale, Output<Locale> outputBestDesired) {
    Result matchResult = getBestMatch(ULocale.forLocale(desiredLocale), NULL_ITERATOR);
    if (outputBestDesired != null) {
        if (matchResult.desiredIndex >= 0) {
            outputBestDesired.value = desiredLocale;
        } else {
            outputBestDesired.value = null;
        }
    }
    return matchResult.supportedJavaLocale;
}
/**
 * Overlays the region, variants and extensions of the desired locale onto the
 * supported locale and returns the combination. If the two locales are equal, or
 * the desired locale is null, the supported locale is returned unchanged.
 */
public static ULocale combine(ULocale bestSupported, ULocale bestDesired) {
    // for examples of extensions, variants, see
    // http://unicode.org/repos/cldr/tags/latest/common/bcp47/
    // http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
    if (bestSupported.equals(bestDesired) || bestDesired == null) {
        return bestSupported;
    }

    // Start from the supported locale, then layer the desired locale's parts on top.
    ULocale.Builder merged = new ULocale.Builder().setLocale(bestSupported);

    // Region: taken from the desired locale only when it has one.
    String desiredRegion = bestDesired.getCountry();
    if (!desiredRegion.isEmpty()) {
        merged.setRegion(desiredRegion);
    }

    // Variants: taken from the desired locale only when it has any.
    // note that this will override any subvariants. Eg "sco-ulster-fonipa" + "…-fonupa" => "sco-fonupa" (nuking ulster)
    String desiredVariants = bestDesired.getVariant();
    if (!desiredVariants.isEmpty()) {
        merged.setVariant(desiredVariants);
    }

    // Extensions: every extension key of the desired locale is copied over.
    // note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "…-u-nu-native" => "th-u-nu-native" (nuking calendar)
    for (char key : bestDesired.getExtensionKeys()) {
        merged.setExtension(key, bestDesired.getExtension(key));
    }
    return merged.build();
}
/**
 * Computes the distance between a desired and a supported locale. Both locales are
 * maximized with likely subtags before they are compared. The result is not
 * necessarily symmetric.
 * @param desired A locale desired by the user
 * @param supported A locale supported by a program.
 * @return 0 for a complete match; 100 is a failure case (above the thresholdDistance).
 */
public int distance(ULocale desired, ULocale supported) {
    int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
            XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
            new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
            thresholdDistance, distanceOption);
    // Only the low 8 bits of the combined value are returned as the distance.
    return indexAndDistance & 0xff;
}
/**
 * Convenience overload: builds a ULocale from each string and computes the
 * distance the same way as the ULocale-based variant.
 */
public int distance(String desiredLanguage, String supportedLanguage) {
    int indexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
            XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(desiredLanguage)),
            new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(supportedLanguage)) },
            thresholdDistance, distanceOption);
    // Only the low 8 bits of the combined value are returned as the distance.
    return indexAndDistance & 0xff;
}
/**
 * Builds a debug description of this matcher: the supported locales (if any), the
 * default locale, the distance option (if set), the threshold (if non-negative)
 * and the demotion per additional desired locale.
 */
@Override
public String toString() {
    StringBuilder s = new StringBuilder().append("{XLocaleMatcher");
    if (supportedLocales.length > 0) {
        s.append(" supported={").append(supportedLocales[0].toString());
        for (int i = 1; i < supportedLocales.length; ++i) {
            // Append the i-th locale; a fixed index here would repeat a single entry.
            s.append(", ").append(supportedLocales[i].toString());
        }
        s.append('}');
    }
    s.append(" default=").append(Objects.toString(defaultLocale));
    if (distanceOption != null) {
        s.append(" distance=").append(distanceOption.toString());
    }
    if (thresholdDistance >= 0) {
        s.append(String.format(" threshold=%d", thresholdDistance));
    }
    s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
    return s.append('}').toString();
}
/**
 * Returns the match quality as a fraction: (100 - distance(desired, supported)) / 100.0.
 */
public double match(ULocale desired, ULocale supported) {
    int dist = distance(desired, supported);
    return (100 - dist) / 100.0;
}
/**
 * Returns a fraction between 0 and 1, where 1 means that the languages are a
 * perfect match, and 0 means that they are completely different. The value is
 * (100-distance(desired, supported))/100.0; the two maximized-locale arguments
 * are not used by this implementation.
 * <br>Note that
 * the precise values may change over time; no code should be made dependent
 * on the values remaining constant.
 * @param desired Desired locale
 * @param desiredMax Maximized locale (using likely subtags); ignored
 * @param supported Supported locale
 * @param supportedMax Maximized locale (using likely subtags); ignored
 * @return value between 0 and 1, inclusive.
 * @deprecated Use the form with 2 parameters instead.
 */
@Deprecated
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
    // Forward to the two-parameter form.
    final double fraction = match(desired, supported);
    return fraction;
}
/**
 * Canonicalize a locale (language). Note that for now, it is canonicalizing
 * according to CLDR conventions (he vs iw, etc), since that is what is needed
 * for likelySubtags.
 * <p>
 * NOTE: this method is currently an unimplemented stub. It ignores its argument
 * and always returns null, so callers must not rely on the contract above yet.
 * @param ulocale language/locale code
 * @return intended: ULocale with remapped subtags; currently always null.
 * @stable ICU 4.4
 */
public ULocale canonicalize(ULocale ulocale) {
// TODO: implement the canonicalization; until then the result is always null.
return null;
}
/**
 * Returns the threshold distance of this matcher.
 * @return the thresholdDistance. Any distance above this value is treated as a match failure.
 */
public int getThresholdDistance() {
    return this.thresholdDistance;
}
}

View file

@ -2,6 +2,7 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl.number;
import com.ibm.icu.impl.FormattedStringBuilder;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UnicodeSet;
@ -290,7 +291,7 @@ public class AffixUtils {
/**
* Executes the unescape state machine. Replaces the unquoted characters "-", "+", "%", "‰", and "¤"
* with the corresponding symbols provided by the {@link SymbolProvider}, and inserts the result into
* the NumberStringBuilder at the requested location.
* the FormattedStringBuilder at the requested location.
*
* <p>
* Example input: "'-'¤x"; example output: "-$x"
@ -298,16 +299,16 @@ public class AffixUtils {
* @param affixPattern
* The original string to be unescaped.
* @param output
* The NumberStringBuilder to mutate with the result.
* The FormattedStringBuilder to mutate with the result.
* @param position
* The index into the NumberStringBuilder to insert the the string.
* The index into the FormattedStringBuilder to insert the string.
* @param provider
* An object to generate locale symbols.
* @return The length of the string added to affixPattern.
*/
public static int unescape(
CharSequence affixPattern,
NumberStringBuilder output,
FormattedStringBuilder output,
int position,
SymbolProvider provider,
NumberFormat.Field field) {

View file

@ -4,6 +4,8 @@ package com.ibm.icu.impl.number;
import java.text.Format.Field;
import com.ibm.icu.impl.FormattedStringBuilder;
/**
* The canonical implementation of {@link Modifier}, containing a prefix and suffix string.
*/
@ -52,7 +54,7 @@ public class ConstantAffixModifier implements Modifier {
}
@Override
public int apply(NumberStringBuilder output, int leftIndex, int rightIndex) {
public int apply(FormattedStringBuilder output, int leftIndex, int rightIndex) {
// Insert the suffix first since inserting the prefix will change the rightIndex
int length = output.insert(rightIndex, suffix, field);
length += output.insert(leftIndex, prefix, field);

View file

@ -5,14 +5,16 @@ package com.ibm.icu.impl.number;
import java.text.Format.Field;
import java.util.Arrays;
import com.ibm.icu.impl.FormattedStringBuilder;
/**
* An implementation of {@link Modifier} that allows for multiple types of fields in the same modifier.
* Constructed based on the contents of two {@link NumberStringBuilder} instances (one for the prefix,
* Constructed based on the contents of two {@link FormattedStringBuilder} instances (one for the prefix,
* one for the suffix).
*/
public class ConstantMultiFieldModifier implements Modifier {
// NOTE: In Java, these are stored as array pointers. In C++, the NumberStringBuilder is stored by
// NOTE: In Java, these are stored as array pointers. In C++, the FormattedStringBuilder is stored by
// value and is treated internally as immutable.
protected final char[] prefixChars;
protected final char[] suffixChars;
@ -25,16 +27,16 @@ public class ConstantMultiFieldModifier implements Modifier {
private final Parameters parameters;
public ConstantMultiFieldModifier(
NumberStringBuilder prefix,
NumberStringBuilder suffix,
FormattedStringBuilder prefix,
FormattedStringBuilder suffix,
boolean overwrite,
boolean strong) {
this(prefix, suffix, overwrite, strong, null);
}
public ConstantMultiFieldModifier(
NumberStringBuilder prefix,
NumberStringBuilder suffix,
FormattedStringBuilder prefix,
FormattedStringBuilder suffix,
boolean overwrite,
boolean strong,
Parameters parameters) {
@ -48,7 +50,7 @@ public class ConstantMultiFieldModifier implements Modifier {
}
@Override
public int apply(NumberStringBuilder output, int leftIndex, int rightIndex) {
public int apply(FormattedStringBuilder output, int leftIndex, int rightIndex) {
int length = output.insert(leftIndex, prefixChars, prefixFields);
if (overwrite) {
length += output.splice(leftIndex + length, rightIndex + length, "", 0, 0, null);
@ -109,7 +111,7 @@ public class ConstantMultiFieldModifier implements Modifier {
@Override
public String toString() {
NumberStringBuilder temp = new NumberStringBuilder();
FormattedStringBuilder temp = new FormattedStringBuilder();
apply(temp, 0, 0);
int prefixLength = getPrefixLength();
return String.format("<ConstantMultiFieldModifier prefix:'%s' suffix:'%s'>",

View file

@ -4,6 +4,7 @@ package com.ibm.icu.impl.number;
import java.text.Format.Field;
import com.ibm.icu.impl.FormattedStringBuilder;
import com.ibm.icu.text.DecimalFormatSymbols;
import com.ibm.icu.text.NumberFormat;
import com.ibm.icu.text.UnicodeSet;
@ -30,8 +31,8 @@ public class CurrencySpacingEnabledModifier extends ConstantMultiFieldModifier {
/** Safe code path */
public CurrencySpacingEnabledModifier(
NumberStringBuilder prefix,
NumberStringBuilder suffix,
FormattedStringBuilder prefix,
FormattedStringBuilder suffix,
boolean overwrite,
boolean strong,
DecimalFormatSymbols symbols) {
@ -73,7 +74,7 @@ public class CurrencySpacingEnabledModifier extends ConstantMultiFieldModifier {
/** Safe code path */
@Override
public int apply(NumberStringBuilder output, int leftIndex, int rightIndex) {
public int apply(FormattedStringBuilder output, int leftIndex, int rightIndex) {
// Currency spacing logic
int length = 0;
if (rightIndex - leftIndex > 0
@ -96,7 +97,7 @@ public class CurrencySpacingEnabledModifier extends ConstantMultiFieldModifier {
/** Unsafe code path */
public static int applyCurrencySpacing(
NumberStringBuilder output,
FormattedStringBuilder output,
int prefixStart,
int prefixLen,
int suffixStart,
@ -117,7 +118,7 @@ public class CurrencySpacingEnabledModifier extends ConstantMultiFieldModifier {
/** Unsafe code path */
private static int applyCurrencySpacingAffix(
NumberStringBuilder output,
FormattedStringBuilder output,
int index,
byte affix,
DecimalFormatSymbols symbols) {

View file

@ -4,6 +4,7 @@ package com.ibm.icu.impl.number;
import java.text.Format.Field;
import com.ibm.icu.impl.FormattedStringBuilder;
import com.ibm.icu.impl.StandardPlural;
/**
@ -29,7 +30,7 @@ public interface Modifier {
* number is being formatted.
* @return The number of characters (UTF-16 code units) that were added to the string builder.
*/
public int apply(NumberStringBuilder output, int leftIndex, int rightIndex);
public int apply(FormattedStringBuilder output, int leftIndex, int rightIndex);
/**
* Gets the length of the prefix. This information can be used in combination with {@link #apply} to

Some files were not shown because too many files have changed in this diff Show more