mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-13 08:53:20 +00:00
ICU-20511 Merge branch 'master' into 64-merge-branch
This commit is contained in:
commit
a268e3a299
140 changed files with 7648 additions and 4184 deletions
|
@ -94,9 +94,12 @@ jobs:
|
|||
- visualstudio
|
||||
- Cmd
|
||||
steps:
|
||||
- powershell: 'Invoke-WebRequest https://www.python.org/ftp/python/3.7.1/python-3.7.1-amd64-webinstall.exe -OutFile c:\py3-setup.exe'
|
||||
- powershell: 'Invoke-WebRequest https://www.python.org/ftp/python/3.7.2/python-3.7.2-amd64-webinstall.exe -OutFile c:\py3-setup.exe'
|
||||
- script: |
|
||||
c:\py3-setup.exe /quiet PrependPath=1 InstallAllUsers=1 Include_launcher=1 InstallLauncherAllUsers=1 Include_test=0 Include_doc=0 Include_dev=0 Include_debug=0 Include_tcltk=0 TargetDir=c:\py3
|
||||
- script: |
|
||||
@echo ##vso[task.prependpath]C:\py3
|
||||
@echo ##vso[task.prependpath]C:\py3\Scripts
|
||||
- script: |
|
||||
python --version
|
||||
py -3 --version
|
||||
|
|
5
docs/userguide/assets/features_locales.svg
Normal file
5
docs/userguide/assets/features_locales.svg
Normal file
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 53 KiB |
74
docs/userguide/format_parse/formatted_value.md
Normal file
74
docs/userguide/format_parse/formatted_value.md
Normal file
|
@ -0,0 +1,74 @@
|
|||
<!--
|
||||
© 2019 and later: Unicode, Inc. and others.
|
||||
License & terms of use: http://www.unicode.org/copyright.html
|
||||
-->
|
||||
|
||||
FormattedValue
|
||||
==============
|
||||
|
||||
FormattedValue is an abstraction for localized strings with attributes
|
||||
returned by a number of ICU formatters. APIs for FormattedValue are available
|
||||
in Java, C++, and C. For more details and a list of all implementing classes,
|
||||
refer to the API docs:
|
||||
|
||||
- [C++ FormattedValue](http://icu-project.org/apiref/icu4c/classicu_1_1FormattedValue.html)
|
||||
- [C UFormattedValue](http://icu-project.org/apiref/icu4c/globals_u.html) -- search for "resultAsValue"
|
||||
- [Java FormattedValue](http://www.icu-project.org/apiref/icu4j/com/ibm/icu/text/FormattedValue.html)
|
||||
|
||||
## Nested Span Fields
|
||||
|
||||
Certain ICU formatters, like FormattedList and FormattedDateInterval, use
|
||||
*span fields* to return information about which spans of a string correspond
|
||||
to different input parameters. In C and C++, span fields are implemented
|
||||
using a field category, with the field being set to the input index; in Java,
|
||||
they are implemented by associating an Integer value with a SpanField
|
||||
subclass.
|
||||
|
||||
For example, in C++, here is how you can determine which region in a formatted
|
||||
date interval corresponds to the 1st argument (index 0) in the input date
|
||||
interval (the "from" date):
|
||||
|
||||
```cpp
|
||||
// Let fmt be a DateIntervalFormat for locale en-US and skeleton dMMMMy
|
||||
// Let input1 be July 20, 2018 and input2 be August 3, 2018:
|
||||
FormattedDateInterval result = fmt->formatToValue(*input1, *input2, status);
|
||||
assertEquals("Expected output from format",
|
||||
u"July 20 \u2013 August 3, 2018", result.toString(status));
|
||||
ConstrainedFieldPosition cfpos;
|
||||
cfpos.constrainField(UFIELD_CATEGORY_DATE_INTERVAL_SPAN, 0);
|
||||
if (result.nextPosition(cfpos, status)) {
|
||||
assertEquals("Expect start index", 0, cfpos.getStart());
|
||||
assertEquals("Expect end index", 7, cfpos.getLimit());
|
||||
} else {
|
||||
// No such span: can happen if input dates are equal.
|
||||
}
|
||||
assertFalse("No more than one occurrence of the field",
|
||||
result.nextPosition(cfpos, status));
|
||||
```
|
||||
|
||||
In C, the code looks very similar, except you use the equivalent C types.
|
||||
|
||||
In Java, use the `constrainFieldAndValue` method:
|
||||
|
||||
```java
|
||||
// Let fmt be a DateIntervalFormat for locale en-US and skeleton dMMMMy
|
||||
// Let input1 be July 20, 2018 and input2 be August 3, 2018:
|
||||
FormattedDateInterval result = fmt.formatToValue(input1, input2);
|
||||
assertEquals("Expected output from format",
|
||||
"July 20 \u2013 August 3, 2018", result.toString());
|
||||
ConstrainedFieldPosition cfpos = new ConstrainedFieldPosition();
|
||||
cfpos.constrainFieldAndValue(DateIntervalFormat.SpanField.DATE_INTERVAL_SPAN, 0);
|
||||
if (result.nextPosition(cfpos)) {
|
||||
assertEquals("Expect start index", 0, cfpos.getStart());
|
||||
assertEquals("Expect end index", 7, cfpos.getLimit());
|
||||
} else {
|
||||
// No such span: can happen if input dates are equal.
|
||||
}
|
||||
assertFalse("No more than one occurrence of the field",
|
||||
result.nextPosition(cfpos));
|
||||
```
|
||||
|
||||
A span may cover multiple primitive fields; in the above example, the span
|
||||
contains both a month and a date. Using FormattedValue, those primitive
|
||||
fields will also be present, and you can check their start and end indices to
|
||||
see if they are contained within a desired span.
|
302
docs/userguide/format_parse/numbers/skeletons.md
Normal file
302
docs/userguide/format_parse/numbers/skeletons.md
Normal file
|
@ -0,0 +1,302 @@
|
|||
<!--
|
||||
© 2019 and later: Unicode, Inc. and others.
|
||||
License & terms of use: http://www.unicode.org/copyright.html
|
||||
-->
|
||||
|
||||
Number Skeletons
|
||||
================
|
||||
|
||||
Number skeletons are a locale-agnostic way to configure a NumberFormatter in
|
||||
ICU. Number skeletons work in MessageFormat.
|
||||
|
||||
Number skeletons consist of *space-separated tokens* that correspond to
|
||||
settings in ICU NumberFormatter. For example, to format a currency in compact
|
||||
notation, you could use this skeleton:
|
||||
|
||||
compact-short currency/GBP
|
||||
|
||||
To use a skeleton in MessageFormat, use the "number" type and prefix the
|
||||
skeleton with `::`
|
||||
|
||||
{0, number, ::compact-short currency/GBP}
|
||||
|
||||
## Syntax
|
||||
|
||||
A token consists of a *stem* and zero or more *options*. The stem is what
|
||||
occurs before the first "/" character in a token, and the options are each of
|
||||
the subsequent "/"-delimited strings. For example, "compact-short" and
|
||||
"currency" are stems, and "GBP" is an option.
|
||||
|
||||
Stems might also be dynamic strings (not a fixed list); these are called
|
||||
*blueprint stems*. For example, to format a number with 2-3 significant
|
||||
digits, you could use the following stem:
|
||||
|
||||
@@#
|
||||
|
||||
A few examples of number skeletons are shown below. The list of available
|
||||
stems and options can be found below in [Skeleton Stems and
|
||||
Options](#skeleton-stems-and-options).
|
||||
|
||||
## Examples
|
||||
|
||||
| Skeleton | Input | en-US Output | Comments |
|
||||
|---|---|---|---|
|
||||
| `percent` | 25 | 25% |
|
||||
| `.00` | 25 | 25.00 | Equivalent to Precision::fixedFraction(2) |
|
||||
| `percent .00` | 25 | 25.00% |
|
||||
| `scale/100` | 0.3 | 30 | Multiply by 100 before formatting |
|
||||
| `percent scale/100` | 0.3 | 30% |
|
||||
| `measure-unit/length-meter` | 5 | 5 m | UnitWidth defaults to Short |
|
||||
| `measure-unit/length-meter` <br/> `unit-width-full-name` | 5 | 5 meters |
|
||||
| `currency/CAD` | 10 | CA$10.00 |
|
||||
| `currency/CAD` <br/> `unit-width-narrow` | 10 | $10.00 | Use the narrow symbol variant |
|
||||
| `compact-short` | 5000 | 5K |
|
||||
| `compact-long` | 5000 | 5 thousand |
|
||||
| `compact-short` <br/> `currency/CAD` | 5000 | CA$5K |
|
||||
| - | 5000 | 5,000 |
|
||||
| `group-min2` | 5000 | 5000 | Require 2 digits in group for separator |
|
||||
| `group-min2` | 15000 | 15,000 |
|
||||
| `sign-always` | 60 | +60 | Show sign on all numbers |
|
||||
| `sign-always` | 0 | +0 |
|
||||
| `sign-except-zero` | 60 | +60 | Show sign on all numbers except 0 |
|
||||
| `sign-except-zero` | 0 | 0 |
|
||||
| `sign-accounting` <br/> `currency/CAD` | -40 | (CA$40.00) |
|
||||
|
||||
## Skeleton Stems and Options
|
||||
|
||||
The full set of features supported by number skeletons is listed by category
|
||||
below.
|
||||
|
||||
### Notation
|
||||
|
||||
Use one of the following stems to select your notation style:
|
||||
|
||||
- `compact-short`
|
||||
- `compact-long`
|
||||
- `scientific`
|
||||
- `engineering`
|
||||
- `notation-simple`
|
||||
|
||||
The stems `scientific` and `engineering` take the following optional
|
||||
options:
|
||||
|
||||
- `/sign-xxx` sets the sign display option for the exponent; see [Sign Display](#sign-display).
|
||||
- `/+ee` sets exponent digits to "at least 2"; use `/+eee` for at least 3 digits, etc.
|
||||
|
||||
For example, all of the following skeletons are valid:
|
||||
|
||||
- `scientific`
|
||||
- `scientific/sign-always`
|
||||
- `scientific/+ee`
|
||||
- `scientific/+ee/sign-always`
|
||||
|
||||
### Unit
|
||||
|
||||
The supported types of units are percent, currency, and measurement units.
|
||||
The following skeleton tokens are accepted:
|
||||
|
||||
- `percent`
|
||||
- `permille`
|
||||
- `base-unit`
|
||||
- `currency/XXX`
|
||||
- `measure-unit/aaaa-bbbb`
|
||||
|
||||
The `percent`, `permille`, and `base-unit` stems do not take any options.
|
||||
|
||||
The `currency` stem takes one required option: the three-letter ISO code of
|
||||
the currency to be formatted.
|
||||
|
||||
The `measure-unit` stem takes one required option: the unit identifier of the
|
||||
unit to be formatted. The full unit identifier is required: both the type and
|
||||
the subtype (for example, `length-meter`).
|
||||
|
||||
### Per Unit
|
||||
|
||||
To specify a unit to put in the denominator, use the following skeleton token:
|
||||
|
||||
- `per-measure-unit/aaaa-bbbb`
|
||||
|
||||
As with the `measure-unit` stem, pass the unit identifier as the option.
|
||||
|
||||
### Unit Width
|
||||
|
||||
The unit width can be specified by the following stems:
|
||||
|
||||
- `unit-width-narrow`
|
||||
- `unit-width-short`
|
||||
- `unit-width-full-name`
|
||||
- `unit-width-iso-code`
|
||||
- `unit-width-hidden`
|
||||
|
||||
For more details, see
|
||||
[UNumberUnitWidth](http://icu-project.org/apiref/icu4c/unumberformatter_8h.html).
|
||||
|
||||
### Precision
|
||||
|
||||
The precision category has more blueprint stems than most other categories;
|
||||
they are documented in detail below. The following non-blueprint stems are
|
||||
accepted:
|
||||
|
||||
- `precision-integer` (round to the nearest integer) --- accepts fraction-precision options
|
||||
- `precision-unlimited` (do not perform rounding; display all digits)
|
||||
- `precision-increment/dddd` (round to *dddd*, a decimal number) --- see below
|
||||
- `precision-currency-standard`
|
||||
- `precision-currency-cash`
|
||||
|
||||
To round to the nearest nickel, for example, use the skeleton
|
||||
`precision-increment/0.05`. For more information on the decimal number
|
||||
syntax, see [Scale](#scale).
|
||||
|
||||
#### Fraction Precision
|
||||
|
||||
The following are examples of fraction-precision stems:
|
||||
|
||||
| Stem | Explanation | Equivalent C++ Code |
|
||||
|---|---|---|
|
||||
| `.00` | Exactly 2 fraction digits | `Precision::fixedFraction(2) ` |
|
||||
| `.00+` | At least 2 fraction digits | `Precision::minFraction(2)` |
|
||||
| `.##` | At most 2 fraction digits | `Precision::maxFraction(2) ` |
|
||||
| `.0#` | Between 1 and 2 fraction digits | `Precision::minMaxFraction(1, 2)` |
|
||||
|
||||
More precisely, the fraction precision stem starts with `.`, then contains
|
||||
zero or more `0` symbols, which implies the minimum fraction digits. Then it
|
||||
contains either a `+`, for unlimited maximum fraction digits, or zero or more
|
||||
`#` symbols, which implies the maximum fraction digits when added to the `0`
|
||||
symbols.
|
||||
|
||||
Note that the stem `.` is considered valid and is equivalent to `precision-integer`.
|
||||
|
||||
Fraction-precision stems accept a single optional option: the minimum or
|
||||
maximum number of significant digits. This allows you to combine fraction
|
||||
precision with certain significant digits capabilities. The following are
|
||||
examples:
|
||||
|
||||
| Skeleton | Explanation | Equivalent C++ Code |
|
||||
|---|---|---|
|
||||
| `.##/@@@+` | At most 2 fraction digits, but guarantee <br/> at least 3 significant digits | `Precision::maxFraction(2)` <br/> `.withMinDigits(3)` |
|
||||
| `.00/@##` | Exactly 2 fraction digits, but do not <br/> display more than 3 significant digits | `Precision::fixedFraction(2)` <br/> `.withMaxDigits(3)` |
|
||||
|
||||
Precisely, the option starts with one or more `@` symbols. Then it contains
|
||||
either a `+`, for `::withMinDigits`, or one or more `#` symbols, for
|
||||
`::withMaxDigits`. If a `#` symbol is present, there must be only one `@`
|
||||
symbol.
|
||||
|
||||
#### Significant Digits Precision
|
||||
|
||||
The following are examples of stems for significant figures:
|
||||
|
||||
| Stem | Explanation | Equivalent C++ Code|
|
||||
|---|---|---|
|
||||
| `@@@` | Exactly 3 significant digits | `Precision::fixedSignificantDigits(3)` |
|
||||
| `@@@+` | At least 3 significant digits | `Precision::minSignificantDigits(3)` |
|
||||
| `@##` | At most 3 significant digits | `Precision::maxSignificantDigits(3)` |
|
||||
| `@@#` | Between 2 and 3 significant digits | `...::minMaxSignificantDigits(2, 3)` |
|
||||
|
||||
The precise syntax is very similar to fraction precision. The blueprint stem
|
||||
starts with one or more `@` symbols, which implies the minimum significant
|
||||
digits. Then it contains either a `+`, for unlimited maximum significant
|
||||
digits, or zero or more `#` symbols, which implies the maximum significant
|
||||
digits when added to the `@` symbols.
|
||||
|
||||
### Rounding Mode
|
||||
|
||||
The rounding mode can be specified by the following stems:
|
||||
|
||||
- `rounding-mode-ceiling`
|
||||
- `rounding-mode-floor`
|
||||
- `rounding-mode-down`
|
||||
- `rounding-mode-up`
|
||||
- `rounding-mode-half-even`
|
||||
- `rounding-mode-half-down`
|
||||
- `rounding-mode-half-up`
|
||||
- `rounding-mode-unnecessary`
|
||||
|
||||
For more details, see [Rounding
|
||||
Modes](http://userguide.icu-project.org/formatparse/numbers/rounding-modes).
|
||||
|
||||
### Integer Width
|
||||
|
||||
The following examples show how to specify integer width (minimum or maximum
|
||||
integer digits):
|
||||
|
||||
| Token | Explanation | Equivalent C++ Code |
|
||||
|---|---|---|
|
||||
| `integer-width/+000` | At least 3 <br/> integer digits | `IntegerWidth::zeroFillTo(3)` |
|
||||
| `integer-width/##0` | Between 1 and 3 <br/> integer digits | `IntegerWidth::zeroFillTo(1)` <br/> `.truncateAt(3)` |
|
||||
| `integer-width/00` | Exactly 2 <br/> integer digits | `IntegerWidth::zeroFillTo(2)` <br/> `.truncateAt(2)` |
|
||||
| `integer-width/+` | Zero or more <br/> integer digits | `IntegerWidth::zeroFillTo(0)` |
|
||||
|
||||
The option starts with either a single `+` symbol, signaling no limit on the
|
||||
number of integer digits (no *truncateAt*), or zero or more `#` symbols. It
|
||||
should then be followed by zero or more `0` symbols, indicating the minimum
|
||||
integer digits (the argument to *zeroFillTo*). If there is no `+` symbol, the
|
||||
maximum integer digits (the argument to *truncateAt*) is the number of `#`
|
||||
symbols plus the number of `0` symbols.
|
||||
|
||||
### Scale
|
||||
|
||||
To specify the scale, use the following stem and option:
|
||||
|
||||
- `scale/dddd`
|
||||
|
||||
where *dddd* is a decimal number. For example, the following are valid
|
||||
skeletons:
|
||||
|
||||
- `scale/100` (multiply by 100)
|
||||
- `scale/1E2` (same as above)
|
||||
- `scale/0.5` (multiply by 0.5)
|
||||
|
||||
The decimal number should conform to a standard decimal number syntax. In
|
||||
C++, it is parsed using the decimal number library described in
|
||||
[LocalizedNumberFormatter::formatDecimal](http://icu-project.org/apiref/icu4c/classicu_1_1number_1_1LocalizedNumberFormatter.html).
|
||||
In Java, it is parsed using
|
||||
[BigDecimal](https://docs.oracle.com/javase/7/docs/api/java/math/BigDecimal.html#BigDecimal%28java.lang.String%29).
|
||||
For maximum compatibility, it is highly recommended that your decimal number
|
||||
is able to be parsed by both engines.
|
||||
|
||||
### Grouping
|
||||
|
||||
The grouping strategy can be specified by the following stems:
|
||||
|
||||
- `group-off`
|
||||
- `group-min2`
|
||||
- `group-auto`
|
||||
- `group-on-aligned`
|
||||
- `group-thousands`
|
||||
|
||||
For more details, see
|
||||
[UNumberGroupingStrategy](http://icu-project.org/apiref/icu4c/unumberformatter_8h.html).
|
||||
|
||||
### Symbols
|
||||
|
||||
The following stems are allowed for specifying the number symbols:
|
||||
|
||||
- `latin` (use Latin-script digits)
|
||||
- `numbering-system/nnnn` (use the `nnnn` numbering system)
|
||||
|
||||
A custom DecimalFormatSymbols instance is not supported at this time.
|
||||
|
||||
### Sign Display
|
||||
|
||||
The following stems specify sign display:
|
||||
|
||||
- `sign-auto`
|
||||
- `sign-always`
|
||||
- `sign-never`
|
||||
- `sign-accounting`
|
||||
- `sign-accounting-always`
|
||||
- `sign-except-zero`
|
||||
- `sign-accounting-except-zero`
|
||||
|
||||
For more details, see
|
||||
[UNumberSignDisplay](http://icu-project.org/apiref/icu4c/unumberformatter_8h.html).
|
||||
|
||||
### Decimal Separator Display
|
||||
|
||||
The following stems specify decimal separator display:
|
||||
|
||||
- `decimal-auto`
|
||||
- `decimal-always`
|
||||
|
||||
For more details, see
|
||||
[UNumberDecimalSeparatorDisplay](http://icu-project.org/apiref/icu4c/unumberformatter_8h.html).
|
633
docs/userguide/icu_data/buildtool.md
Normal file
633
docs/userguide/icu_data/buildtool.md
Normal file
|
@ -0,0 +1,633 @@
|
|||
<!--
|
||||
© 2019 and later: Unicode, Inc. and others.
|
||||
License & terms of use: http://www.unicode.org/copyright.html
|
||||
-->
|
||||
|
||||
ICU Data Build Tool
|
||||
===================
|
||||
|
||||
ICU 64 provides a tool for configuring your ICU locale data file with finer
|
||||
granularity. This page explains how to use this tool to customize and reduce
|
||||
your data file size.
|
||||
|
||||
## Overview: What is in the ICU data file?
|
||||
|
||||
There are hundreds of **locales** supported in ICU (including script and
|
||||
region variants), and ICU supports many different **features**. For each
|
||||
locale and for each feature, data is stored in one or more data files.
|
||||
|
||||
Those data files are compiled and then bundled into a `.dat` file called
|
||||
something like `icudt64l.dat`, which is little-endian data for ICU 64. This
|
||||
dat file is packaged into the `libicudata.so` on Linux or `libicudata.dll.a`
|
||||
on Windows. In ICU4J, it is bundled into a jar file named `icudata.jar`.
|
||||
|
||||
At a high level, the size of the ICU data file corresponds to the
|
||||
cross-product of locales and features, except that not all features require
|
||||
locale-specific data, and not all locales require data for all features. The
|
||||
data file contents can be approximately visualized like this:
|
||||
|
||||
<img alt="Features vs. Locales" src="../assets/features_locales.svg" style="max-width:600px" />
|
||||
|
||||
The `icudt64l.dat` file is 27 MiB uncompressed and 11 MiB gzipped. This file
|
||||
size is too large for certain use cases, such as bundling the data file into a
|
||||
smartphone app or an embedded device. This is something the ICU Data Build
|
||||
Tool aims to solve.
|
||||
|
||||
## ICU Data Configuration File
|
||||
|
||||
The ICU Data Build Tool enables you to write a configuration file that
|
||||
specifies what features and locales to include in a custom data bundle.
|
||||
|
||||
The configuration file may be written in either [JSON](http://json.org/) or
|
||||
[Hjson](https://hjson.org/). To build ICU4C with custom data, set the
|
||||
`ICU_DATA_FILTER_FILE` environment variable when running `runConfigureICU` on
|
||||
Unix or when building the data package on Windows. For example:
|
||||
|
||||
ICU_DATA_FILTER_FILE=filters.json path/to/icu4c/source/runConfigureICU Linux
|
||||
|
||||
The ICU Data Build Tool will work out of the box with a default Python
|
||||
installation. In order to use Hjson syntax, the `hjson` pip module must be
|
||||
installed on your system. You should also consider installing the
|
||||
`jsonschema` module to print messages when errors are found in your config
|
||||
file.
|
||||
|
||||
$ pip3 install --user hjson jsonschema
|
||||
|
||||
To build ICU4J with custom data, you must first build ICU4C with custom data
|
||||
and then generate the JAR file. For more information, read
|
||||
[icu4j-readme.txt](https://github.com/unicode-org/icu/blob/master/icu4c/source/data/icu4j-readme.txt).
|
||||
|
||||
### Locale Slicing
|
||||
|
||||
The simplest way to slice ICU data is by locale. The ICU Data Build Tool
|
||||
makes it easy to select your desired locales to suit a number of use cases.
|
||||
|
||||
#### Filtering by Language Only
|
||||
|
||||
Here is a *filters.json* file that builds ICU data with support for English,
|
||||
Chinese, and German, including *all* script and regional variants for those
|
||||
languages:
|
||||
|
||||
{
|
||||
"localeFilter": {
|
||||
"filterType": "language",
|
||||
"whitelist": [
|
||||
"en",
|
||||
"de",
|
||||
"zh"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
The *filterType* "language" only supports slicing by entire languages.
|
||||
|
||||
#### Filtering by Locale
|
||||
|
||||
For more control, use *filterType* "locale". Here is a *filters.hjson* file that
|
||||
includes the same three languages as above, including regional variants, but
|
||||
only the default script (e.g., Simplified Han for Chinese):
|
||||
|
||||
localeFilter: {
|
||||
filterType: locale
|
||||
whitelist: [
|
||||
en
|
||||
de
|
||||
zh
|
||||
]
|
||||
}
|
||||
|
||||
#### Adding Script Variants (includeScripts = true)
|
||||
|
||||
You may set the *includeScripts* option to true to include all scripts for a
|
||||
language while using *filterType* "locale". This results in behavior similar
|
||||
to *filterType* "language". In the following JSON example, all scripts for
|
||||
Chinese are included:
|
||||
|
||||
{
|
||||
"localeFilter": {
|
||||
"filterType": "locale",
|
||||
"includeScripts": true,
|
||||
"whitelist": [
|
||||
"en",
|
||||
"de",
|
||||
"zh"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
If you wish to explicitly list the scripts, you may put the script code in the
|
||||
locale tag in the whitelist, and you do not need the *includeScripts* option
|
||||
enabled. For example, in Hjson, to include Han Traditional ***but not Han
|
||||
Simplified***:
|
||||
|
||||
localeFilter: {
|
||||
filterType: locale
|
||||
whitelist: [
|
||||
en
|
||||
de
|
||||
zh_Hant
|
||||
]
|
||||
}
|
||||
|
||||
Note: the option *includeScripts* is only supported at the language level;
|
||||
i.e., in order to include all scripts for a particular language, you must
|
||||
specify the language alone, without a region tag.
|
||||
|
||||
#### Removing Regional Variants (includeChildren = false)
|
||||
|
||||
If you wish to enumerate exactly which regional variants you wish to support,
|
||||
you may use *filterType* "locale" with the *includeChildren* setting turned to
|
||||
false. The following *filters.hjson* file includes English (US), English
|
||||
(UK), German (Germany), and Chinese (China, Han Simplified), as well as their
|
||||
dependencies, *but not* other regional variants like English (Australia),
|
||||
German (Switzerland), or Chinese (Taiwan, Han Traditional):
|
||||
|
||||
localeFilter: {
|
||||
filterType: locale
|
||||
includeChildren: false
|
||||
whitelist: [
|
||||
en_US
|
||||
en_GB
|
||||
de_DE
|
||||
zh_CN
|
||||
]
|
||||
}
|
||||
|
||||
Including dependencies, the above filter would include the following data files:
|
||||
|
||||
- root.txt
|
||||
- en.txt
|
||||
- en_US.txt
|
||||
- en_001.txt
|
||||
- en_GB.txt
|
||||
- de.txt
|
||||
- de_DE.txt
|
||||
- zh.txt
|
||||
- zh_Hans.txt
|
||||
- zh_Hans_CN.txt
|
||||
- zh_CN.txt
|
||||
|
||||
### File Slicing (coarse-grained features)
|
||||
|
||||
ICU provides a lot of features, of which you probably need only a small subset
|
||||
for your application. Feature slicing is a powerful way to prune out data for
|
||||
any features you are not using.
|
||||
|
||||
***CAUTION:*** When slicing by features, you must manually include all
|
||||
dependencies. For example, if you are formatting dates, you must include not
|
||||
only the date formatting data but also the number formatting data, since dates
|
||||
contain numbers. Expect to spend a fair bit of time debugging your feature
|
||||
filter to get it to work the way you expect it to.
|
||||
|
||||
The data for many ICU features live in individual files. The ICU Data Build
|
||||
Tool puts similar *types* of files into categories. The following table
|
||||
summarizes the ICU data files and their corresponding features and categories:
|
||||
|
||||
| Feature | Category ID(s) | Data Files <br/> ([icu4c/source/data](https://github.com/unicode-org/icu/tree/master/icu4c/source/data)) | Resource Size <br/> (as of ICU 64) |
|
||||
|---|---|---|---|
|
||||
| Break Iteration | `"brkitr_rules"` <br/> `"brkitr_dictionaries"` <br/> `"brkitr_tree"` | brkitr/rules/\*.txt <br/> brkitr/dictionaries/\*.txt <br/> brkitr/\*.txt | 522 KiB <br/> **2.8 MiB** <br/> 14 KiB |
|
||||
| Charset Conversion | `"conversion_mappings"` | mappings/\*.ucm | **4.9 MiB** |
|
||||
| Collation <br/> *[more info](#collation-ucadata)* | `"coll_ucadata"` <br/> `"coll_tree"` | in/coll/ucadata-\*.icu <br/> coll/\*.txt | 511 KiB <br/> **2.8 MiB** |
|
||||
| Confusables | `"confusables"` | unidata/confusables\*.txt | 45 KiB |
|
||||
| Currencies | `"misc"` <br/> `"curr_supplemental"` <br/> `"curr_tree"` | misc/currencyNumericCodes.txt <br/> curr/supplementalData.txt <br/> curr/\*.txt | 3.1 KiB <br/> 27 KiB <br/> **2.5 MiB** |
|
||||
| Language Display <br/> Names | `"lang_tree"` | lang/\*.txt | **2.1 MiB** |
|
||||
| Language Tags | `"misc"` | misc/keyTypeData.txt <br/> misc/langInfo.txt <br/> misc/likelySubtags.txt <br/> misc/metadata.txt | 6.8 KiB <br/> 37 KiB <br/> 53 KiB <br/> 33 KiB |
|
||||
| Normalization | `"normalization"` | in/\*.nrm except in/nfc.nrm | 160 KiB |
|
||||
| Plural Rules | `"misc"` | misc/pluralRanges.txt <br/> misc/plurals.txt | 3.3 KiB <br/> 33 KiB |
|
||||
| Region Display <br/> Names | `"region_tree"` | region/\*.txt | **1.1 MiB** |
|
||||
| Rule-Based <br/> Number Formatting <br/> (Spellout, Ordinals) | `"rbnf_tree"` | rbnf/\*.txt | 538 KiB |
|
||||
| StringPrep | `"stringprep"` | sprep/\*.txt | 193 KiB |
|
||||
| Time Zones | `"misc"` <br/> `"zone_tree"` | misc/metaZones.txt <br/> misc/timezoneTypes.txt <br/> misc/windowsZones.txt <br/> misc/zoneinfo64.txt <br/> zone/\*.txt | 41 KiB <br/> 20 KiB <br/> 22 KiB <br/> 151 KiB <br/> **2.7 MiB** |
|
||||
| Transliteration | `"translit"` | translit/\*.txt | 685 KiB |
|
||||
| Unicode Character <br/> Names | `"unames"` | in/unames.icu | 269 KiB |
|
||||
| Unicode Text Layout | `"ulayout"` | in/ulayout.icu | 14 KiB |
|
||||
| Units | `"unit_tree"` | unit/\*.txt | **1.7 MiB** |
|
||||
| **OTHER** | `"cnvalias"` <br/> `"misc"` <br/> `"locales_tree"` | mappings/convrtrs.txt <br/> misc/dayPeriods.txt <br/> misc/genderList.txt <br/> misc/numberingSystems.txt <br/> misc/supplementalData.txt <br/> locales/\*.txt | 63 KiB <br/> 19 KiB <br/> 0.5 KiB <br/> 5.6 KiB <br/> 228 KiB <br/> **2.4 MiB** |
|
||||
|
||||
#### Filter Types
|
||||
|
||||
You may list *filters* for each category in the *featureFilters* section of
|
||||
your config file. What follows are examples of the possible types of filters.
|
||||
|
||||
##### Exclusion Filter
|
||||
|
||||
To exclude an entire category, use *filterType* "exclude". For example, to
|
||||
exclude all confusables data:
|
||||
|
||||
featureFilters: {
|
||||
confusables: {
|
||||
filterType: exclude
|
||||
}
|
||||
}
|
||||
|
||||
##### File Name Filter
|
||||
|
||||
To exclude certain files out of a category, use the file name filter, which is
|
||||
the default type of filter when *filterType* is not specified. For example,
|
||||
to include the Burmese break iteration dictionary but not any other
|
||||
dictionaries:
|
||||
|
||||
featureFilters: {
|
||||
brkitr_dictionaries: {
|
||||
whitelist: [
|
||||
burmesedict
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
Do *not* include directories or file extensions. They will be added
|
||||
automatically for you. Note that all files in a particular category have the
|
||||
same directory and extension.
|
||||
|
||||
You can use either a whitelist or a blacklist for the file name filter.
|
||||
|
||||
##### Regex Filter
|
||||
|
||||
To exclude filenames matching a certain regular expression, use *filterType*
|
||||
"regex". For example, to reject the CJK-specific break iteration rules:
|
||||
|
||||
featureFilters: {
|
||||
brkitr_rules: {
|
||||
filterType: regex
|
||||
blacklist: [
|
||||
^.*_cj$
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
The Python standard library [*re*
|
||||
module](https://docs.python.org/3/library/re.html) is used for evaluating the
|
||||
regular expressions. In case the regular expression engine is changed in the
|
||||
future, however, you are encouraged to restrict yourself to a simple set of
|
||||
regular expression operators.
|
||||
|
||||
As above, do not include directories or file extensions, and you can use
|
||||
either a whitelist or a blacklist.
|
||||
|
||||
##### Union Filter
|
||||
|
||||
You can combine the results of multiple filters with *filterType* "union".
|
||||
This filter matches files that match *at least one* of the provided filters.
|
||||
The syntax is:
|
||||
|
||||
{
|
||||
filterType: union
|
||||
unionOf: [
|
||||
{ /* filter 1 */ },
|
||||
{ /* filter 2 */ },
|
||||
// ...
|
||||
]
|
||||
}
|
||||
|
||||
This filter type is useful for combining "locale" filters with different
|
||||
includeScripts or includeChildren options.
|
||||
|
||||
#### Locale-Tree Categories
|
||||
|
||||
Several categories have the `_tree` suffix. These categories are for "locale
|
||||
trees": they contain locale-specific data. ***The [localeFilter configuration
|
||||
option](#slicing-data-by-locale) sets the default file filter for all `_tree`
|
||||
categories.***
|
||||
|
||||
If you want to include different locales for different locale file trees, you
|
||||
can override their filter in the *featureFilters* section of the config file.
|
||||
For example, to include only Italian data for currency symbols *instead of*
|
||||
the common locales specified in *localeFilter*, you can do the following:
|
||||
|
||||
featureFilters: {
|
||||
curr_tree: {
|
||||
filterType: locale
|
||||
whitelist: [
|
||||
it
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
You can exclude an entire `_tree` category without affecting other categories.
|
||||
For example, to exclude region display names:
|
||||
|
||||
featureFilters: {
|
||||
region_tree: {
|
||||
filterType: exclude
|
||||
}
|
||||
}
|
||||
|
||||
Note that you are able to use any of the other filter types for `_tree`
|
||||
categories, but you must be very careful that you are including all of the
|
||||
correct files. For example, `en_GB` requires `en_001`, and you must always
|
||||
include `root`. If you use the "language" or "locale" filter types, this
|
||||
logic is done for you.
|
||||
|
||||
### Resource Bundle Slicing (fine-grained features)
|
||||
|
||||
The third section of the ICU filter config file is *resourceFilters*. With
|
||||
this section, you can dive inside resource bundle files to remove even more
|
||||
data.
|
||||
|
||||
You can apply resource filters to all locale tree categories as well as to
|
||||
categories that include resource bundles, such as the `"misc"` category.
|
||||
|
||||
For example, consider measurement units. There is one unit file per locale (example:
|
||||
[en.txt](https://github.com/unicode-org/icu/blob/master/icu4c/source/data/unit/en.txt)),
|
||||
and that file contains data for all measurement units in CLDR. However, if
|
||||
you are only formatting distances, for example, you may need the data for only
|
||||
a small set of units.
|
||||
|
||||
Here is how you could include units of length in the "short" style but no
|
||||
other units:
|
||||
|
||||
resourceFilters: [
|
||||
{
|
||||
categories: [
|
||||
unit_tree
|
||||
]
|
||||
rules: [
|
||||
-/units
|
||||
-/unitsNarrow
|
||||
-/unitsShort
|
||||
+/unitsShort/length
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
Conceptually, the rules are applied from top to bottom. First, all data for
|
||||
all three styles of units are removed, and then the short length units are
|
||||
added back.
|
||||
|
||||
#### Wildcard Character
|
||||
|
||||
You can use the wildcard character (`*`) to match a piece of the resource
|
||||
path. For example, to include length units for all three styles, you can do:
|
||||
|
||||
resourceFilters: [
|
||||
{
|
||||
categories: [
|
||||
unit_tree
|
||||
]
|
||||
rules: [
|
||||
-/units
|
||||
-/unitsNarrow
|
||||
-/unitsShort
|
||||
+/*/length
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
The wildcard must be the only character in its path segment. Future ICU
|
||||
versions may expand the syntax.
|
||||
|
||||
#### Resource Filter for Specific File
|
||||
|
||||
The resource filter object takes an optional *files* setting which accepts a
|
||||
file filter in the same syntax used above for file filtering. For example, if
|
||||
you wanted to apply a filter to misc/supplementalData.txt, you could do the
|
||||
following (this example removes calendar data):
|
||||
|
||||
resourceFilters: [
|
||||
{
|
||||
categories: ["misc"]
|
||||
files: {
|
||||
whitelist: ["supplementalData"]
|
||||
}
|
||||
rules: [
|
||||
-/calendarData
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
#### Combining Multiple Resource Filter Specs
|
||||
|
||||
You can also list multiple resource filter objects in the *resourceFilters*
|
||||
array; the filters are added from top to bottom. For example, here is an
|
||||
advanced configuration that includes "mile" for en-US and "kilometer" for
|
||||
en-CA; this also makes use of the *files* option:
|
||||
|
||||
resourceFilters: [
|
||||
{
|
||||
categories: ["unit_tree"]
|
||||
rules: [
|
||||
-/units
|
||||
-/unitsNarrow
|
||||
-/unitsShort
|
||||
]
|
||||
},
|
||||
{
|
||||
categories: ["unit_tree"]
|
||||
files: {
|
||||
filterType: locale
|
||||
whitelist: ["en_US"]
|
||||
}
|
||||
rules: [
|
||||
+/*/length/mile
|
||||
]
|
||||
},
|
||||
{
|
||||
categories: ["unit_tree"]
|
||||
files: {
|
||||
filterType: locale
|
||||
whitelist: ["en_CA"]
|
||||
}
|
||||
rules: [
|
||||
+/*/length/kilometer
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
The above example would give en-US these resource filter rules:
|
||||
|
||||
-/units
|
||||
-/unitsNarrow
|
||||
-/unitsShort
|
||||
+/*/length/mile
|
||||
|
||||
and en-CA these resource filter rules:
|
||||
|
||||
-/units
|
||||
-/unitsNarrow
|
||||
-/unitsShort
|
||||
+/*/length/kilometer
|
||||
|
||||
In accordance with *filterType* "locale", the parent locales *en* and *root*
|
||||
would get both units; this is required since both en-US and en-CA may inherit
|
||||
from the parent locale:
|
||||
|
||||
-/units
|
||||
-/unitsNarrow
|
||||
-/unitsShort
|
||||
+/*/length/mile
|
||||
+/*/length/kilometer
|
||||
|
||||
## Debugging Tips
|
||||
|
||||
**Run Python directly:** If you do not want to wait for ./runConfigureICU to
|
||||
finish, you can directly re-generate the rules using your filter file with the
|
||||
following command line run from *icu4c/source/data*.
|
||||
|
||||
$ python3 -m buildtool --mode=gnumake --seqmode=parallel --filter_file=filters.json > rules.mk
|
||||
|
||||
**Install jsonschema:** Install the `jsonschema` pip package to get warnings
|
||||
about problems with your filter file.
|
||||
|
||||
**Inspect data/rules.mk:** The Python script outputs the file *rules.mk*
|
||||
inside *icu4c/source/data*. To see what is going to get built, you can inspect
|
||||
that file. First build ICU normally, and copy *rules.mk* to
|
||||
*rules_default.mk*. Then build ICU with your filter file. Now you can take the
|
||||
diff between *rules_default.mk* and *rules.mk* to see exactly what your filter
|
||||
file is removing.
|
||||
|
||||
**Inspect the output:** After a `make clean` and `make` with a new *rules.mk*,
|
||||
you can look inside the directory *icu4c/source/data/out* to see the files
|
||||
that got built.
|
||||
|
||||
**Inspect the compiled resource filter rules:** If you are using a resource
|
||||
filter, the resource filter rules get compiled for each individual locale
|
||||
inside *icu4c/source/data/out/tmp/filters*. You can look at those files to see
|
||||
what filter rules are being applied to each individual locale.
|
||||
|
||||
**Run genrb in verbose mode:** For debugging a resource filter, you can run
|
||||
genrb in verbose mode to see which resources got stripped. To do this, first
|
||||
inspect the make output and find a command line like this:
|
||||
|
||||
LD_LIBRARY_PATH=../lib:../stubdata:../tools/ctestfw:$LD_LIBRARY_PATH ../bin/genrb --filterDir ./out/tmp/filters/unit_tree -s ./unit -d ./out/build/icudt64l/unit/ -i ./out/build/icudt64l --usePoolBundle ./out/build/icudt64l/unit/ -k en.txt
|
||||
|
||||
Copy that command line and re-run it from *icu4c/source/data* with the `-v`
|
||||
flag added to the end. The command will print out exactly which resource paths
|
||||
are being included and excluded as well as a model of the filter rules applied
|
||||
to this file.
|
||||
|
||||
**Inspect .res files with derb:** The `derb` tool can convert .res files back
|
||||
to .txt files after filtering. For example, to convert the above unit res file
|
||||
back to a txt file, you can run this command from *icu4c/source*:
|
||||
|
||||
LD_LIBRARY_PATH=lib bin/derb data/out/build/icudt64l/unit/en.res
|
||||
|
||||
That will produce a file *en.txt* in your current directory, which is the
|
||||
original *data/unit/en.txt* but after resource filters were applied.
|
||||
|
||||
**Put complex rules first** and **use the wildcard `*` sparingly:** The order
|
||||
of the filter rules matters a great deal in how effective your data size
|
||||
reduction can be, and the wildcard `*` can sometimes produce behavior that is
|
||||
tricky to reason about. For example, these three lists of filter rules look
|
||||
similar at first glance but actually produce different output:
|
||||
|
||||
<table>
|
||||
<tr>
|
||||
<th>Unit Resource Filter Rules</th>
|
||||
<th>Unit Resource Size</th>
|
||||
<th>Commentary</th>
|
||||
<th>Result</th>
|
||||
</tr>
|
||||
<tr><td><pre>
|
||||
-/*/*
|
||||
+/*/digital
|
||||
-/*/digital/*/dnam
|
||||
-/durationUnits
|
||||
-/units
|
||||
-/unitsNarrow
|
||||
</pre></td><td>77 KiB</td><td>
|
||||
First, remove all unit types. Then, add back digital units across all unit
|
||||
widths. Then, remove display names from digital units. Then, remove duration
|
||||
unit patterns and long and narrow forms.
|
||||
</td><td>
|
||||
Digital units in short form are included; all other units are removed.
|
||||
</td></tr>
|
||||
<tr><td><pre>
|
||||
-/durationUnits
|
||||
-/units
|
||||
-/unitsNarrow
|
||||
-/*/*
|
||||
+/*/digital
|
||||
-/*/digital/*/dnam
|
||||
</pre></td><td>125 KiB</td><td>
|
||||
First, remove duration unit patterns and long and narrow forms. Then, remove
|
||||
all unit types. Then, add back digital units across all unit widths. Then,
|
||||
remove display names from digital units.
|
||||
</td><td>
|
||||
Digital units are included <em>in all widths</em>; all other units are removed.
|
||||
</td></tr>
|
||||
<tr><td><pre>
|
||||
-/*/*
|
||||
+/*/digital
|
||||
-/*/*/*/dnam
|
||||
-/durationUnits
|
||||
-/units
|
||||
-/unitsNarrow
|
||||
</pre></td><td>191 KiB</td><td>
|
||||
First, remove all unit types. Then, add back digital units across all unit
|
||||
widths. Then, remove display names from all units. Then, remove duration unit
|
||||
patterns and long and narrow forms.
|
||||
</td><td>
|
||||
Digital units in short form are included, as is the <em>tree structure</em>
|
||||
for all other units, even though the other units have no real data.
|
||||
</td></tr>
|
||||
</table>
|
||||
|
||||
By design, empty tree structure is retained in the unit bundle. This is
|
||||
because there are numerous instances in ICU data where the presence of an
|
||||
empty tree carries meaning. However, it means that you must be careful when
|
||||
building resource filter rules in order to achieve the optimal data bundle
|
||||
size.
|
||||
|
||||
Using the `-v` option in genrb (described above) is helpful when debugging
|
||||
these types of issues.
|
||||
|
||||
## Other Features of the ICU Data Build Tool
|
||||
|
||||
While data filtering is the primary reason the ICU Data Build Tool was
|
||||
developed, there are additional use cases.
|
||||
|
||||
### Running Data Build without Configure/Make
|
||||
|
||||
You can build the dat file outside of the ICU build system by directly
|
||||
invoking the Python buildtool. Run the following command to see the help text
|
||||
for the CLI tool:
|
||||
|
||||
$ PYTHONPATH=path/to/icu4c/source/data python3 -m buildtool --help
|
||||
|
||||
### Collation UCAData
|
||||
|
||||
For using collation (sorting and searching) in any language, the "root"
|
||||
collation data file must be included. It provides the Unicode CLDR default
|
||||
sort order for all code points, and forms the basis for language-specific
|
||||
tailorings as well as for custom collators built at runtime.
|
||||
|
||||
There are two versions of the root collation data file:
|
||||
|
||||
- ucadata-unihan.txt (compiled size: 511 KiB)
|
||||
- ucadata-implicithan.txt (compiled size: 178 KiB)
|
||||
|
||||
The unihan version sorts Han characters in radical-stroke order according to
|
||||
Unicode, which is a somewhat useful default sort order, especially for use
|
||||
with non-CJK languages. The implicithan version sorts Han characters in the
|
||||
order of their Unicode assignment, which is similar to radical-stroke order
|
||||
for common characters but arbitrary for others. For more information, see
|
||||
[UTS #10 §10.1.3](https://www.unicode.org/reports/tr10/#Implicit_Weights).
|
||||
|
||||
By default, the unihan version is used. The unihan version of the data file
|
||||
is much larger than that for implicithan, so if you need collation but also
|
||||
small data, then you may want to select the implicithan version. To use the
|
||||
implicithan version, put the following setting in your *filters.json* file:
|
||||
|
||||
{
|
||||
"collationUCAData": "implicithan"
|
||||
}
|
||||
|
||||
### File Substitution
|
||||
|
||||
Using the configuration file, you can perform whole-file substitutions. For
|
||||
example, suppose you want to replace the transliteration rules for
|
||||
*Zawgyi_my*. You could create a directory called `my_icu_substitutions`
|
||||
containing your new `Zawgyi_my.txt` rule file, and then put this in your
|
||||
configuration file:
|
||||
|
||||
fileReplacements: {
|
||||
directory: "/path/to/my_icu_substitutions"
|
||||
replacements: [
|
||||
{
|
||||
src: "Zawgyi_my.txt"
|
||||
dest: "translit/Zawgyi_my.txt"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Whole-file substitution happens before all other filters are applied.
|
|
@ -13,7 +13,7 @@
|
|||
# (bring up Powershell ISE)
|
||||
# cd C:\icu\icu4c\
|
||||
# Set-ExecutionPolicy -Scope Process Unrestricted
|
||||
# .\packaging\distrelease.ps1
|
||||
# .\packaging\distrelease.ps1 -arch "x64 or x86"
|
||||
#
|
||||
# Will emit: c:\icu4c\icu\source\dist\icu-windows.zip
|
||||
#
|
||||
|
@ -22,6 +22,9 @@
|
|||
# see https://docs.microsoft.com/powershell/module/microsoft.powershell.core/about/about_execution_policies?view=powershell-5.1&viewFallbackFrom=powershell-Microsoft.PowerShell.Core
|
||||
# for more about execution policies.
|
||||
|
||||
Param(
|
||||
[string]$arch = "x64" # use x64 as default
|
||||
)
|
||||
|
||||
$icuDir = Split-Path -Path $MyInvocation.MyCommand.Definition -Parent
|
||||
$icuDir = Resolve-Path -Path '$icuDir\..'
|
||||
|
@ -35,11 +38,25 @@ Get-ChildItem -Path $source -ErrorAction SilentlyContinue | Remove-Item -Recurse
|
|||
New-Item -Path $source -ItemType "directory" -ErrorAction SilentlyContinue
|
||||
|
||||
# copy required stuff
|
||||
Copy-Item -Path "$icuDir\lib" -Destination $source -Recurse
|
||||
Copy-Item -Path "$icuDir\lib64" -Destination $source -Recurse
|
||||
if ($arch -eq "x64")
|
||||
{
|
||||
Copy-Item -Path "$icuDir\lib64" -Destination $source -Recurse
|
||||
Copy-Item -Path "$icuDir\bin64" -Destination $source -Recurse
|
||||
}
|
||||
elseif ($arch -eq "x86")
|
||||
{
|
||||
Copy-Item -Path "$icuDir\lib" -Destination $source -Recurse
|
||||
Copy-Item -Path "$icuDir\bin" -Destination $source -Recurse
|
||||
}
|
||||
else
|
||||
{
|
||||
$filename = $MyInvocation.MyCommand.Name;
|
||||
echo "Invalid architecture."
|
||||
echo "Usage: $filename -arch `"x64 or x86`""
|
||||
exit
|
||||
}
|
||||
|
||||
Copy-Item -Path "$icuDir\include" -Destination $source -Recurse
|
||||
Copy-Item -Path "$icuDir\bin" -Destination $source -Recurse
|
||||
Copy-Item -Path "$icuDir\bin64" -Destination $source -Recurse
|
||||
Copy-Item -Path "$icuDir\APIChangeReport.html" -Destination $source -Recurse
|
||||
Copy-Item -Path "$icuDir\icu4c.css" -Destination $source -Recurse
|
||||
Copy-Item -Path "$icuDir\LICENSE" -Destination $source -Recurse
|
||||
|
|
|
@ -57,18 +57,16 @@ toASCIILower(UChar ch){
|
|||
|
||||
inline static UBool
|
||||
startsWithPrefix(const UChar* src , int32_t srcLength){
|
||||
UBool startsWithPrefix = TRUE;
|
||||
|
||||
if(srcLength < ACE_PREFIX_LENGTH){
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){
|
||||
if(toASCIILower(src[i]) != ACE_PREFIX[i]){
|
||||
startsWithPrefix = FALSE;
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
return startsWithPrefix;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
@ -441,6 +439,7 @@ _internal_toUnicode(const UChar* src, int32_t srcLength,
|
|||
for(int32_t j=0; j<srcLength; j++){
|
||||
if(src[j]> 0x7f){
|
||||
srcIsASCII = FALSE;
|
||||
break;
|
||||
}/*else if(isLDHChar(src[j])==FALSE){
|
||||
// here we do not assemble surrogates
|
||||
// since we know that LDH code points
|
||||
|
|
|
@ -3034,11 +3034,11 @@ public:
|
|||
* uint16_t * constructor.
|
||||
* Delegates to UnicodeString(const char16_t *, int32_t).
|
||||
* @param text UTF-16 string
|
||||
* @param length string length
|
||||
* @param textLength string length
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UnicodeString(const uint16_t *text, int32_t length) :
|
||||
UnicodeString(ConstChar16Ptr(text), length) {}
|
||||
UnicodeString(const uint16_t *text, int32_t textLength) :
|
||||
UnicodeString(ConstChar16Ptr(text), textLength) {}
|
||||
#endif
|
||||
|
||||
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
|
||||
|
@ -3047,21 +3047,21 @@ public:
|
|||
* (Only defined if U_SIZEOF_WCHAR_T==2.)
|
||||
* Delegates to UnicodeString(const char16_t *, int32_t).
|
||||
* @param text NUL-terminated UTF-16 string
|
||||
* @param length string length
|
||||
* @param textLength string length
|
||||
* @stable ICU 59
|
||||
*/
|
||||
UnicodeString(const wchar_t *text, int32_t length) :
|
||||
UnicodeString(ConstChar16Ptr(text), length) {}
|
||||
UnicodeString(const wchar_t *text, int32_t textLength) :
|
||||
UnicodeString(ConstChar16Ptr(text), textLength) {}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* nullptr_t constructor.
|
||||
* Effectively the same as the default constructor, makes an empty string object.
|
||||
* @param text nullptr
|
||||
* @param length ignored
|
||||
* @param textLength ignored
|
||||
* @stable ICU 59
|
||||
*/
|
||||
inline UnicodeString(const std::nullptr_t text, int32_t length);
|
||||
inline UnicodeString(const std::nullptr_t text, int32_t textLength);
|
||||
|
||||
/**
|
||||
* Readonly-aliasing char16_t* constructor.
|
||||
|
@ -3266,13 +3266,13 @@ public:
|
|||
* }
|
||||
* \endcode
|
||||
* @param src String using only invariant characters.
|
||||
* @param length Length of src, or -1 if NUL-terminated.
|
||||
* @param textLength Length of src, or -1 if NUL-terminated.
|
||||
* @param inv Signature-distinguishing parameter, use US_INV.
|
||||
*
|
||||
* @see US_INV
|
||||
* @stable ICU 3.2
|
||||
*/
|
||||
UnicodeString(const char *src, int32_t length, enum EInvariant inv);
|
||||
UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
|
||||
|
||||
|
||||
/**
|
||||
|
|
|
@ -31,9 +31,9 @@ SHLIB.cc= $(CXX) -dynamiclib -dynamic $(CXXFLAGS) $(LDFLAGS) $(LD_SOOPTIONS)
|
|||
|
||||
## Compiler switches to embed a library name and version information
|
||||
ifeq ($(ENABLE_RPATH),YES)
|
||||
LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(libdir)/$(notdir $(MIDDLE_SO_TARGET))
|
||||
LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(libdir)/$(notdir $(MIDDLE_SO_TARGET)) $(PKGDATA_TRAILING_SPACE)
|
||||
else
|
||||
LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(notdir $(MIDDLE_SO_TARGET))
|
||||
LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(notdir $(MIDDLE_SO_TARGET)) $(PKGDATA_TRAILING_SPACE)
|
||||
endif
|
||||
|
||||
## Compiler switch to embed a runtime search path
|
||||
|
|
|
@ -56,7 +56,7 @@ LD_RPATH_PRE= -R
|
|||
#LIBRARY_PATH_PREFIX=/usr/lib/lwp:
|
||||
|
||||
## Compiler switch to embed a library name
|
||||
LD_SONAME = -h $(notdir $(MIDDLE_SO_TARGET))
|
||||
LD_SONAME = -h $(notdir $(MIDDLE_SO_TARGET)) $(PKGDATA_TRAILING_SPACE)
|
||||
|
||||
## Shared object suffix
|
||||
SO= so
|
||||
|
|
|
@ -25,7 +25,7 @@ LD_RPATH= -R'$$'ORIGIN
|
|||
LD_RPATH_PRE= -R
|
||||
|
||||
## Compiler switch to embed a library name
|
||||
LD_SONAME = -h $(notdir $(MIDDLE_SO_TARGET))
|
||||
LD_SONAME = -h $(notdir $(MIDDLE_SO_TARGET)) $(PKGDATA_TRAILING_SPACE)
|
||||
|
||||
## Shared library options
|
||||
LD_SOOPTIONS= -Wl,-Bsymbolic
|
||||
|
|
|
@ -16,6 +16,7 @@ include $(top_builddir)/icudefs.mk
|
|||
|
||||
OUTPUTFILE=pkgdata.inc
|
||||
MIDDLE_SO_TARGET=
|
||||
PKGDATA_TRAILING_SPACE=" "
|
||||
|
||||
all : clean
|
||||
@echo GENCCODE_ASSEMBLY_TYPE=$(GENCCODE_ASSEMBLY) >> $(OUTPUTFILE)
|
||||
|
@ -36,7 +37,6 @@ all : clean
|
|||
@echo RANLIB=$(RANLIB) >> $(OUTPUTFILE)
|
||||
@echo INSTALL_CMD=$(INSTALL-L) >> $(OUTPUTFILE)
|
||||
|
||||
|
||||
clean :
|
||||
$(RMV) $(OUTPUTFILE)
|
||||
|
||||
|
|
|
@ -126,7 +126,7 @@ TESTDATAOUT=$(ICUP)\source\test\testdata\out
|
|||
|
||||
#
|
||||
# TESTDATABLD
|
||||
# The build directory for test data intermidiate files
|
||||
# The build directory for test data intermediate files
|
||||
# (Tests are NOT run from this makefile,
|
||||
# only the data is put in place.)
|
||||
TESTDATABLD=$(ICUP)\source\test\testdata\out\build
|
||||
|
@ -412,41 +412,34 @@ CLEAN : GODATA
|
|||
-@erase "region\*.txt"
|
||||
-@erase "zone\*.res"
|
||||
-@erase "zone\*.txt"
|
||||
@cd "$(ICUBLD_PKG)\$(ICUBRK)"
|
||||
-@erase "*.brk"
|
||||
-@erase "*.res"
|
||||
-@erase "*.txt"
|
||||
-@erase "*.dict"
|
||||
@cd "$(ICUBLD_PKG)\$(ICUCOL)"
|
||||
-@erase "*.res"
|
||||
-@erase "*.txt"
|
||||
@cd "$(ICUBLD_PKG)\$(ICURBNF)"
|
||||
-@erase "*.res"
|
||||
-@erase "*.txt"
|
||||
@cd "$(ICUBLD_PKG)\$(ICUTRNS)"
|
||||
-@erase "*.res"
|
||||
@cd "$(ICUOUT)"
|
||||
-@erase "*.dat"
|
||||
@cd "$(ICUTMP)"
|
||||
-@erase "*.html"
|
||||
-@erase "*.lst"
|
||||
-@erase "*.mak"
|
||||
-@erase "*.obj"
|
||||
-@erase "*.res"
|
||||
-@erase "*.timestamp"
|
||||
@cd "$(TESTDATABLD)"
|
||||
-@erase "*.cnv"
|
||||
-@erase "*.icu"
|
||||
-@erase "*.mak"
|
||||
-@erase "*.nrm"
|
||||
-@erase "*.res"
|
||||
-@erase "*.spp"
|
||||
-@erase "*.txt"
|
||||
@cd "$(TESTDATAOUT)"
|
||||
-@erase "*.dat"
|
||||
@cd "$(TESTDATAOUT)\testdata"
|
||||
-@erase "*.typ"
|
||||
@cd "$(ICUBLD_PKG)"
|
||||
-@erase "$(ICUBRK)\*.brk"
|
||||
-@erase "$(ICUBRK)\*.res"
|
||||
-@erase "$(ICUBRK)\*.txt"
|
||||
-@erase "$(ICUBRK)\*.dict"
|
||||
-@erase "$(ICUCOL)\*.res"
|
||||
-@erase "$(ICUCOL)\*.txt"
|
||||
-@erase "$(ICURBNF)\*.res"
|
||||
-@erase "$(ICURBNF)\*.txt"
|
||||
-@erase "$(ICUTRNS)\*.res"
|
||||
-@erase "$(ICUOUT)\*.dat"
|
||||
-@erase "$(ICUTMP)\*.html"
|
||||
-@erase "$(ICUTMP)\*.lst"
|
||||
-@erase "$(ICUTMP)\*.mak"
|
||||
-@erase "$(ICUTMP)\*.obj"
|
||||
-@erase "$(ICUTMP)\*.res"
|
||||
-@erase "$(ICUTMP)\*.timestamp"
|
||||
-@erase "$(TESTDATABLD)\*.cnv"
|
||||
-@erase "$(TESTDATABLD)\*.icu"
|
||||
-@erase "$(TESTDATABLD)\*.mak"
|
||||
-@erase "$(TESTDATABLD)\*.nrm"
|
||||
-@erase "$(TESTDATABLD)\*.res"
|
||||
-@erase "$(TESTDATABLD)\*.spp"
|
||||
-@erase "$(TESTDATABLD)\*.txt"
|
||||
-@erase "$(TESTDATAOUT)\*.dat"
|
||||
-@erase "$(TESTDATAOUT)\testdata\*.typ"
|
||||
-@erase "$(TESTDATAOUT)\testdata\*.res"
|
||||
-@erase "$(TESTDATAOUT)\testdata\*.txt"
|
||||
-@erase "$(TESTDATAOUT)\testdata\*.lst"
|
||||
|
||||
|
||||
# DLL version information
|
||||
|
|
2614
icu4c/source/data/misc/langInfo.txt
Normal file
2614
icu4c/source/data/misc/langInfo.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -2086,11 +2086,11 @@ metaZones:table(nofallback){
|
|||
{
|
||||
"America_Pacific",
|
||||
"2018-11-04 10:00",
|
||||
"2019-03-10 11:00",
|
||||
"2019-01-20 10:00",
|
||||
}
|
||||
{
|
||||
"Alaska",
|
||||
"2019-03-10 11:00",
|
||||
"2019-01-20 10:00",
|
||||
"9999-12-31 23:59",
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,17 +3,17 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
//---------------------------------------------------------
|
||||
// Build tool: tz2icu
|
||||
// Build date: Tue Feb 19 01:26:22 2019
|
||||
// Build date: Tue Mar 26 16:57:59 2019
|
||||
// tz database: ftp://ftp.iana.org/tz/
|
||||
// tz version: 2018i
|
||||
// ICU version: 63.1
|
||||
// tz version: 2019a
|
||||
// ICU version: 64.1
|
||||
//---------------------------------------------------------
|
||||
// >> !!! >> THIS IS A MACHINE-GENERATED FILE << !!! <<
|
||||
// >> !!! >>> DO NOT EDIT <<< !!! <<
|
||||
//---------------------------------------------------------
|
||||
|
||||
zoneinfo64:table(nofallback) {
|
||||
TZVersion { "2018i" }
|
||||
TZVersion { "2019a" }
|
||||
Zones:array {
|
||||
/* ACT */ :int { 355 } //Z#0
|
||||
/* AET */ :int { 367 } //Z#1
|
||||
|
@ -786,9 +786,9 @@ zoneinfo64:table(nofallback) {
|
|||
} //Z#163
|
||||
/* America/Metlakatla */ :table {
|
||||
transPre32:intvector { -1, 1069743569, -1, 2106011674 }
|
||||
trans:intvector { -880207200, -765385200, -21477600, -5756400, 9972000, 25693200, 41421600, 57747600, 73476000, 89197200, 104925600, 120646800, 126698400, 152096400, 162381600, 183546000, 199274400, 215600400, 230724000, 247050000, 262778400, 278499600, 294228000, 309949200, 325677600, 341398800, 357127200, 372848400, 388576800, 404902800, 420026400, 436352400, 1446372000, 1457866800, 1478426400, 1489316400, 1509876000, 1520766000, 1541325600, 1552215600, 1572775200 }
|
||||
trans:intvector { -880207200, -765385200, -21477600, -5756400, 9972000, 25693200, 41421600, 57747600, 73476000, 89197200, 104925600, 120646800, 126698400, 152096400, 162381600, 183546000, 199274400, 215600400, 230724000, 247050000, 262778400, 278499600, 294228000, 309949200, 325677600, 341398800, 357127200, 372848400, 388576800, 404902800, 420026400, 436352400, 1446372000, 1457866800, 1478426400, 1489316400, 1509876000, 1520766000, 1541325600, 1547978400, 1552215600, 1572775200 }
|
||||
typeOffsets:intvector { 54822, 0, -32400, 0, -32400, 3600, -31578, 0, -28800, 0, -28800, 3600 }
|
||||
typeMap:bin { "03040504050405040504050405040504050405040504050405040504050405040504010201020102040201" }
|
||||
typeMap:bin { "0304050405040504050405040504050405040504050405040504050405040504050401020102010204010201" }
|
||||
finalRule { "US" }
|
||||
finalRaw:int { -32400 }
|
||||
finalYear:int { 2020 }
|
||||
|
@ -1374,9 +1374,9 @@ zoneinfo64:table(nofallback) {
|
|||
} //Z#267
|
||||
/* Asia/Gaza */ :table {
|
||||
transPre32:intvector { -1, 2109557424 }
|
||||
trans:intvector { -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -399088800, -386650800, -368330400, -355114800, -336790800, -323654400, -305168400, -292032000, -273632400, -260496000, -242096400, -228960000, -210560400, -197424000, -178938000, -165801600, -147402000, -134265600, -115866000, -102643200, -84330000, -81313200, 142380000, 150843600, 167176800, 178664400, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 828655200, 843170400, 860104800, 874620000, 891554400, 906069600, 924213600, 939934800, 956268000, 971989200, 987717600, 1003438800, 1019167200, 1034888400, 1050616800, 1066338000, 1082066400, 1096581600, 1113516000, 1128380400, 1143842400, 1158872400, 1175378400, 1189638000, 1206655200, 1219957200, 1238104800, 1252015200, 1269640860, 1281474000, 1301608860, 1312146000, 1333058400, 1348178400, 1364508000, 1380229200, 1395957600, 1414098000, 1427493600, 1445547600, 1458946800, 1477692000 }
|
||||
trans:intvector { -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -399088800, -386650800, -368330400, -355114800, -336790800, -323654400, -305168400, -292032000, -273632400, -260496000, -242096400, -228960000, -210560400, -197424000, -178938000, -165801600, -147402000, -134265600, -115866000, -102643200, -84330000, -81313200, 142380000, 150843600, 167176800, 178664400, 334015200, 337644000, 452556000, 462232800, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 828655200, 843170400, 860104800, 874620000, 891554400, 906069600, 924213600, 939934800, 956268000, 971989200, 987717600, 1003438800, 1019167200, 1034888400, 1050616800, 1066338000, 1082066400, 1096581600, 1113516000, 1128380400, 1143842400, 1158872400, 1175378400, 1189638000, 1206655200, 1219957200, 1238104800, 1252015200, 1269640860, 1281474000, 1301608860, 1312146000, 1333058400, 1348178400, 1364508000, 1380229200, 1395957600, 1414098000, 1427493600, 1445547600, 1458946800, 1477692000 }
|
||||
typeOffsets:intvector { 8272, 0, 7200, 0, 7200, 3600 }
|
||||
typeMap:bin { "0102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
|
||||
typeMap:bin { "010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
|
||||
finalRule { "Palestine" }
|
||||
finalRaw:int { 7200 }
|
||||
finalYear:int { 2017 }
|
||||
|
@ -1384,9 +1384,9 @@ zoneinfo64:table(nofallback) {
|
|||
/* Asia/Harbin */ :int { 314 } //Z#269
|
||||
/* Asia/Hebron */ :table {
|
||||
transPre32:intvector { -1, 2109557273 }
|
||||
trans:intvector { -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -399088800, -386650800, -368330400, -355114800, -336790800, -323654400, -305168400, -292032000, -273632400, -260496000, -242096400, -228960000, -210560400, -197424000, -178938000, -165801600, -147402000, -134265600, -115866000, -102643200, -84330000, -81313200, 142380000, 150843600, 167176800, 178664400, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 828655200, 843170400, 860104800, 874620000, 891554400, 906069600, 924213600, 939934800, 956268000, 971989200, 987717600, 1003438800, 1019167200, 1034888400, 1050616800, 1066338000, 1082066400, 1096581600, 1113516000, 1128380400, 1143842400, 1158872400, 1175378400, 1189638000, 1206655200, 1220216400, 1238104800, 1252015200, 1269554400, 1281474000, 1301608860, 1312146000, 1314655200, 1317330000, 1333058400, 1348178400, 1364508000, 1380229200, 1395957600, 1414098000, 1427493600, 1445547600, 1458946800, 1477692000 }
|
||||
trans:intvector { -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -399088800, -386650800, -368330400, -355114800, -336790800, -323654400, -305168400, -292032000, -273632400, -260496000, -242096400, -228960000, -210560400, -197424000, -178938000, -165801600, -147402000, -134265600, -115866000, -102643200, -84330000, -81313200, 142380000, 150843600, 167176800, 178664400, 334015200, 337644000, 452556000, 462232800, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 828655200, 843170400, 860104800, 874620000, 891554400, 906069600, 924213600, 939934800, 956268000, 971989200, 987717600, 1003438800, 1019167200, 1034888400, 1050616800, 1066338000, 1082066400, 1096581600, 1113516000, 1128380400, 1143842400, 1158872400, 1175378400, 1189638000, 1206655200, 1220216400, 1238104800, 1252015200, 1269554400, 1281474000, 1301608860, 1312146000, 1314655200, 1317330000, 1333058400, 1348178400, 1364508000, 1380229200, 1395957600, 1414098000, 1427493600, 1445547600, 1458946800, 1477692000 }
|
||||
typeOffsets:intvector { 8423, 0, 7200, 0, 7200, 3600 }
|
||||
typeMap:bin { "01020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
|
||||
typeMap:bin { "0102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
|
||||
finalRule { "Palestine" }
|
||||
finalRaw:int { 7200 }
|
||||
finalYear:int { 2017 }
|
||||
|
@ -1426,9 +1426,9 @@ zoneinfo64:table(nofallback) {
|
|||
} //Z#277
|
||||
/* Asia/Jerusalem */ :table {
|
||||
transPre32:intvector { -1, 1454818042 }
|
||||
trans:intvector { -1641003640, -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -681962400, -673243200, -667962000, -652327200, -636426000, -622087200, -608947200, -591847200, -572486400, -558576000, -542851200, -527731200, -514425600, -490845600, -482986800, -459475200, -451537200, -428551200, -418262400, -400032000, -387428400, 142380000, 150843600, 167176800, 178664400, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 826840800, 842821200, 858895200, 874184400, 890344800, 905029200, 923011200, 936313200, 955670400, 970783200, 986770800, 1001282400, 1017356400, 1033941600, 1048806000, 1065132000, 1081292400, 1095804000, 1112313600, 1128812400, 1143763200, 1159657200, 1175212800, 1189897200, 1206662400, 1223161200, 1238112000, 1254006000, 1269561600, 1284246000, 1301616000, 1317510000, 1333065600, 1348354800, 1364515200, 1382828400 }
|
||||
trans:intvector { -1641003640, -933645600, -857358000, -844300800, -825822000, -812685600, -794199600, -779853600, -762656400, -748310400, -731127600, -681962400, -673243200, -667962000, -652327200, -636426000, -622087200, -608947200, -591847200, -572486400, -558576000, -542851200, -527731200, -514425600, -490845600, -482986800, -459475200, -451537200, -428551200, -418262400, -400032000, -387428400, 142380000, 150843600, 167176800, 178664400, 334015200, 337644000, 452556000, 462232800, 482277600, 495579600, 516751200, 526424400, 545436000, 558478800, 576626400, 589323600, 609890400, 620773200, 638316000, 651618000, 669765600, 683672400, 701820000, 715726800, 733701600, 747176400, 765151200, 778021200, 796600800, 810075600, 826840800, 842821200, 858895200, 874184400, 890344800, 905029200, 923011200, 936313200, 955670400, 970783200, 986770800, 1001282400, 1017356400, 1033941600, 1048806000, 1065132000, 1081292400, 1095804000, 1112313600, 1128812400, 1143763200, 1159657200, 1175212800, 1189897200, 1206662400, 1223161200, 1238112000, 1254006000, 1269561600, 1284246000, 1301616000, 1317510000, 1333065600, 1348354800, 1364515200, 1382828400 }
|
||||
typeOffsets:intvector { 8454, 0, 7200, 0, 7200, 3600, 7200, 7200, 8440, 0 }
|
||||
typeMap:bin { "0401020102010201020102010302010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
|
||||
typeMap:bin { "040102010201020102010201030201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201020102010201" }
|
||||
finalRule { "Zion" }
|
||||
finalRaw:int { 7200 }
|
||||
finalYear:int { 2014 }
|
||||
|
@ -2031,13 +2031,10 @@ zoneinfo64:table(nofallback) {
|
|||
} //Z#431
|
||||
/* Etc/GMT0 */ :int { 403 } //Z#432
|
||||
/* Etc/Greenwich */ :int { 403 } //Z#433
|
||||
/* Etc/UCT */ :table {
|
||||
typeOffsets:intvector { 0, 0 }
|
||||
links:intvector { 434, 614 }
|
||||
} //Z#434
|
||||
/* Etc/UCT */ :int { 435 } //Z#434
|
||||
/* Etc/UTC */ :table {
|
||||
typeOffsets:intvector { 0, 0 }
|
||||
links:intvector { 435, 436, 438, 628, 629, 633 }
|
||||
links:intvector { 434, 435, 436, 438, 614, 628, 629, 633 }
|
||||
} //Z#435
|
||||
/* Etc/Universal */ :int { 435 } //Z#436
|
||||
/* Etc/Unknown */ :table {
|
||||
|
@ -2860,7 +2857,7 @@ zoneinfo64:table(nofallback) {
|
|||
finalYear:int { 1977 }
|
||||
} //Z#612
|
||||
/* Turkey */ :int { 458 } //Z#613
|
||||
/* UCT */ :int { 434 } //Z#614
|
||||
/* UCT */ :int { 435 } //Z#614
|
||||
/* US/Alaska */ :int { 60 } //Z#615
|
||||
/* US/Aleutian */ :int { 59 } //Z#616
|
||||
/* US/Arizona */ :int { 184 } //Z#617
|
||||
|
@ -3141,7 +3138,7 @@ zoneinfo64:table(nofallback) {
|
|||
8, -30, -1, 7200, 1, 3, 1, -1, 7200, 1, 3600
|
||||
} //_#20
|
||||
Palestine:intvector {
|
||||
2, 22, -7, 3600, 0, 9, -31, -7, 3600, 0, 3600
|
||||
2, 24, -7, 3600, 0, 9, -31, -7, 3600, 0, 3600
|
||||
} //_#21
|
||||
Para:intvector {
|
||||
9, 1, -1, 0, 0, 2, 22, -1, 0, 0, 3600
|
||||
|
|
|
@ -16,6 +16,7 @@ include $(top_builddir)/icudefs.mk
|
|||
|
||||
OUTPUTFILE=icupkg.inc
|
||||
MIDDLE_SO_TARGET=
|
||||
PKGDATA_TRAILING_SPACE=" "
|
||||
|
||||
all : clean
|
||||
@echo GENCCODE_ASSEMBLY_TYPE=$(GENCCODE_ASSEMBLY) >> $(OUTPUTFILE)
|
||||
|
@ -36,7 +37,6 @@ all : clean
|
|||
@echo RANLIB=$(RANLIB) >> $(OUTPUTFILE)
|
||||
@echo INSTALL_CMD=$(INSTALL) >> $(OUTPUTFILE)
|
||||
|
||||
|
||||
clean :
|
||||
$(RMV) $(OUTPUTFILE)
|
||||
|
||||
|
|
|
@ -14,9 +14,9 @@ top_builddir = ../..
|
|||
## All the flags and other definitions are included here.
|
||||
include $(top_builddir)/icudefs.mk
|
||||
|
||||
MIDDLE_SO_TARGET=
|
||||
|
||||
OUTPUTFILE=pkgdata.inc
|
||||
MIDDLE_SO_TARGET=
|
||||
PKGDATA_TRAILING_SPACE=" "
|
||||
|
||||
all : clean
|
||||
@echo GENCCODE_ASSEMBLY_TYPE=$(GENCCODE_ASSEMBLY) >> $(OUTPUTFILE)
|
||||
|
|
|
@ -103,16 +103,17 @@ number_affixutils.o number_compact.o number_decimalquantity.o \
|
|||
number_decimfmtprops.o number_fluent.o number_formatimpl.o number_grouping.o \
|
||||
number_integerwidth.o number_longnames.o number_modifiers.o number_notation.o number_output.o \
|
||||
number_padding.o number_patternmodifier.o number_patternstring.o \
|
||||
number_rounding.o number_scientific.o number_stringbuilder.o number_utils.o number_asformat.o \
|
||||
number_rounding.o number_scientific.o number_utils.o number_asformat.o \
|
||||
number_mapper.o number_multiplier.o number_currencysymbols.o number_skeletons.o number_capi.o \
|
||||
double-conversion.o double-conversion-bignum-dtoa.o double-conversion-bignum.o \
|
||||
double-conversion-cached-powers.o double-conversion-diy-fp.o \
|
||||
double-conversion-fast-dtoa.o double-conversion-strtod.o \
|
||||
numparse_stringsegment.o numparse_parsednumber.o numparse_impl.o \
|
||||
string_segment.o numparse_parsednumber.o numparse_impl.o \
|
||||
numparse_symbols.o numparse_decimal.o numparse_scientific.o numparse_currency.o \
|
||||
numparse_affixes.o numparse_compositions.o numparse_validators.o \
|
||||
numrange_fluent.o numrange_impl.o \
|
||||
erarules.o formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o
|
||||
erarules.o \
|
||||
formattedvalue.o formattedval_iterimpl.o formattedval_sbimpl.o formatted_string_builder.o
|
||||
|
||||
## Header files to install
|
||||
HEADERS = $(srcdir)/unicode/*.h
|
||||
|
|
|
@ -5,14 +5,9 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "number_stringbuilder.h"
|
||||
#include "static_unicode_sets.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "number_utils.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::number;
|
||||
using namespace icu::number::impl;
|
||||
|
||||
namespace {
|
||||
|
||||
|
@ -34,7 +29,10 @@ inline void uprv_memmove2(void* dest, const void* src, size_t len) {
|
|||
|
||||
} // namespace
|
||||
|
||||
NumberStringBuilder::NumberStringBuilder() {
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
FormattedStringBuilder::FormattedStringBuilder() {
|
||||
#if U_DEBUG
|
||||
// Initializing the memory to non-zero helps catch some bugs that involve
|
||||
// reading from an improperly terminated string.
|
||||
|
@ -44,18 +42,18 @@ NumberStringBuilder::NumberStringBuilder() {
|
|||
#endif
|
||||
}
|
||||
|
||||
NumberStringBuilder::~NumberStringBuilder() {
|
||||
FormattedStringBuilder::~FormattedStringBuilder() {
|
||||
if (fUsingHeap) {
|
||||
uprv_free(fChars.heap.ptr);
|
||||
uprv_free(fFields.heap.ptr);
|
||||
}
|
||||
}
|
||||
|
||||
NumberStringBuilder::NumberStringBuilder(const NumberStringBuilder &other) {
|
||||
FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
|
||||
*this = other;
|
||||
}
|
||||
|
||||
NumberStringBuilder &NumberStringBuilder::operator=(const NumberStringBuilder &other) {
|
||||
FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
|
||||
// Check for self-assignment
|
||||
if (this == &other) {
|
||||
return *this;
|
||||
|
@ -78,7 +76,7 @@ NumberStringBuilder &NumberStringBuilder::operator=(const NumberStringBuilder &o
|
|||
// UErrorCode is not available; fail silently.
|
||||
uprv_free(newChars);
|
||||
uprv_free(newFields);
|
||||
*this = NumberStringBuilder(); // can't fail
|
||||
*this = FormattedStringBuilder(); // can't fail
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -97,15 +95,15 @@ NumberStringBuilder &NumberStringBuilder::operator=(const NumberStringBuilder &o
|
|||
return *this;
|
||||
}
|
||||
|
||||
int32_t NumberStringBuilder::length() const {
|
||||
int32_t FormattedStringBuilder::length() const {
|
||||
return fLength;
|
||||
}
|
||||
|
||||
int32_t NumberStringBuilder::codePointCount() const {
|
||||
int32_t FormattedStringBuilder::codePointCount() const {
|
||||
return u_countChar32(getCharPtr() + fZero, fLength);
|
||||
}
|
||||
|
||||
UChar32 NumberStringBuilder::getFirstCodePoint() const {
|
||||
UChar32 FormattedStringBuilder::getFirstCodePoint() const {
|
||||
if (fLength == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -114,7 +112,7 @@ UChar32 NumberStringBuilder::getFirstCodePoint() const {
|
|||
return cp;
|
||||
}
|
||||
|
||||
UChar32 NumberStringBuilder::getLastCodePoint() const {
|
||||
UChar32 FormattedStringBuilder::getLastCodePoint() const {
|
||||
if (fLength == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -125,13 +123,13 @@ UChar32 NumberStringBuilder::getLastCodePoint() const {
|
|||
return cp;
|
||||
}
|
||||
|
||||
UChar32 NumberStringBuilder::codePointAt(int32_t index) const {
|
||||
UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
|
||||
UChar32 cp;
|
||||
U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
|
||||
return cp;
|
||||
}
|
||||
|
||||
UChar32 NumberStringBuilder::codePointBefore(int32_t index) const {
|
||||
UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
|
||||
int32_t offset = index;
|
||||
U16_BACK_1(getCharPtr() + fZero, 0, offset);
|
||||
UChar32 cp;
|
||||
|
@ -139,19 +137,19 @@ UChar32 NumberStringBuilder::codePointBefore(int32_t index) const {
|
|||
return cp;
|
||||
}
|
||||
|
||||
NumberStringBuilder &NumberStringBuilder::clear() {
|
||||
FormattedStringBuilder &FormattedStringBuilder::clear() {
|
||||
// TODO: Reset the heap here?
|
||||
fZero = getCapacity() / 2;
|
||||
fLength = 0;
|
||||
return *this;
|
||||
}
|
||||
|
||||
int32_t NumberStringBuilder::appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
|
||||
int32_t FormattedStringBuilder::appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status) {
|
||||
return insertCodePoint(fLength, codePoint, field, status);
|
||||
}
|
||||
|
||||
int32_t
|
||||
NumberStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
|
||||
FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
|
||||
int32_t count = U16_LENGTH(codePoint);
|
||||
int32_t position = prepareForInsert(index, count, status);
|
||||
if (U_FAILURE(status)) {
|
||||
|
@ -168,11 +166,11 @@ NumberStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field fie
|
|||
return count;
|
||||
}
|
||||
|
||||
int32_t NumberStringBuilder::append(const UnicodeString &unistr, Field field, UErrorCode &status) {
|
||||
int32_t FormattedStringBuilder::append(const UnicodeString &unistr, Field field, UErrorCode &status) {
|
||||
return insert(fLength, unistr, field, status);
|
||||
}
|
||||
|
||||
int32_t NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
|
||||
int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
|
||||
UErrorCode &status) {
|
||||
if (unistr.length() == 0) {
|
||||
// Nothing to insert.
|
||||
|
@ -186,7 +184,7 @@ int32_t NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr,
|
|||
}
|
||||
|
||||
int32_t
|
||||
NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
|
||||
FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
|
||||
Field field, UErrorCode &status) {
|
||||
int32_t count = end - start;
|
||||
int32_t position = prepareForInsert(index, count, status);
|
||||
|
@ -201,7 +199,7 @@ NumberStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t
|
|||
}
|
||||
|
||||
int32_t
|
||||
NumberStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
|
||||
FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
|
||||
int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
|
||||
int32_t thisLength = endThis - startThis;
|
||||
int32_t otherLength = endOther - startOther;
|
||||
|
@ -224,12 +222,12 @@ NumberStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeSt
|
|||
return count;
|
||||
}
|
||||
|
||||
int32_t NumberStringBuilder::append(const NumberStringBuilder &other, UErrorCode &status) {
|
||||
int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
|
||||
return insert(fLength, other, status);
|
||||
}
|
||||
|
||||
int32_t
|
||||
NumberStringBuilder::insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status) {
|
||||
FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
|
||||
if (this == &other) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return 0;
|
||||
|
@ -250,7 +248,7 @@ NumberStringBuilder::insert(int32_t index, const NumberStringBuilder &other, UEr
|
|||
return count;
|
||||
}
|
||||
|
||||
void NumberStringBuilder::writeTerminator(UErrorCode& status) {
|
||||
void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
|
||||
int32_t position = prepareForInsert(fLength, 1, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
|
@ -260,7 +258,7 @@ void NumberStringBuilder::writeTerminator(UErrorCode& status) {
|
|||
fLength--;
|
||||
}
|
||||
|
||||
int32_t NumberStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
|
||||
int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
|
||||
U_ASSERT(index >= 0);
|
||||
U_ASSERT(index <= fLength);
|
||||
U_ASSERT(count >= 0);
|
||||
|
@ -279,7 +277,7 @@ int32_t NumberStringBuilder::prepareForInsert(int32_t index, int32_t count, UErr
|
|||
}
|
||||
}
|
||||
|
||||
int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
|
||||
int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
|
||||
int32_t oldCapacity = getCapacity();
|
||||
int32_t oldZero = fZero;
|
||||
char16_t *oldChars = getCharPtr();
|
||||
|
@ -342,7 +340,7 @@ int32_t NumberStringBuilder::prepareForInsertHelper(int32_t index, int32_t count
|
|||
return fZero + index;
|
||||
}
|
||||
|
||||
int32_t NumberStringBuilder::remove(int32_t index, int32_t count) {
|
||||
int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
|
||||
// TODO: Reset the heap here? (If the string after removal can fit on stack?)
|
||||
int32_t position = index + fZero;
|
||||
uprv_memmove2(getCharPtr() + position,
|
||||
|
@ -355,18 +353,18 @@ int32_t NumberStringBuilder::remove(int32_t index, int32_t count) {
|
|||
return position;
|
||||
}
|
||||
|
||||
UnicodeString NumberStringBuilder::toUnicodeString() const {
|
||||
UnicodeString FormattedStringBuilder::toUnicodeString() const {
|
||||
return UnicodeString(getCharPtr() + fZero, fLength);
|
||||
}
|
||||
|
||||
const UnicodeString NumberStringBuilder::toTempUnicodeString() const {
|
||||
const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
|
||||
// Readonly-alias constructor:
|
||||
return UnicodeString(FALSE, getCharPtr() + fZero, fLength);
|
||||
}
|
||||
|
||||
UnicodeString NumberStringBuilder::toDebugString() const {
|
||||
UnicodeString FormattedStringBuilder::toDebugString() const {
|
||||
UnicodeString sb;
|
||||
sb.append(u"<NumberStringBuilder [", -1);
|
||||
sb.append(u"<FormattedStringBuilder [", -1);
|
||||
sb.append(toUnicodeString());
|
||||
sb.append(u"] [", -1);
|
||||
for (int i = 0; i < fLength; i++) {
|
||||
|
@ -419,11 +417,11 @@ UnicodeString NumberStringBuilder::toDebugString() const {
|
|||
return sb;
|
||||
}
|
||||
|
||||
const char16_t *NumberStringBuilder::chars() const {
|
||||
const char16_t *FormattedStringBuilder::chars() const {
|
||||
return getCharPtr() + fZero;
|
||||
}
|
||||
|
||||
bool NumberStringBuilder::contentEquals(const NumberStringBuilder &other) const {
|
||||
bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
|
||||
if (fLength != other.fLength) {
|
||||
return false;
|
||||
}
|
||||
|
@ -435,136 +433,7 @@ bool NumberStringBuilder::contentEquals(const NumberStringBuilder &other) const
|
|||
return true;
|
||||
}
|
||||
|
||||
bool NumberStringBuilder::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const {
|
||||
int32_t rawField = fp.getField();
|
||||
|
||||
if (rawField == FieldPosition::DONT_CARE) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) {
|
||||
status = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
ConstrainedFieldPosition cfpos;
|
||||
cfpos.constrainField(UFIELD_CATEGORY_NUMBER, rawField);
|
||||
cfpos.setState(UFIELD_CATEGORY_NUMBER, rawField, fp.getBeginIndex(), fp.getEndIndex());
|
||||
if (nextPosition(cfpos, 0, status)) {
|
||||
fp.setBeginIndex(cfpos.getStart());
|
||||
fp.setEndIndex(cfpos.getLimit());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Special case: fraction should start after integer if fraction is not present
|
||||
if (rawField == UNUM_FRACTION_FIELD && fp.getEndIndex() == 0) {
|
||||
bool inside = false;
|
||||
int32_t i = fZero;
|
||||
for (; i < fZero + fLength; i++) {
|
||||
if (isIntOrGroup(getFieldPtr()[i]) || getFieldPtr()[i] == UNUM_DECIMAL_SEPARATOR_FIELD) {
|
||||
inside = true;
|
||||
} else if (inside) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
fp.setBeginIndex(i - fZero);
|
||||
fp.setEndIndex(i - fZero);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void NumberStringBuilder::getAllFieldPositions(FieldPositionIteratorHandler& fpih,
|
||||
UErrorCode& status) const {
|
||||
ConstrainedFieldPosition cfpos;
|
||||
while (nextPosition(cfpos, 0, status)) {
|
||||
fpih.addAttribute(cfpos.getField(), cfpos.getStart(), cfpos.getLimit());
|
||||
}
|
||||
}
|
||||
|
||||
// Signal the end of the string using a field that doesn't exist and that is
|
||||
// different from UNUM_FIELD_COUNT, which is used for "null number field".
|
||||
static constexpr Field kEndField = 0xff;
|
||||
|
||||
bool NumberStringBuilder::nextPosition(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const {
|
||||
auto numericCAF = NumFieldUtils::expand(numericField);
|
||||
int32_t fieldStart = -1;
|
||||
Field currField = UNUM_FIELD_COUNT;
|
||||
for (int32_t i = fZero + cfpos.getLimit(); i <= fZero + fLength; i++) {
|
||||
Field _field = (i < fZero + fLength) ? getFieldPtr()[i] : kEndField;
|
||||
// Case 1: currently scanning a field.
|
||||
if (currField != UNUM_FIELD_COUNT) {
|
||||
if (currField != _field) {
|
||||
int32_t end = i - fZero;
|
||||
// Grouping separators can be whitespace; don't throw them out!
|
||||
if (currField != UNUM_GROUPING_SEPARATOR_FIELD) {
|
||||
end = trimBack(i - fZero);
|
||||
}
|
||||
if (end <= fieldStart) {
|
||||
// Entire field position is ignorable; skip.
|
||||
fieldStart = -1;
|
||||
currField = UNUM_FIELD_COUNT;
|
||||
i--; // look at this index again
|
||||
continue;
|
||||
}
|
||||
int32_t start = fieldStart;
|
||||
if (currField != UNUM_GROUPING_SEPARATOR_FIELD) {
|
||||
start = trimFront(start);
|
||||
}
|
||||
auto caf = NumFieldUtils::expand(currField);
|
||||
cfpos.setState(caf.category, caf.field, start, end);
|
||||
return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
|
||||
if (cfpos.matchesField(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)
|
||||
&& i > fZero
|
||||
// don't return the same field twice in a row:
|
||||
&& i - fZero > cfpos.getLimit()
|
||||
&& isIntOrGroup(getFieldPtr()[i - 1])
|
||||
&& !isIntOrGroup(_field)) {
|
||||
int j = i - 1;
|
||||
for (; j >= fZero && isIntOrGroup(getFieldPtr()[j]); j--) {}
|
||||
cfpos.setState(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD, j - fZero + 1, i - fZero);
|
||||
return true;
|
||||
}
|
||||
// Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
|
||||
if (numericField != 0
|
||||
&& cfpos.matchesField(numericCAF.category, numericCAF.field)
|
||||
&& i > fZero
|
||||
// don't return the same field twice in a row:
|
||||
&& (i - fZero > cfpos.getLimit()
|
||||
|| cfpos.getCategory() != numericCAF.category
|
||||
|| cfpos.getField() != numericCAF.field)
|
||||
&& isNumericField(getFieldPtr()[i - 1])
|
||||
&& !isNumericField(_field)) {
|
||||
int j = i - 1;
|
||||
for (; j >= fZero && isNumericField(getFieldPtr()[j]); j--) {}
|
||||
cfpos.setState(numericCAF.category, numericCAF.field, j - fZero + 1, i - fZero);
|
||||
return true;
|
||||
}
|
||||
// Special case: skip over INTEGER; will be coalesced later.
|
||||
if (_field == UNUM_INTEGER_FIELD) {
|
||||
_field = UNUM_FIELD_COUNT;
|
||||
}
|
||||
// Case 2: no field starting at this position.
|
||||
if (_field == UNUM_FIELD_COUNT || _field == kEndField) {
|
||||
continue;
|
||||
}
|
||||
// Case 3: check for field starting at this position
|
||||
auto caf = NumFieldUtils::expand(_field);
|
||||
if (cfpos.matchesField(caf.category, caf.field)) {
|
||||
fieldStart = i - fZero;
|
||||
currField = _field;
|
||||
}
|
||||
}
|
||||
|
||||
U_ASSERT(currField == UNUM_FIELD_COUNT);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool NumberStringBuilder::containsField(Field field) const {
|
||||
bool FormattedStringBuilder::containsField(Field field) const {
|
||||
for (int32_t i = 0; i < fLength; i++) {
|
||||
if (field == fieldAt(i)) {
|
||||
return true;
|
||||
|
@ -573,27 +442,6 @@ bool NumberStringBuilder::containsField(Field field) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool NumberStringBuilder::isIntOrGroup(Field field) {
|
||||
return field == UNUM_INTEGER_FIELD
|
||||
|| field == UNUM_GROUPING_SEPARATOR_FIELD;
|
||||
}
|
||||
|
||||
bool NumberStringBuilder::isNumericField(Field field) {
|
||||
return NumFieldUtils::isNumericField(field);
|
||||
}
|
||||
|
||||
int32_t NumberStringBuilder::trimBack(int32_t limit) const {
|
||||
return unisets::get(unisets::DEFAULT_IGNORABLES)->spanBack(
|
||||
getCharPtr() + fZero,
|
||||
limit,
|
||||
USET_SPAN_CONTAINED);
|
||||
}
|
||||
|
||||
int32_t NumberStringBuilder::trimFront(int32_t start) const {
|
||||
return start + unisets::get(unisets::DEFAULT_IGNORABLES)->span(
|
||||
getCharPtr() + fZero + start,
|
||||
fLength - start,
|
||||
USET_SPAN_CONTAINED);
|
||||
}
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
|
@ -9,17 +9,29 @@
|
|||
|
||||
|
||||
#include <cstdint>
|
||||
#include "unicode/numfmt.h"
|
||||
#include "unicode/ustring.h"
|
||||
#include "unicode/unum.h" // for UNUM_FIELD_COUNT
|
||||
#include "cstring.h"
|
||||
#include "uassert.h"
|
||||
#include "number_types.h"
|
||||
#include "fphdlimp.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace number {
|
||||
namespace impl {
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
class U_I18N_API NumberStringBuilder : public UMemory {
|
||||
class FormattedValueStringBuilderImpl;
|
||||
|
||||
/**
|
||||
* A StringBuilder optimized for formatting. It implements the following key
|
||||
* features beyond a UnicodeString:
|
||||
*
|
||||
* <ol>
|
||||
* <li>Efficient prepend as well as append.
|
||||
* <li>Keeps tracks of Fields in an efficient manner.
|
||||
* </ol>
|
||||
*
|
||||
* See also FormattedValueStringBuilderImpl.
|
||||
*
|
||||
* @author sffc (Shane Carr)
|
||||
*/
|
||||
class U_I18N_API FormattedStringBuilder : public UMemory {
|
||||
private:
|
||||
static const int32_t DEFAULT_CAPACITY = 40;
|
||||
|
||||
|
@ -33,13 +45,19 @@ class U_I18N_API NumberStringBuilder : public UMemory {
|
|||
};
|
||||
|
||||
public:
|
||||
NumberStringBuilder();
|
||||
FormattedStringBuilder();
|
||||
|
||||
~NumberStringBuilder();
|
||||
~FormattedStringBuilder();
|
||||
|
||||
NumberStringBuilder(const NumberStringBuilder &other);
|
||||
FormattedStringBuilder(const FormattedStringBuilder &other);
|
||||
|
||||
NumberStringBuilder &operator=(const NumberStringBuilder &other);
|
||||
// Convention: bottom 4 bits for field, top 4 bits for field category.
|
||||
// Field category 0 implies the number category so that the number field
|
||||
// literals can be directly passed as a Field type.
|
||||
// See the helper functions in "StringBuilderFieldUtils" below.
|
||||
typedef uint8_t Field;
|
||||
|
||||
FormattedStringBuilder &operator=(const FormattedStringBuilder &other);
|
||||
|
||||
int32_t length() const;
|
||||
|
||||
|
@ -65,7 +83,7 @@ class U_I18N_API NumberStringBuilder : public UMemory {
|
|||
|
||||
UChar32 codePointBefore(int32_t index) const;
|
||||
|
||||
NumberStringBuilder &clear();
|
||||
FormattedStringBuilder &clear();
|
||||
|
||||
int32_t appendCodePoint(UChar32 codePoint, Field field, UErrorCode &status);
|
||||
|
||||
|
@ -81,19 +99,19 @@ class U_I18N_API NumberStringBuilder : public UMemory {
|
|||
int32_t splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
|
||||
int32_t startOther, int32_t endOther, Field field, UErrorCode& status);
|
||||
|
||||
int32_t append(const NumberStringBuilder &other, UErrorCode &status);
|
||||
int32_t append(const FormattedStringBuilder &other, UErrorCode &status);
|
||||
|
||||
int32_t insert(int32_t index, const NumberStringBuilder &other, UErrorCode &status);
|
||||
int32_t insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status);
|
||||
|
||||
void writeTerminator(UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Gets a "safe" UnicodeString that can be used even after the NumberStringBuilder is destructed.
|
||||
* Gets a "safe" UnicodeString that can be used even after the FormattedStringBuilder is destructed.
|
||||
* */
|
||||
UnicodeString toUnicodeString() const;
|
||||
|
||||
/**
|
||||
* Gets an "unsafe" UnicodeString that is valid only as long as the NumberStringBuilder is alive and
|
||||
* Gets an "unsafe" UnicodeString that is valid only as long as the FormattedStringBuilder is alive and
|
||||
* unchanged. Slightly faster than toUnicodeString().
|
||||
*/
|
||||
const UnicodeString toTempUnicodeString() const;
|
||||
|
@ -102,13 +120,7 @@ class U_I18N_API NumberStringBuilder : public UMemory {
|
|||
|
||||
const char16_t *chars() const;
|
||||
|
||||
bool contentEquals(const NumberStringBuilder &other) const;
|
||||
|
||||
bool nextFieldPosition(FieldPosition& fp, UErrorCode& status) const;
|
||||
|
||||
void getAllFieldPositions(FieldPositionIteratorHandler& fpih, UErrorCode& status) const;
|
||||
|
||||
bool nextPosition(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& status) const;
|
||||
bool contentEquals(const FormattedStringBuilder &other) const;
|
||||
|
||||
bool containsField(Field field) const;
|
||||
|
||||
|
@ -145,17 +157,50 @@ class U_I18N_API NumberStringBuilder : public UMemory {
|
|||
|
||||
int32_t remove(int32_t index, int32_t count);
|
||||
|
||||
static bool isIntOrGroup(Field field);
|
||||
|
||||
static bool isNumericField(Field field);
|
||||
|
||||
int32_t trimBack(int32_t limit) const;
|
||||
|
||||
int32_t trimFront(int32_t start) const;
|
||||
friend class FormattedValueStringBuilderImpl;
|
||||
};
|
||||
|
||||
/**
|
||||
* Helper functions for dealing with the Field typedef, which stores fields
|
||||
* in a compressed format.
|
||||
*/
|
||||
class StringBuilderFieldUtils {
|
||||
public:
|
||||
struct CategoryFieldPair {
|
||||
int32_t category;
|
||||
int32_t field;
|
||||
};
|
||||
|
||||
/** Compile-time function to construct a Field from a category and a field */
|
||||
template <int32_t category, int32_t field>
|
||||
static constexpr FormattedStringBuilder::Field compress() {
|
||||
static_assert(category != 0, "cannot use Undefined category in FieldUtils");
|
||||
static_assert(category <= 0xf, "only 4 bits for category");
|
||||
static_assert(field <= 0xf, "only 4 bits for field");
|
||||
return static_cast<int8_t>((category << 4) | field);
|
||||
}
|
||||
|
||||
/** Runtime inline function to unpack the category and field from the Field */
|
||||
static inline CategoryFieldPair expand(FormattedStringBuilder::Field field) {
|
||||
if (field == UNUM_FIELD_COUNT) {
|
||||
return {UFIELD_CATEGORY_UNDEFINED, 0};
|
||||
}
|
||||
CategoryFieldPair ret = {
|
||||
(field >> 4),
|
||||
(field & 0xf)
|
||||
};
|
||||
if (ret.category == 0) {
|
||||
ret.category = UFIELD_CATEGORY_NUMBER;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool isNumericField(FormattedStringBuilder::Field field) {
|
||||
int8_t category = field >> 4;
|
||||
return category == 0 || category == UFIELD_CATEGORY_NUMBER;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace impl
|
||||
} // namespace number
|
||||
U_NAMESPACE_END
|
||||
|
||||
|
|
@ -18,7 +18,7 @@
|
|||
#include "fphdlimp.h"
|
||||
#include "util.h"
|
||||
#include "uvectr32.h"
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
|
||||
|
||||
/**
|
||||
|
@ -67,7 +67,9 @@ typedef enum UCFPosConstraintType {
|
|||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
||||
/** Implementation using FieldPositionHandler to accept fields. */
|
||||
/**
|
||||
* Implementation of FormattedValue using FieldPositionHandler to accept fields.
|
||||
*/
|
||||
class FormattedValueFieldPositionIteratorImpl : public UMemory, public FormattedValue {
|
||||
public:
|
||||
|
||||
|
@ -112,12 +114,21 @@ private:
|
|||
};
|
||||
|
||||
|
||||
class FormattedValueNumberStringBuilderImpl : public UMemory, public FormattedValue {
|
||||
/**
|
||||
* Implementation of FormattedValue based on FormattedStringBuilder.
|
||||
*
|
||||
* The implementation currently revolves around numbers and number fields.
|
||||
* However, it can be generalized in the future when there is a need.
|
||||
*
|
||||
* @author sffc (Shane Carr)
|
||||
*/
|
||||
// Exported as U_I18N_API for tests
|
||||
class U_I18N_API FormattedValueStringBuilderImpl : public UMemory, public FormattedValue {
|
||||
public:
|
||||
|
||||
FormattedValueNumberStringBuilderImpl(number::impl::Field numericField);
|
||||
FormattedValueStringBuilderImpl(FormattedStringBuilder::Field numericField);
|
||||
|
||||
virtual ~FormattedValueNumberStringBuilderImpl();
|
||||
virtual ~FormattedValueStringBuilderImpl();
|
||||
|
||||
// Implementation of FormattedValue (const):
|
||||
|
||||
|
@ -126,17 +137,25 @@ public:
|
|||
Appendable& appendTo(Appendable& appendable, UErrorCode& status) const U_OVERRIDE;
|
||||
UBool nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const U_OVERRIDE;
|
||||
|
||||
inline number::impl::NumberStringBuilder& getStringRef() {
|
||||
// Additional helper functions:
|
||||
UBool nextFieldPosition(FieldPosition& fp, UErrorCode& status) const;
|
||||
void getAllFieldPositions(FieldPositionIteratorHandler& fpih, UErrorCode& status) const;
|
||||
inline FormattedStringBuilder& getStringRef() {
|
||||
return fString;
|
||||
}
|
||||
|
||||
inline const number::impl::NumberStringBuilder& getStringRef() const {
|
||||
inline const FormattedStringBuilder& getStringRef() const {
|
||||
return fString;
|
||||
}
|
||||
|
||||
private:
|
||||
number::impl::NumberStringBuilder fString;
|
||||
number::impl::Field fNumericField;
|
||||
FormattedStringBuilder fString;
|
||||
FormattedStringBuilder::Field fNumericField;
|
||||
|
||||
bool nextPositionImpl(ConstrainedFieldPosition& cfpos, FormattedStringBuilder::Field numericField, UErrorCode& status) const;
|
||||
static bool isIntOrGroup(FormattedStringBuilder::Field field);
|
||||
static bool isNumericField(FormattedStringBuilder::Field field);
|
||||
int32_t trimBack(int32_t limit) const;
|
||||
int32_t trimFront(int32_t start) const;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -9,35 +9,203 @@
|
|||
// Other independent implementations should go into their own cpp file for
|
||||
// better dependency modularization.
|
||||
|
||||
#include "unicode/ustring.h"
|
||||
#include "formattedval_impl.h"
|
||||
#include "number_types.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "number_utils.h"
|
||||
#include "static_unicode_sets.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
||||
FormattedValueNumberStringBuilderImpl::FormattedValueNumberStringBuilderImpl(number::impl::Field numericField)
|
||||
typedef FormattedStringBuilder::Field Field;
|
||||
|
||||
|
||||
FormattedValueStringBuilderImpl::FormattedValueStringBuilderImpl(Field numericField)
|
||||
: fNumericField(numericField) {
|
||||
}
|
||||
|
||||
FormattedValueNumberStringBuilderImpl::~FormattedValueNumberStringBuilderImpl() {
|
||||
FormattedValueStringBuilderImpl::~FormattedValueStringBuilderImpl() {
|
||||
}
|
||||
|
||||
|
||||
UnicodeString FormattedValueNumberStringBuilderImpl::toString(UErrorCode&) const {
|
||||
UnicodeString FormattedValueStringBuilderImpl::toString(UErrorCode&) const {
|
||||
return fString.toUnicodeString();
|
||||
}
|
||||
|
||||
UnicodeString FormattedValueNumberStringBuilderImpl::toTempString(UErrorCode&) const {
|
||||
UnicodeString FormattedValueStringBuilderImpl::toTempString(UErrorCode&) const {
|
||||
return fString.toTempUnicodeString();
|
||||
}
|
||||
|
||||
Appendable& FormattedValueNumberStringBuilderImpl::appendTo(Appendable& appendable, UErrorCode&) const {
|
||||
Appendable& FormattedValueStringBuilderImpl::appendTo(Appendable& appendable, UErrorCode&) const {
|
||||
appendable.appendString(fString.chars(), fString.length());
|
||||
return appendable;
|
||||
}
|
||||
|
||||
UBool FormattedValueNumberStringBuilderImpl::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const {
|
||||
UBool FormattedValueStringBuilderImpl::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const {
|
||||
// NOTE: MSVC sometimes complains when implicitly converting between bool and UBool
|
||||
return fString.nextPosition(cfpos, fNumericField, status) ? TRUE : FALSE;
|
||||
return nextPositionImpl(cfpos, fNumericField, status) ? TRUE : FALSE;
|
||||
}
|
||||
|
||||
/**
 * Finds the next occurrence of the number field requested by fp, starting
 * from fp's current begin/end indices.
 *
 * @param fp     In: the field to look for and the previous position.
 *               Out: begin/end of the match when one is found.
 * @param status Set to U_ILLEGAL_ARGUMENT_ERROR if the field in fp is
 *               neither DONT_CARE nor in the range [0, UNUM_FIELD_COUNT).
 * @return TRUE if a matching field position was found; FALSE otherwise.
 */
UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const {
    const int32_t requested = fp.getField();
    if (requested == FieldPosition::DONT_CARE) {
        return FALSE;
    }
    if (requested < 0 || requested >= UNUM_FIELD_COUNT) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return FALSE;
    }

    // Resume the search from the position that fp currently describes.
    ConstrainedFieldPosition cursor;
    cursor.constrainField(UFIELD_CATEGORY_NUMBER, requested);
    cursor.setState(UFIELD_CATEGORY_NUMBER, requested, fp.getBeginIndex(), fp.getEndIndex());
    if (nextPositionImpl(cursor, 0, status)) {
        fp.setBeginIndex(cursor.getStart());
        fp.setEndIndex(cursor.getLimit());
        return TRUE;
    }

    if (requested != UNUM_FRACTION_FIELD || fp.getEndIndex() != 0) {
        return FALSE;
    }

    // Special case: no fraction is present, so report an empty fraction
    // position immediately after the integer / grouping / decimal-separator run.
    const int32_t stop = fString.fZero + fString.fLength;
    int32_t idx = fString.fZero;
    bool seenNumberPart = false;
    while (idx < stop) {
        const auto fieldHere = fString.getFieldPtr()[idx];
        if (isIntOrGroup(fieldHere) || fieldHere == UNUM_DECIMAL_SEPARATOR_FIELD) {
            seenNumberPart = true;
        } else if (seenNumberPart) {
            break;
        }
        idx++;
    }
    const int32_t offset = idx - fString.fZero;
    fp.setBeginIndex(offset);
    fp.setEndIndex(offset);
    return FALSE;
}
|
||||
|
||||
/**
 * Walks every field position in the string, in iteration order, and reports
 * each one to the handler as (field, start, limit).
 *
 * @param fpih   Receives one addAttribute() call per field position.
 * @param status Passed through to nextPositionImpl().
 */
void FormattedValueStringBuilderImpl::getAllFieldPositions(FieldPositionIteratorHandler& fpih,
        UErrorCode& status) const {
    ConstrainedFieldPosition cursor;
    for (;;) {
        // A numericField of 0 means no coalesced "numeric" span is reported.
        if (!nextPositionImpl(cursor, 0, status)) {
            break;
        }
        fpih.addAttribute(cursor.getField(), cursor.getStart(), cursor.getLimit());
    }
}
|
||||
|
||||
// Signal the end of the string using a field that doesn't exist and that is
|
||||
// different from UNUM_FIELD_COUNT, which is used for "null number field".
|
||||
static constexpr Field kEndField = 0xff;
|
||||
|
||||
bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const {
|
||||
auto numericCAF = StringBuilderFieldUtils::expand(numericField);
|
||||
int32_t fieldStart = -1;
|
||||
Field currField = UNUM_FIELD_COUNT;
|
||||
for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) {
|
||||
Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField;
|
||||
// Case 1: currently scanning a field.
|
||||
if (currField != UNUM_FIELD_COUNT) {
|
||||
if (currField != _field) {
|
||||
int32_t end = i - fString.fZero;
|
||||
// Grouping separators can be whitespace; don't throw them out!
|
||||
if (currField != UNUM_GROUPING_SEPARATOR_FIELD) {
|
||||
end = trimBack(i - fString.fZero);
|
||||
}
|
||||
if (end <= fieldStart) {
|
||||
// Entire field position is ignorable; skip.
|
||||
fieldStart = -1;
|
||||
currField = UNUM_FIELD_COUNT;
|
||||
i--; // look at this index again
|
||||
continue;
|
||||
}
|
||||
int32_t start = fieldStart;
|
||||
if (currField != UNUM_GROUPING_SEPARATOR_FIELD) {
|
||||
start = trimFront(start);
|
||||
}
|
||||
auto caf = StringBuilderFieldUtils::expand(currField);
|
||||
cfpos.setState(caf.category, caf.field, start, end);
|
||||
return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
|
||||
if (cfpos.matchesField(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)
|
||||
&& i > fString.fZero
|
||||
// don't return the same field twice in a row:
|
||||
&& i - fString.fZero > cfpos.getLimit()
|
||||
&& isIntOrGroup(fString.getFieldPtr()[i - 1])
|
||||
&& !isIntOrGroup(_field)) {
|
||||
int j = i - 1;
|
||||
for (; j >= fString.fZero && isIntOrGroup(fString.getFieldPtr()[j]); j--) {}
|
||||
cfpos.setState(
|
||||
UFIELD_CATEGORY_NUMBER,
|
||||
UNUM_INTEGER_FIELD,
|
||||
j - fString.fZero + 1,
|
||||
i - fString.fZero);
|
||||
return true;
|
||||
}
|
||||
// Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
|
||||
if (numericField != 0
|
||||
&& cfpos.matchesField(numericCAF.category, numericCAF.field)
|
||||
&& i > fString.fZero
|
||||
// don't return the same field twice in a row:
|
||||
&& (i - fString.fZero > cfpos.getLimit()
|
||||
|| cfpos.getCategory() != numericCAF.category
|
||||
|| cfpos.getField() != numericCAF.field)
|
||||
&& isNumericField(fString.getFieldPtr()[i - 1])
|
||||
&& !isNumericField(_field)) {
|
||||
int j = i - 1;
|
||||
for (; j >= fString.fZero && isNumericField(fString.getFieldPtr()[j]); j--) {}
|
||||
cfpos.setState(
|
||||
numericCAF.category,
|
||||
numericCAF.field,
|
||||
j - fString.fZero + 1,
|
||||
i - fString.fZero);
|
||||
return true;
|
||||
}
|
||||
// Special case: skip over INTEGER; will be coalesced later.
|
||||
if (_field == UNUM_INTEGER_FIELD) {
|
||||
_field = UNUM_FIELD_COUNT;
|
||||
}
|
||||
// Case 2: no field starting at this position.
|
||||
if (_field == UNUM_FIELD_COUNT || _field == kEndField) {
|
||||
continue;
|
||||
}
|
||||
// Case 3: check for field starting at this position
|
||||
auto caf = StringBuilderFieldUtils::expand(_field);
|
||||
if (cfpos.matchesField(caf.category, caf.field)) {
|
||||
fieldStart = i - fString.fZero;
|
||||
currField = _field;
|
||||
}
|
||||
}
|
||||
|
||||
U_ASSERT(currField == UNUM_FIELD_COUNT);
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Returns true if the field is UNUM_INTEGER_FIELD or UNUM_GROUPING_SEPARATOR_FIELD. */
bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) {
    if (field == UNUM_INTEGER_FIELD) {
        return true;
    }
    return field == UNUM_GROUPING_SEPARATOR_FIELD;
}
|
||||
|
||||
/** Forwards to StringBuilderFieldUtils::isNumericField() and returns its result. */
bool FormattedValueStringBuilderImpl::isNumericField(Field field) {
    const bool numeric = StringBuilderFieldUtils::isNumericField(field);
    return numeric;
}
|
||||
|
||||
int32_t FormattedValueStringBuilderImpl::trimBack(int32_t limit) const {
|
||||
return unisets::get(unisets::DEFAULT_IGNORABLES)->spanBack(
|
||||
fString.getCharPtr() + fString.fZero,
|
||||
limit,
|
||||
USET_SPAN_CONTAINED);
|
||||
}
|
||||
|
||||
int32_t FormattedValueStringBuilderImpl::trimFront(int32_t start) const {
|
||||
return start + unisets::get(unisets::DEFAULT_IGNORABLES)->span(
|
||||
fString.getCharPtr() + fString.fZero + start,
|
||||
fString.fLength - start,
|
||||
USET_SPAN_CONTAINED);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -278,14 +278,14 @@
|
|||
<ClCompile Include="number_patternstring.cpp" />
|
||||
<ClCompile Include="number_rounding.cpp" />
|
||||
<ClCompile Include="number_scientific.cpp" />
|
||||
<ClCompile Include="number_stringbuilder.cpp" />
|
||||
<ClCompile Include="formatted_string_builder.cpp" />
|
||||
<ClCompile Include="number_utils.cpp" />
|
||||
<ClCompile Include="number_mapper.cpp" />
|
||||
<ClCompile Include="number_multiplier.cpp" />
|
||||
<ClCompile Include="number_currencysymbols.cpp" />
|
||||
<ClCompile Include="number_skeletons.cpp" />
|
||||
<ClCompile Include="number_capi.cpp" />
|
||||
<ClCompile Include="numparse_stringsegment.cpp" />
|
||||
<ClCompile Include="string_segment.cpp" />
|
||||
<ClCompile Include="numparse_parsednumber.cpp" />
|
||||
<ClCompile Include="numparse_impl.cpp" />
|
||||
<ClCompile Include="numparse_symbols.cpp" />
|
||||
|
@ -541,7 +541,7 @@
|
|||
<ClInclude Include="number_patternstring.h" />
|
||||
<ClInclude Include="number_roundingutils.h" />
|
||||
<ClInclude Include="number_scientific.h" />
|
||||
<ClInclude Include="number_stringbuilder.h" />
|
||||
<ClInclude Include="formatted_string_builder.h" />
|
||||
<ClInclude Include="number_types.h" />
|
||||
<ClInclude Include="number_utypes.h" />
|
||||
<ClInclude Include="number_utils.h" />
|
||||
|
@ -549,7 +549,7 @@
|
|||
<ClInclude Include="number_multiplier.h" />
|
||||
<ClInclude Include="number_currencysymbols.h" />
|
||||
<ClInclude Include="number_skeletons.h" />
|
||||
<ClInclude Include="numparse_stringsegment.h" />
|
||||
<ClInclude Include="string_segment.h" />
|
||||
<ClInclude Include="numparse_impl.h" />
|
||||
<ClInclude Include="numparse_symbols.h" />
|
||||
<ClInclude Include="numparse_decimal.h" />
|
||||
|
|
|
@ -585,7 +585,7 @@
|
|||
<ClCompile Include="number_scientific.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="number_stringbuilder.cpp">
|
||||
<ClCompile Include="formatted_string_builder.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="number_utils.cpp">
|
||||
|
@ -606,7 +606,7 @@
|
|||
<ClCompile Include="number_capi.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="numparse_stringsegment.cpp">
|
||||
<ClCompile Include="string_segment.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="numparse_parsednumber.cpp">
|
||||
|
@ -878,7 +878,7 @@
|
|||
<ClInclude Include="number_scientific.h">
|
||||
<Filter>formatting</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="number_stringbuilder.h">
|
||||
<ClInclude Include="formatted_string_builder.h">
|
||||
<Filter>formatting</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="number_types.h">
|
||||
|
@ -902,7 +902,7 @@
|
|||
<ClInclude Include="number_skeletons.h">
|
||||
<Filter>formatting</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="numparse_stringsegment.h">
|
||||
<ClInclude Include="string_segment.h">
|
||||
<Filter>formatting</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="numparse_impl.h">
|
||||
|
|
|
@ -385,14 +385,14 @@
|
|||
<ClCompile Include="number_patternstring.cpp" />
|
||||
<ClCompile Include="number_rounding.cpp" />
|
||||
<ClCompile Include="number_scientific.cpp" />
|
||||
<ClCompile Include="number_stringbuilder.cpp" />
|
||||
<ClCompile Include="formatted_string_builder.cpp" />
|
||||
<ClCompile Include="number_utils.cpp" />
|
||||
<ClCompile Include="number_mapper.cpp" />
|
||||
<ClCompile Include="number_multiplier.cpp" />
|
||||
<ClCompile Include="number_currencysymbols.cpp" />
|
||||
<ClCompile Include="number_skeletons.cpp" />
|
||||
<ClCompile Include="number_capi.cpp" />
|
||||
<ClCompile Include="numparse_stringsegment.cpp" />
|
||||
<ClCompile Include="string_segment.cpp" />
|
||||
<ClCompile Include="numparse_parsednumber.cpp" />
|
||||
<ClCompile Include="numparse_impl.cpp" />
|
||||
<ClCompile Include="numparse_symbols.cpp" />
|
||||
|
@ -646,7 +646,7 @@
|
|||
<ClInclude Include="number_patternstring.h" />
|
||||
<ClInclude Include="number_roundingutils.h" />
|
||||
<ClInclude Include="number_scientific.h" />
|
||||
<ClInclude Include="number_stringbuilder.h" />
|
||||
<ClInclude Include="formatted_string_builder.h" />
|
||||
<ClInclude Include="number_types.h" />
|
||||
<ClInclude Include="number_utypes.h" />
|
||||
<ClInclude Include="number_utils.h" />
|
||||
|
@ -654,7 +654,7 @@
|
|||
<ClInclude Include="number_multiplier.h" />
|
||||
<ClInclude Include="number_currencysymbols.h" />
|
||||
<ClInclude Include="number_skeletons.h" />
|
||||
<ClInclude Include="numparse_stringsegment.h" />
|
||||
<ClInclude Include="string_segment.h" />
|
||||
<ClInclude Include="numparse_impl.h" />
|
||||
<ClInclude Include="numparse_symbols.h" />
|
||||
<ClInclude Include="numparse_decimal.h" />
|
||||
|
|
|
@ -156,7 +156,7 @@ Field AffixUtils::getFieldForType(AffixPatternType type) {
|
|||
}
|
||||
|
||||
int32_t
|
||||
AffixUtils::unescape(const UnicodeString &affixPattern, NumberStringBuilder &output, int32_t position,
|
||||
AffixUtils::unescape(const UnicodeString &affixPattern, FormattedStringBuilder &output, int32_t position,
|
||||
const SymbolProvider &provider, Field field, UErrorCode &status) {
|
||||
int32_t length = 0;
|
||||
AffixTag tag;
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
#include "number_types.h"
|
||||
#include "unicode/stringpiece.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace number {
|
||||
|
@ -134,16 +134,16 @@ class U_I18N_API AffixUtils {
|
|||
/**
|
||||
* Executes the unescape state machine. Replaces the unquoted characters "-", "+", "%", "‰", and
|
||||
* "¤" with the corresponding symbols provided by the {@link SymbolProvider}, and inserts the
|
||||
* result into the NumberStringBuilder at the requested location.
|
||||
* result into the FormattedStringBuilder at the requested location.
|
||||
*
|
||||
* <p>Example input: "'-'¤x"; example output: "-$x"
|
||||
*
|
||||
* @param affixPattern The original string to be unescaped.
|
||||
* @param output The NumberStringBuilder to mutate with the result.
|
||||
* @param position The index into the NumberStringBuilder to insert the string.
|
||||
* @param output The FormattedStringBuilder to mutate with the result.
|
||||
* @param position The index into the FormattedStringBuilder to insert the string.
|
||||
* @param provider An object to generate locale symbols.
|
||||
*/
|
||||
static int32_t unescape(const UnicodeString& affixPattern, NumberStringBuilder& output,
|
||||
static int32_t unescape(const UnicodeString& affixPattern, FormattedStringBuilder& output,
|
||||
int32_t position, const SymbolProvider& provider, Field field,
|
||||
UErrorCode& status);
|
||||
|
||||
|
|
|
@ -62,12 +62,12 @@ UnicodeString& LocalizedNumberFormatterAsFormat::format(const Formattable& obj,
|
|||
// always return first occurrence:
|
||||
pos.setBeginIndex(0);
|
||||
pos.setEndIndex(0);
|
||||
bool found = data.getStringRef().nextFieldPosition(pos, status);
|
||||
bool found = data.nextFieldPosition(pos, status);
|
||||
if (found && appendTo.length() != 0) {
|
||||
pos.setBeginIndex(pos.getBeginIndex() + appendTo.length());
|
||||
pos.setEndIndex(pos.getEndIndex() + appendTo.length());
|
||||
}
|
||||
appendTo.append(data.getStringRef().toTempUnicodeString());
|
||||
appendTo.append(data.toTempString(status));
|
||||
return appendTo;
|
||||
}
|
||||
|
||||
|
@ -84,10 +84,10 @@ UnicodeString& LocalizedNumberFormatterAsFormat::format(const Formattable& obj,
|
|||
if (U_FAILURE(status)) {
|
||||
return appendTo;
|
||||
}
|
||||
appendTo.append(data.getStringRef().toTempUnicodeString());
|
||||
appendTo.append(data.toTempString(status));
|
||||
if (posIter != nullptr) {
|
||||
FieldPositionIteratorHandler fpih(posIter, status);
|
||||
data.getStringRef().getAllFieldPositions(fpih, status);
|
||||
data.getAllFieldPositions(fpih, status);
|
||||
}
|
||||
return appendTo;
|
||||
}
|
||||
|
|
|
@ -696,7 +696,7 @@ void LocalizedNumberFormatter::formatImpl(impl::UFormattedNumberData* results, U
|
|||
|
||||
void LocalizedNumberFormatter::getAffixImpl(bool isPrefix, bool isNegative, UnicodeString& result,
|
||||
UErrorCode& status) const {
|
||||
NumberStringBuilder string;
|
||||
FormattedStringBuilder string;
|
||||
auto signum = static_cast<int8_t>(isNegative ? -1 : 1);
|
||||
// Always return affixes for plural form OTHER.
|
||||
static const StandardPlural::Form plural = StandardPlural::OTHER;
|
||||
|
|
|
@ -72,7 +72,7 @@ NumberFormatterImpl::NumberFormatterImpl(const MacroProps& macros, UErrorCode& s
|
|||
}
|
||||
|
||||
int32_t NumberFormatterImpl::formatStatic(const MacroProps& macros, DecimalQuantity& inValue,
|
||||
NumberStringBuilder& outString, UErrorCode& status) {
|
||||
FormattedStringBuilder& outString, UErrorCode& status) {
|
||||
NumberFormatterImpl impl(macros, false, status);
|
||||
MicroProps& micros = impl.preProcessUnsafe(inValue, status);
|
||||
if (U_FAILURE(status)) { return 0; }
|
||||
|
@ -83,7 +83,7 @@ int32_t NumberFormatterImpl::formatStatic(const MacroProps& macros, DecimalQuant
|
|||
|
||||
int32_t NumberFormatterImpl::getPrefixSuffixStatic(const MacroProps& macros, int8_t signum,
|
||||
StandardPlural::Form plural,
|
||||
NumberStringBuilder& outString, UErrorCode& status) {
|
||||
FormattedStringBuilder& outString, UErrorCode& status) {
|
||||
NumberFormatterImpl impl(macros, false, status);
|
||||
return impl.getPrefixSuffixUnsafe(signum, plural, outString, status);
|
||||
}
|
||||
|
@ -93,7 +93,7 @@ int32_t NumberFormatterImpl::getPrefixSuffixStatic(const MacroProps& macros, int
|
|||
// The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation.
|
||||
// See MicroProps::processQuantity() for details.
|
||||
|
||||
int32_t NumberFormatterImpl::format(DecimalQuantity& inValue, NumberStringBuilder& outString,
|
||||
int32_t NumberFormatterImpl::format(DecimalQuantity& inValue, FormattedStringBuilder& outString,
|
||||
UErrorCode& status) const {
|
||||
MicroProps micros;
|
||||
preProcess(inValue, micros, status);
|
||||
|
@ -130,7 +130,7 @@ MicroProps& NumberFormatterImpl::preProcessUnsafe(DecimalQuantity& inValue, UErr
|
|||
}
|
||||
|
||||
int32_t NumberFormatterImpl::getPrefixSuffix(int8_t signum, StandardPlural::Form plural,
|
||||
NumberStringBuilder& outString, UErrorCode& status) const {
|
||||
FormattedStringBuilder& outString, UErrorCode& status) const {
|
||||
if (U_FAILURE(status)) { return 0; }
|
||||
// #13453: DecimalFormat wants the affixes from the pattern only (modMiddle, aka pattern modifier).
|
||||
// Safe path: use fImmutablePatternModifier.
|
||||
|
@ -141,7 +141,7 @@ int32_t NumberFormatterImpl::getPrefixSuffix(int8_t signum, StandardPlural::Form
|
|||
}
|
||||
|
||||
int32_t NumberFormatterImpl::getPrefixSuffixUnsafe(int8_t signum, StandardPlural::Form plural,
|
||||
NumberStringBuilder& outString, UErrorCode& status) {
|
||||
FormattedStringBuilder& outString, UErrorCode& status) {
|
||||
if (U_FAILURE(status)) { return 0; }
|
||||
// #13453: DecimalFormat wants the affixes from the pattern only (modMiddle, aka pattern modifier).
|
||||
// Unsafe path: use fPatternModifier.
|
||||
|
@ -430,7 +430,7 @@ NumberFormatterImpl::resolvePluralRules(const PluralRules* rulesPtr, const Local
|
|||
return fRules.getAlias();
|
||||
}
|
||||
|
||||
int32_t NumberFormatterImpl::writeAffixes(const MicroProps& micros, NumberStringBuilder& string,
|
||||
int32_t NumberFormatterImpl::writeAffixes(const MicroProps& micros, FormattedStringBuilder& string,
|
||||
int32_t start, int32_t end, UErrorCode& status) {
|
||||
// Always apply the inner modifier (which is "strong").
|
||||
int32_t length = micros.modInner->apply(string, start, end, status);
|
||||
|
@ -445,7 +445,7 @@ int32_t NumberFormatterImpl::writeAffixes(const MicroProps& micros, NumberString
|
|||
}
|
||||
|
||||
int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuantity& quantity,
|
||||
NumberStringBuilder& string, int32_t index,
|
||||
FormattedStringBuilder& string, int32_t index,
|
||||
UErrorCode& status) {
|
||||
int32_t length = 0;
|
||||
if (quantity.isInfinite()) {
|
||||
|
@ -487,7 +487,7 @@ int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuanti
|
|||
}
|
||||
|
||||
int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, DecimalQuantity& quantity,
|
||||
NumberStringBuilder& string, int32_t index,
|
||||
FormattedStringBuilder& string, int32_t index,
|
||||
UErrorCode& status) {
|
||||
int length = 0;
|
||||
int integerCount = quantity.getUpperDisplayMagnitude() + 1;
|
||||
|
@ -513,7 +513,7 @@ int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, Decima
|
|||
}
|
||||
|
||||
int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps& micros, DecimalQuantity& quantity,
|
||||
NumberStringBuilder& string, int32_t index,
|
||||
FormattedStringBuilder& string, int32_t index,
|
||||
UErrorCode& status) {
|
||||
int length = 0;
|
||||
int fractionCount = -quantity.getLowerDisplayMagnitude();
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#define __NUMBER_FORMATIMPL_H__
|
||||
|
||||
#include "number_types.h"
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "number_patternstring.h"
|
||||
#include "number_utils.h"
|
||||
#include "number_patternmodifier.h"
|
||||
|
@ -35,7 +35,7 @@ class NumberFormatterImpl : public UMemory {
|
|||
* Builds and evaluates an "unsafe" MicroPropsGenerator, which is cheaper but can be used only once.
|
||||
*/
|
||||
static int32_t
|
||||
formatStatic(const MacroProps &macros, DecimalQuantity &inValue, NumberStringBuilder &outString,
|
||||
formatStatic(const MacroProps &macros, DecimalQuantity &inValue, FormattedStringBuilder &outString,
|
||||
UErrorCode &status);
|
||||
|
||||
/**
|
||||
|
@ -45,13 +45,13 @@ class NumberFormatterImpl : public UMemory {
|
|||
* the prefix length.
|
||||
*/
|
||||
static int32_t getPrefixSuffixStatic(const MacroProps& macros, int8_t signum,
|
||||
StandardPlural::Form plural, NumberStringBuilder& outString,
|
||||
StandardPlural::Form plural, FormattedStringBuilder& outString,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Evaluates the "safe" MicroPropsGenerator created by "fromMacros".
|
||||
*/
|
||||
int32_t format(DecimalQuantity& inValue, NumberStringBuilder& outString, UErrorCode& status) const;
|
||||
int32_t format(DecimalQuantity& inValue, FormattedStringBuilder& outString, UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* Like format(), but saves the result into an output MicroProps without additional processing.
|
||||
|
@ -61,7 +61,7 @@ class NumberFormatterImpl : public UMemory {
|
|||
/**
|
||||
* Like getPrefixSuffixStatic() but uses the safe compiled object.
|
||||
*/
|
||||
int32_t getPrefixSuffix(int8_t signum, StandardPlural::Form plural, NumberStringBuilder& outString,
|
||||
int32_t getPrefixSuffix(int8_t signum, StandardPlural::Form plural, FormattedStringBuilder& outString,
|
||||
UErrorCode& status) const;
|
||||
|
||||
const MicroProps& getRawMicroProps() const {
|
||||
|
@ -73,12 +73,12 @@ class NumberFormatterImpl : public UMemory {
|
|||
* This method formats only the main number, not affixes.
|
||||
*/
|
||||
static int32_t writeNumber(const MicroProps& micros, DecimalQuantity& quantity,
|
||||
NumberStringBuilder& string, int32_t index, UErrorCode& status);
|
||||
FormattedStringBuilder& string, int32_t index, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Adds the affixes. Intended to be called immediately after writeNumber.
|
||||
*/
|
||||
static int32_t writeAffixes(const MicroProps& micros, NumberStringBuilder& string, int32_t start,
|
||||
static int32_t writeAffixes(const MicroProps& micros, FormattedStringBuilder& string, int32_t start,
|
||||
int32_t end, UErrorCode& status);
|
||||
|
||||
private:
|
||||
|
@ -110,7 +110,7 @@ class NumberFormatterImpl : public UMemory {
|
|||
MicroProps& preProcessUnsafe(DecimalQuantity &inValue, UErrorCode &status);
|
||||
|
||||
int32_t getPrefixSuffixUnsafe(int8_t signum, StandardPlural::Form plural,
|
||||
NumberStringBuilder& outString, UErrorCode& status);
|
||||
FormattedStringBuilder& outString, UErrorCode& status);
|
||||
|
||||
/**
|
||||
* If rulesPtr is non-null, return it. Otherwise, return a PluralRules owned by this object for the
|
||||
|
@ -136,11 +136,11 @@ class NumberFormatterImpl : public UMemory {
|
|||
macrosToMicroGenerator(const MacroProps &macros, bool safe, UErrorCode &status);
|
||||
|
||||
static int32_t
|
||||
writeIntegerDigits(const MicroProps &micros, DecimalQuantity &quantity, NumberStringBuilder &string,
|
||||
writeIntegerDigits(const MicroProps &micros, DecimalQuantity &quantity, FormattedStringBuilder &string,
|
||||
int32_t index, UErrorCode &status);
|
||||
|
||||
static int32_t
|
||||
writeFractionDigits(const MicroProps &micros, DecimalQuantity &quantity, NumberStringBuilder &string,
|
||||
writeFractionDigits(const MicroProps &micros, DecimalQuantity &quantity, FormattedStringBuilder &string,
|
||||
int32_t index, UErrorCode &status);
|
||||
};
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@ AdoptingModifierStore::~AdoptingModifierStore() {
|
|||
}
|
||||
|
||||
|
||||
int32_t ConstantAffixModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
|
||||
int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
|
||||
UErrorCode &status) const {
|
||||
// Insert the suffix first since inserting the prefix will change the rightIndex
|
||||
int length = output.insert(rightIndex, fSuffix, fField, status);
|
||||
|
@ -154,7 +154,7 @@ SimpleModifier::SimpleModifier()
|
|||
: fField(UNUM_FIELD_COUNT), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
|
||||
}
|
||||
|
||||
int32_t SimpleModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
|
||||
int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
|
||||
UErrorCode &status) const {
|
||||
return formatAsPrefixSuffix(output, leftIndex, rightIndex, status);
|
||||
}
|
||||
|
@ -203,7 +203,7 @@ bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
|
|||
|
||||
|
||||
int32_t
|
||||
SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex,
|
||||
SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex,
|
||||
UErrorCode &status) const {
|
||||
if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
|
||||
// There is no argument for the inner number; overwrite the entire segment with our string.
|
||||
|
@ -227,7 +227,7 @@ SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startI
|
|||
|
||||
|
||||
int32_t
|
||||
SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, NumberStringBuilder& result,
|
||||
SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
|
||||
int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
|
||||
Field field, UErrorCode& status) {
|
||||
const UnicodeString& compiledPattern = compiled.compiledPattern;
|
||||
|
@ -284,7 +284,7 @@ SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, NumberStrin
|
|||
}
|
||||
|
||||
|
||||
int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
|
||||
int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
|
||||
UErrorCode &status) const {
|
||||
int32_t length = output.insert(leftIndex, fPrefix, status);
|
||||
if (fOverwrite) {
|
||||
|
@ -333,8 +333,8 @@ bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) c
|
|||
}
|
||||
|
||||
|
||||
CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix,
|
||||
const NumberStringBuilder &suffix,
|
||||
CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix,
|
||||
const FormattedStringBuilder &suffix,
|
||||
bool overwrite,
|
||||
bool strong,
|
||||
const DecimalFormatSymbols &symbols,
|
||||
|
@ -374,7 +374,7 @@ CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStrin
|
|||
}
|
||||
}
|
||||
|
||||
int32_t CurrencySpacingEnabledModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex,
|
||||
int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
|
||||
UErrorCode &status) const {
|
||||
// Currency spacing logic
|
||||
int length = 0;
|
||||
|
@ -395,7 +395,7 @@ int32_t CurrencySpacingEnabledModifier::apply(NumberStringBuilder &output, int l
|
|||
}
|
||||
|
||||
int32_t
|
||||
CurrencySpacingEnabledModifier::applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart,
|
||||
CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart,
|
||||
int32_t prefixLen, int32_t suffixStart,
|
||||
int32_t suffixLen,
|
||||
const DecimalFormatSymbols &symbols,
|
||||
|
@ -414,7 +414,7 @@ CurrencySpacingEnabledModifier::applyCurrencySpacing(NumberStringBuilder &output
|
|||
}
|
||||
|
||||
int32_t
|
||||
CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index,
|
||||
CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index,
|
||||
EAffix affix,
|
||||
const DecimalFormatSymbols &symbols,
|
||||
UErrorCode &status) {
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include "unicode/uniset.h"
|
||||
#include "unicode/simpleformatter.h"
|
||||
#include "standardplural.h"
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "number_types.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace number {
|
||||
|
@ -28,7 +28,7 @@ class U_I18N_API ConstantAffixModifier : public Modifier, public UObject {
|
|||
bool strong)
|
||||
: fPrefix(prefix), fSuffix(suffix), fField(field), fStrong(strong) {}
|
||||
|
||||
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode &status) const U_OVERRIDE;
|
||||
|
||||
int32_t getPrefixLength() const U_OVERRIDE;
|
||||
|
@ -64,7 +64,7 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory {
|
|||
// Default constructor for LongNameHandler.h
|
||||
SimpleModifier();
|
||||
|
||||
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode &status) const U_OVERRIDE;
|
||||
|
||||
int32_t getPrefixLength() const U_OVERRIDE;
|
||||
|
@ -81,7 +81,7 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory {
|
|||
|
||||
/**
|
||||
* TODO: This belongs in SimpleFormatterImpl. The only reason I haven't moved it there yet is because
|
||||
* NumberStringBuilder is an internal class and SimpleFormatterImpl feels like it should not depend on it.
|
||||
* FormattedStringBuilder is an internal class and SimpleFormatterImpl feels like it should not depend on it.
|
||||
*
|
||||
* <p>
|
||||
* Formats a value that is already stored inside the StringBuilder <code>result</code> between the indices
|
||||
|
@ -100,22 +100,22 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory {
|
|||
* @return The number of characters (UTF-16 code units) that were added to the StringBuilder.
|
||||
*/
|
||||
int32_t
|
||||
formatAsPrefixSuffix(NumberStringBuilder& result, int32_t startIndex, int32_t endIndex,
|
||||
formatAsPrefixSuffix(FormattedStringBuilder& result, int32_t startIndex, int32_t endIndex,
|
||||
UErrorCode& status) const;
|
||||
|
||||
/**
|
||||
* TODO: Like above, this belongs with the rest of the SimpleFormatterImpl code.
|
||||
* I put it here so that the SimpleFormatter uses in NumberStringBuilder are near each other.
|
||||
* I put it here so that the SimpleFormatter uses in FormattedStringBuilder are near each other.
|
||||
*
|
||||
* <p>
|
||||
* Applies the compiled two-argument pattern to the NumberStringBuilder.
|
||||
* Applies the compiled two-argument pattern to the FormattedStringBuilder.
|
||||
*
|
||||
* <p>
|
||||
* This method is optimized for the case where the prefix and suffix are often empty, such as
|
||||
* in the range pattern like "{0}-{1}".
|
||||
*/
|
||||
static int32_t
|
||||
formatTwoArgPattern(const SimpleFormatter& compiled, NumberStringBuilder& result,
|
||||
formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
|
||||
int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
|
||||
Field field, UErrorCode& status);
|
||||
|
||||
|
@ -131,13 +131,13 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory {
|
|||
|
||||
/**
|
||||
* An implementation of {@link Modifier} that allows for multiple types of fields in the same modifier. Constructed
|
||||
* based on the contents of two {@link NumberStringBuilder} instances (one for the prefix, one for the suffix).
|
||||
* based on the contents of two {@link FormattedStringBuilder} instances (one for the prefix, one for the suffix).
|
||||
*/
|
||||
class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory {
|
||||
public:
|
||||
ConstantMultiFieldModifier(
|
||||
const NumberStringBuilder &prefix,
|
||||
const NumberStringBuilder &suffix,
|
||||
const FormattedStringBuilder &prefix,
|
||||
const FormattedStringBuilder &suffix,
|
||||
bool overwrite,
|
||||
bool strong,
|
||||
const Modifier::Parameters parameters)
|
||||
|
@ -148,8 +148,8 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory {
|
|||
fParameters(parameters) {}
|
||||
|
||||
ConstantMultiFieldModifier(
|
||||
const NumberStringBuilder &prefix,
|
||||
const NumberStringBuilder &suffix,
|
||||
const FormattedStringBuilder &prefix,
|
||||
const FormattedStringBuilder &suffix,
|
||||
bool overwrite,
|
||||
bool strong)
|
||||
: fPrefix(prefix),
|
||||
|
@ -157,7 +157,7 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory {
|
|||
fOverwrite(overwrite),
|
||||
fStrong(strong) {}
|
||||
|
||||
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode &status) const U_OVERRIDE;
|
||||
|
||||
int32_t getPrefixLength() const U_OVERRIDE;
|
||||
|
@ -173,10 +173,10 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory {
|
|||
bool semanticallyEquivalent(const Modifier& other) const U_OVERRIDE;
|
||||
|
||||
protected:
|
||||
// NOTE: In Java, these are stored as array pointers. In C++, the NumberStringBuilder is stored by
|
||||
// NOTE: In Java, these are stored as array pointers. In C++, the FormattedStringBuilder is stored by
|
||||
// value and is treated internally as immutable.
|
||||
NumberStringBuilder fPrefix;
|
||||
NumberStringBuilder fSuffix;
|
||||
FormattedStringBuilder fPrefix;
|
||||
FormattedStringBuilder fSuffix;
|
||||
bool fOverwrite;
|
||||
bool fStrong;
|
||||
Modifier::Parameters fParameters;
|
||||
|
@ -187,19 +187,19 @@ class U_I18N_API CurrencySpacingEnabledModifier : public ConstantMultiFieldModif
|
|||
public:
|
||||
/** Safe code path */
|
||||
CurrencySpacingEnabledModifier(
|
||||
const NumberStringBuilder &prefix,
|
||||
const NumberStringBuilder &suffix,
|
||||
const FormattedStringBuilder &prefix,
|
||||
const FormattedStringBuilder &suffix,
|
||||
bool overwrite,
|
||||
bool strong,
|
||||
const DecimalFormatSymbols &symbols,
|
||||
UErrorCode &status);
|
||||
|
||||
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode &status) const U_OVERRIDE;
|
||||
|
||||
/** Unsafe code path */
|
||||
static int32_t
|
||||
applyCurrencySpacing(NumberStringBuilder &output, int32_t prefixStart, int32_t prefixLen,
|
||||
applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart, int32_t prefixLen,
|
||||
int32_t suffixStart, int32_t suffixLen, const DecimalFormatSymbols &symbols,
|
||||
UErrorCode &status);
|
||||
|
||||
|
@ -218,7 +218,7 @@ class U_I18N_API CurrencySpacingEnabledModifier : public ConstantMultiFieldModif
|
|||
};
|
||||
|
||||
/** Unsafe code path */
|
||||
static int32_t applyCurrencySpacingAffix(NumberStringBuilder &output, int32_t index, EAffix affix,
|
||||
static int32_t applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index, EAffix affix,
|
||||
const DecimalFormatSymbols &symbols, UErrorCode &status);
|
||||
|
||||
static UnicodeSet
|
||||
|
@ -234,7 +234,7 @@ class U_I18N_API EmptyModifier : public Modifier, public UMemory {
|
|||
public:
|
||||
explicit EmptyModifier(bool isStrong) : fStrong(isStrong) {}
|
||||
|
||||
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode &status) const U_OVERRIDE {
|
||||
(void)output;
|
||||
(void)leftIndex;
|
||||
|
|
|
@ -20,8 +20,7 @@ UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedNumber)
|
|||
|
||||
UBool FormattedNumber::nextFieldPosition(FieldPosition& fieldPosition, UErrorCode& status) const {
|
||||
UPRV_FORMATTED_VALUE_METHOD_GUARD(FALSE)
|
||||
// NOTE: MSVC sometimes complains when implicitly converting between bool and UBool
|
||||
return fData->getStringRef().nextFieldPosition(fieldPosition, status) ? TRUE : FALSE;
|
||||
return fData->nextFieldPosition(fieldPosition, status);
|
||||
}
|
||||
|
||||
void FormattedNumber::getAllFieldPositions(FieldPositionIterator& iterator, UErrorCode& status) const {
|
||||
|
@ -32,7 +31,7 @@ void FormattedNumber::getAllFieldPositions(FieldPositionIterator& iterator, UErr
|
|||
void FormattedNumber::getAllFieldPositionsImpl(FieldPositionIteratorHandler& fpih,
|
||||
UErrorCode& status) const {
|
||||
UPRV_FORMATTED_VALUE_METHOD_GUARD(UPRV_NOARG)
|
||||
fData->getStringRef().getAllFieldPositions(fpih, status);
|
||||
fData->getAllFieldPositions(fpih, status);
|
||||
}
|
||||
|
||||
void FormattedNumber::getDecimalQuantity(impl::DecimalQuantity& output, UErrorCode& status) const {
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
#include "unicode/numberformatter.h"
|
||||
#include "number_types.h"
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "number_decimfmtprops.h"
|
||||
|
||||
using namespace icu;
|
||||
|
@ -17,7 +17,7 @@ using namespace icu::number::impl;
|
|||
namespace {
|
||||
|
||||
int32_t
|
||||
addPaddingHelper(UChar32 paddingCp, int32_t requiredPadding, NumberStringBuilder &string, int32_t index,
|
||||
addPaddingHelper(UChar32 paddingCp, int32_t requiredPadding, FormattedStringBuilder &string, int32_t index,
|
||||
UErrorCode &status) {
|
||||
for (int32_t i = 0; i < requiredPadding; i++) {
|
||||
// TODO: If appending to the end, this will cause actual insertion operations. Improve.
|
||||
|
@ -60,7 +60,7 @@ Padder Padder::forProperties(const DecimalFormatProperties& properties) {
|
|||
}
|
||||
|
||||
int32_t Padder::padAndApply(const Modifier &mod1, const Modifier &mod2,
|
||||
NumberStringBuilder &string, int32_t leftIndex, int32_t rightIndex,
|
||||
FormattedStringBuilder &string, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode &status) const {
|
||||
int32_t modLength = mod1.getCodePointCount() + mod2.getCodePointCount();
|
||||
int32_t requiredPadding = fWidth - modLength - string.codePointCount();
|
||||
|
|
|
@ -108,8 +108,8 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator* paren
|
|||
}
|
||||
|
||||
ConstantMultiFieldModifier* MutablePatternModifier::createConstantModifier(UErrorCode& status) {
|
||||
NumberStringBuilder a;
|
||||
NumberStringBuilder b;
|
||||
FormattedStringBuilder a;
|
||||
FormattedStringBuilder b;
|
||||
insertPrefix(a, 0, status);
|
||||
insertSuffix(b, 0, status);
|
||||
if (fPatternInfo->hasCurrencySign()) {
|
||||
|
@ -170,7 +170,7 @@ void MutablePatternModifier::processQuantity(DecimalQuantity& fq, MicroProps& mi
|
|||
micros.modMiddle = this;
|
||||
}
|
||||
|
||||
int32_t MutablePatternModifier::apply(NumberStringBuilder& output, int32_t leftIndex, int32_t rightIndex,
|
||||
int32_t MutablePatternModifier::apply(FormattedStringBuilder& output, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode& status) const {
|
||||
// The unsafe code path performs self-mutation, so we need a const_cast.
|
||||
// This method needs to be const because it overrides a const method in the parent class.
|
||||
|
@ -248,13 +248,13 @@ bool MutablePatternModifier::semanticallyEquivalent(const Modifier& other) const
|
|||
UPRV_UNREACHABLE;
|
||||
}
|
||||
|
||||
int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder& sb, int position, UErrorCode& status) {
|
||||
int32_t MutablePatternModifier::insertPrefix(FormattedStringBuilder& sb, int position, UErrorCode& status) {
|
||||
prepareAffix(true);
|
||||
int32_t length = AffixUtils::unescape(currentAffix, sb, position, *this, fField, status);
|
||||
return length;
|
||||
}
|
||||
|
||||
int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder& sb, int position, UErrorCode& status) {
|
||||
int32_t MutablePatternModifier::insertSuffix(FormattedStringBuilder& sb, int position, UErrorCode& status) {
|
||||
prepareAffix(false);
|
||||
int32_t length = AffixUtils::unescape(currentAffix, sb, position, *this, fField, status);
|
||||
return length;
|
||||
|
|
|
@ -184,7 +184,7 @@ class U_I18N_API MutablePatternModifier
|
|||
|
||||
void processQuantity(DecimalQuantity &, MicroProps µs, UErrorCode &status) const U_OVERRIDE;
|
||||
|
||||
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode &status) const U_OVERRIDE;
|
||||
|
||||
int32_t getPrefixLength() const U_OVERRIDE;
|
||||
|
@ -240,17 +240,17 @@ class U_I18N_API MutablePatternModifier
|
|||
* CREATES A NEW HEAP OBJECT; THE CALLER GETS OWNERSHIP.
|
||||
*
|
||||
* @param a
|
||||
* A working NumberStringBuilder object; passed from the outside to prevent the need to create many new
|
||||
* A working FormattedStringBuilder object; passed from the outside to prevent the need to create many new
|
||||
* instances if this method is called in a loop.
|
||||
* @param b
|
||||
* Another working NumberStringBuilder object.
|
||||
* Another working FormattedStringBuilder object.
|
||||
* @return The constant modifier object.
|
||||
*/
|
||||
ConstantMultiFieldModifier *createConstantModifier(UErrorCode &status);
|
||||
|
||||
int32_t insertPrefix(NumberStringBuilder &sb, int position, UErrorCode &status);
|
||||
int32_t insertPrefix(FormattedStringBuilder &sb, int position, UErrorCode &status);
|
||||
|
||||
int32_t insertSuffix(NumberStringBuilder &sb, int position, UErrorCode &status);
|
||||
int32_t insertSuffix(FormattedStringBuilder &sb, int position, UErrorCode &status);
|
||||
|
||||
void prepareAffix(bool isPrefix);
|
||||
};
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#include <cstdlib>
|
||||
#include "number_scientific.h"
|
||||
#include "number_utils.h"
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "unicode/unum.h"
|
||||
#include "number_microprops.h"
|
||||
|
||||
|
@ -36,7 +36,7 @@ void ScientificModifier::set(int32_t exponent, const ScientificHandler *handler)
|
|||
fHandler = handler;
|
||||
}
|
||||
|
||||
int32_t ScientificModifier::apply(NumberStringBuilder &output, int32_t /*leftIndex*/, int32_t rightIndex,
|
||||
int32_t ScientificModifier::apply(FormattedStringBuilder &output, int32_t /*leftIndex*/, int32_t rightIndex,
|
||||
UErrorCode &status) const {
|
||||
// FIXME: Localized exponent separator location.
|
||||
int i = rightIndex;
|
||||
|
|
|
@ -21,7 +21,7 @@ class U_I18N_API ScientificModifier : public UMemory, public Modifier {
|
|||
|
||||
void set(int32_t exponent, const ScientificHandler *handler);
|
||||
|
||||
int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
int32_t apply(FormattedStringBuilder &output, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode &status) const U_OVERRIDE;
|
||||
|
||||
int32_t getPrefixLength() const U_OVERRIDE;
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include "unicode/numberformatter.h"
|
||||
#include "uinvchar.h"
|
||||
#include "charstr.h"
|
||||
#include "string_segment.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::number;
|
||||
|
@ -1217,7 +1218,7 @@ void blueprint_helpers::parseIntegerWidthOption(const StringSegment& segment, Ma
|
|||
maxInt = 0;
|
||||
}
|
||||
for (; offset < segment.length(); offset++) {
|
||||
if (segment.charAt(offset) == u'#') {
|
||||
if (maxInt != -1 && segment.charAt(offset) == u'#') {
|
||||
maxInt++;
|
||||
} else {
|
||||
break;
|
||||
|
|
|
@ -10,10 +10,10 @@
|
|||
#include "number_types.h"
|
||||
#include "numparse_types.h"
|
||||
#include "unicode/ucharstrie.h"
|
||||
#include "string_segment.h"
|
||||
|
||||
using icu::numparse::impl::StringSegment;
|
||||
|
||||
U_NAMESPACE_BEGIN namespace number {
|
||||
U_NAMESPACE_BEGIN
|
||||
namespace number {
|
||||
namespace impl {
|
||||
|
||||
// Forward-declaration
|
||||
|
|
|
@ -17,17 +17,16 @@
|
|||
#include "unicode/platform.h"
|
||||
#include "unicode/uniset.h"
|
||||
#include "standardplural.h"
|
||||
#include "formatted_string_builder.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace number {
|
||||
U_NAMESPACE_BEGIN
|
||||
namespace number {
|
||||
namespace impl {
|
||||
|
||||
// Typedef several enums for brevity and for easier comparison to Java.
|
||||
// For convenience and historical reasons, import the Field typedef to the namespace.
|
||||
typedef FormattedStringBuilder::Field Field;
|
||||
|
||||
// Convention: bottom 4 bits for field, top 4 bits for field category.
|
||||
// Field category 0 implies the number category so that the number field
|
||||
// literals can be directly passed as a Field type.
|
||||
// See the helper functions in "NumFieldUtils" in number_utils.h
|
||||
typedef uint8_t Field;
|
||||
// Typedef several enums for brevity and for easier comparison to Java.
|
||||
|
||||
typedef UNumberFormatRoundingMode RoundingMode;
|
||||
|
||||
|
@ -49,7 +48,6 @@ static constexpr char16_t kFallbackPaddingString[] = u" ";
|
|||
class Modifier;
|
||||
class MutablePatternModifier;
|
||||
class DecimalQuantity;
|
||||
class NumberStringBuilder;
|
||||
class ModifierStore;
|
||||
struct MicroProps;
|
||||
|
||||
|
@ -160,7 +158,7 @@ class U_I18N_API Modifier {
|
|||
* formatted.
|
||||
* @return The number of characters (UTF-16 code units) that were added to the string builder.
|
||||
*/
|
||||
virtual int32_t apply(NumberStringBuilder& output, int leftIndex, int rightIndex,
|
||||
virtual int32_t apply(FormattedStringBuilder& output, int leftIndex, int rightIndex,
|
||||
UErrorCode& status) const = 0;
|
||||
|
||||
/**
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "number_roundingutils.h"
|
||||
#include "decNumber.h"
|
||||
#include "charstr.h"
|
||||
#include "formatted_string_builder.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -32,52 +33,10 @@ enum CldrPatternStyle {
|
|||
CLDR_PATTERN_STYLE_COUNT,
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Helper functions for dealing with the Field typedef, which stores fields
|
||||
* in a compressed format.
|
||||
*/
|
||||
class NumFieldUtils {
|
||||
public:
|
||||
/** Unpacked form of a Field: a category plus the field within that category (see expand()). */
struct CategoryFieldPair {
|
||||
int32_t category;
|
||||
int32_t field;
|
||||
};
|
||||
|
||||
/** Compile-time function to construct a Field from a category and a field */
|
||||
template <int32_t category, int32_t field>
|
||||
static constexpr Field compress() {
|
||||
// Category 0 is rejected at compile time; both values must fit in 4 bits each.
static_assert(category != 0, "cannot use Undefined category in NumFieldUtils");
|
||||
static_assert(category <= 0xf, "only 4 bits for category");
|
||||
static_assert(field <= 0xf, "only 4 bits for field");
|
||||
// Category goes in the high 4 bits, field in the low 4 bits.
return static_cast<int8_t>((category << 4) | field);
|
||||
}
|
||||
|
||||
/** Runtime inline function to unpack the category and field from the Field */
|
||||
static inline CategoryFieldPair expand(Field field) {
|
||||
// UNUM_FIELD_COUNT is reported as the undefined category with field 0.
if (field == UNUM_FIELD_COUNT) {
|
||||
return {UFIELD_CATEGORY_UNDEFINED, 0};
|
||||
}
|
||||
// High 4 bits are the category, low 4 bits are the field.
CategoryFieldPair ret = {
|
||||
(field >> 4),
|
||||
(field & 0xf)
|
||||
};
|
||||
// A zero category is reported as UFIELD_CATEGORY_NUMBER.
if (ret.category == 0) {
|
||||
ret.category = UFIELD_CATEGORY_NUMBER;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Returns true when the high 4 bits of the Field are 0 or UFIELD_CATEGORY_NUMBER. */
static inline bool isNumericField(Field field) {
|
||||
int8_t category = field >> 4;
|
||||
return category == 0 || category == UFIELD_CATEGORY_NUMBER;
|
||||
}
|
||||
};
|
||||
|
||||
// Namespace for naked functions
|
||||
namespace utils {
|
||||
|
||||
inline int32_t insertDigitFromSymbols(NumberStringBuilder& output, int32_t index, int8_t digit,
|
||||
inline int32_t insertDigitFromSymbols(FormattedStringBuilder& output, int32_t index, int8_t digit,
|
||||
const DecimalFormatSymbols& symbols, Field field,
|
||||
UErrorCode& status) {
|
||||
if (symbols.getCodePointZero() != -1) {
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#include "unicode/numberformatter.h"
|
||||
#include "number_types.h"
|
||||
#include "number_decimalquantity.h"
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "formattedval_impl.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace number {
|
||||
|
@ -31,9 +31,9 @@ const DecimalQuantity* validateUFormattedNumberToDecimalQuantity(
|
|||
* The DecimalQuantity is not currently being used by FormattedNumber, but at some point it could be used
|
||||
* to add a toDecNumber() or similar method.
|
||||
*/
|
||||
class UFormattedNumberData : public FormattedValueNumberStringBuilderImpl {
|
||||
class UFormattedNumberData : public FormattedValueStringBuilderImpl {
|
||||
public:
|
||||
UFormattedNumberData() : FormattedValueNumberStringBuilderImpl(0) {}
|
||||
UFormattedNumberData() : FormattedValueStringBuilderImpl(0) {}
|
||||
virtual ~UFormattedNumberData();
|
||||
|
||||
DecimalQuantity quantity;
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include "numparse_affixes.h"
|
||||
#include "numparse_utils.h"
|
||||
#include "number_utils.h"
|
||||
#include "string_segment.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_compositions.h"
|
||||
#include "string_segment.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
using namespace icu;
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "ucurrimp.h"
|
||||
#include "unicode/errorcode.h"
|
||||
#include "numparse_utils.h"
|
||||
#include "string_segment.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "unicode/uchar.h"
|
||||
#include "putilimp.h"
|
||||
#include "number_decimalquantity.h"
|
||||
#include "string_segment.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "unicode/localpointer.h"
|
||||
#include "numparse_validators.h"
|
||||
#include "number_multiplier.h"
|
||||
#include "string_segment.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include "numparse_types.h"
|
||||
#include "number_decimalquantity.h"
|
||||
#include "string_segment.h"
|
||||
#include "putilimp.h"
|
||||
#include <cmath>
|
||||
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "numparse_types.h"
|
||||
#include "numparse_scientific.h"
|
||||
#include "static_unicode_sets.h"
|
||||
#include "string_segment.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
|
|
|
@ -1,24 +0,0 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
#ifndef __NUMPARSE_STRINGSEGMENT_H__
|
||||
#define __NUMPARSE_STRINGSEGMENT_H__
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "number_types.h"
|
||||
#include "unicode/unistr.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
namespace numparse {
|
||||
namespace impl {
|
||||
|
||||
|
||||
} // namespace impl
|
||||
} // namespace numparse
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //__NUMPARSE_STRINGSEGMENT_H__
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
|
@ -12,6 +12,7 @@
|
|||
#include "numparse_types.h"
|
||||
#include "numparse_symbols.h"
|
||||
#include "numparse_utils.h"
|
||||
#include "string_segment.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
|
|
|
@ -9,12 +9,13 @@
|
|||
|
||||
#include "unicode/uobject.h"
|
||||
#include "number_decimalquantity.h"
|
||||
#include "string_segment.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace numparse {
|
||||
U_NAMESPACE_BEGIN
|
||||
namespace numparse {
|
||||
namespace impl {
|
||||
|
||||
// Forward-declarations
|
||||
class StringSegment;
|
||||
class ParsedNumber;
|
||||
|
||||
typedef int32_t result_flags_t;
|
||||
|
@ -169,115 +170,6 @@ class U_I18N_API ParsedNumber {
|
|||
};
|
||||
|
||||
|
||||
/**
|
||||
* A mutable class allowing for a String with a variable offset and length. The charAt, length, and
|
||||
* subSequence methods all operate relative to the fixed offset into the String.
|
||||
*
|
||||
* @author sffc
|
||||
*/
|
||||
// Exported as U_I18N_API for tests
|
||||
class U_I18N_API StringSegment : public UMemory {
|
||||
public:
|
||||
StringSegment(const UnicodeString& str, bool ignoreCase);
|
||||
|
||||
int32_t getOffset() const;
|
||||
|
||||
void setOffset(int32_t start);
|
||||
|
||||
/**
|
||||
* Equivalent to <code>setOffset(getOffset()+delta)</code>.
|
||||
*
|
||||
* <p>
|
||||
* This method is usually called by a Matcher to register that a char was consumed. If the char is
|
||||
* strong (it usually is, except for things like whitespace), follow this with a call to
|
||||
* {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
|
||||
*/
|
||||
void adjustOffset(int32_t delta);
|
||||
|
||||
/**
|
||||
* Adjusts the offset by the width of the current code point, either 1 or 2 chars.
|
||||
*/
|
||||
void adjustOffsetByCodePoint();
|
||||
|
||||
void setLength(int32_t length);
|
||||
|
||||
void resetLength();
|
||||
|
||||
int32_t length() const;
|
||||
|
||||
char16_t charAt(int32_t index) const;
|
||||
|
||||
UChar32 codePointAt(int32_t index) const;
|
||||
|
||||
UnicodeString toUnicodeString() const;
|
||||
|
||||
const UnicodeString toTempUnicodeString() const;
|
||||
|
||||
/**
|
||||
* Returns the first code point in the string segment, or -1 if the string starts with an invalid
|
||||
* code point.
|
||||
*
|
||||
* <p>
|
||||
* <strong>Important:</strong> Most of the time, you should use {@link #matches}, which handles case
|
||||
* folding logic, instead of this method.
|
||||
*/
|
||||
UChar32 getCodePoint() const;
|
||||
|
||||
/**
|
||||
* Returns true if the first code point of this StringSegment equals the given code point.
|
||||
*
|
||||
* <p>
|
||||
* This method will perform case folding if case folding is enabled for the parser.
|
||||
*/
|
||||
bool startsWith(UChar32 otherCp) const;
|
||||
|
||||
/**
|
||||
* Returns true if the first code point of this StringSegment is in the given UnicodeSet.
|
||||
*/
|
||||
bool startsWith(const UnicodeSet& uniset) const;
|
||||
|
||||
/**
|
||||
* Returns true if there is at least one code point of overlap between this StringSegment and the
|
||||
* given UnicodeString.
|
||||
*/
|
||||
bool startsWith(const UnicodeString& other) const;
|
||||
|
||||
/**
|
||||
* Returns the length of the prefix shared by this StringSegment and the given CharSequence. For
|
||||
* example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
|
||||
* since the first 2 characters are the same.
|
||||
*
|
||||
* <p>
|
||||
* This method only returns offsets along code point boundaries.
|
||||
*
|
||||
* <p>
|
||||
* This method will perform case folding if case folding was enabled in the constructor.
|
||||
*
|
||||
* <p>
|
||||
* IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check.
|
||||
*/
|
||||
int32_t getCommonPrefixLength(const UnicodeString& other);
|
||||
|
||||
/**
|
||||
* Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
|
||||
* enabled for the parser.
|
||||
*/
|
||||
int32_t getCaseSensitivePrefixLength(const UnicodeString& other);
|
||||
|
||||
bool operator==(const UnicodeString& other) const;
|
||||
|
||||
private:
|
||||
const UnicodeString& fStr;
|
||||
int32_t fStart;
|
||||
int32_t fEnd;
|
||||
bool fFoldCase;
|
||||
|
||||
int32_t getPrefixLengthInternal(const UnicodeString& other, bool foldCase);
|
||||
|
||||
static bool codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase);
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* The core interface implemented by all matchers used for number parsing.
|
||||
*
|
||||
|
|
|
@ -382,7 +382,7 @@ UPRV_FORMATTED_VALUE_SUBCLASS_AUTO_IMPL(FormattedNumberRange)
|
|||
UBool FormattedNumberRange::nextFieldPosition(FieldPosition& fieldPosition, UErrorCode& status) const {
|
||||
UPRV_FORMATTED_VALUE_METHOD_GUARD(FALSE)
|
||||
// NOTE: MSVC sometimes complains when implicitly converting between bool and UBool
|
||||
return fData->getStringRef().nextFieldPosition(fieldPosition, status) ? TRUE : FALSE;
|
||||
return fData->nextFieldPosition(fieldPosition, status);
|
||||
}
|
||||
|
||||
void FormattedNumberRange::getAllFieldPositions(FieldPositionIterator& iterator, UErrorCode& status) const {
|
||||
|
@ -393,7 +393,7 @@ void FormattedNumberRange::getAllFieldPositions(FieldPositionIterator& iterator,
|
|||
void FormattedNumberRange::getAllFieldPositionsImpl(
|
||||
FieldPositionIteratorHandler& fpih, UErrorCode& status) const {
|
||||
UPRV_FORMATTED_VALUE_METHOD_GUARD(UPRV_NOARG)
|
||||
fData->getStringRef().getAllFieldPositions(fpih, status);
|
||||
fData->getAllFieldPositions(fpih, status);
|
||||
}
|
||||
|
||||
UnicodeString FormattedNumberRange::getFirstDecimal(UErrorCode& status) const {
|
||||
|
|
|
@ -397,7 +397,7 @@ void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
|
|||
break;
|
||||
}
|
||||
|
||||
NumberStringBuilder& string = data.getStringRef();
|
||||
FormattedStringBuilder& string = data.getStringRef();
|
||||
int32_t lengthPrefix = 0;
|
||||
int32_t length1 = 0;
|
||||
int32_t lengthInfix = 0;
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#include "number_types.h"
|
||||
#include "number_decimalquantity.h"
|
||||
#include "number_formatimpl.h"
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "formattedval_impl.h"
|
||||
|
||||
U_NAMESPACE_BEGIN namespace number {
|
||||
|
@ -29,9 +29,9 @@ namespace impl {
|
|||
* Possible magic number: 0x46445200
|
||||
* Reads in ASCII as "FDR" (FormatteDnumberRange with room at the end)
|
||||
*/
|
||||
class UFormattedNumberRangeData : public FormattedValueNumberStringBuilderImpl {
|
||||
class UFormattedNumberRangeData : public FormattedValueStringBuilderImpl {
|
||||
public:
|
||||
UFormattedNumberRangeData() : FormattedValueNumberStringBuilderImpl(0) {}
|
||||
UFormattedNumberRangeData() : FormattedValueStringBuilderImpl(0) {}
|
||||
virtual ~UFormattedNumberRangeData();
|
||||
|
||||
DecimalQuantity quantity1;
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
#include "uassert.h"
|
||||
#include "number_decimalquantity.h"
|
||||
#include "number_utypes.h"
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -180,7 +180,7 @@ void QuantityFormatter::formatAndSelect(
|
|||
double quantity,
|
||||
const NumberFormat& fmt,
|
||||
const PluralRules& rules,
|
||||
number::impl::NumberStringBuilder& output,
|
||||
FormattedStringBuilder& output,
|
||||
StandardPlural::Form& pluralForm,
|
||||
UErrorCode& status) {
|
||||
UnicodeString pluralKeyword;
|
||||
|
|
|
@ -26,12 +26,7 @@ class PluralRules;
|
|||
class NumberFormat;
|
||||
class Formattable;
|
||||
class FieldPosition;
|
||||
|
||||
namespace number {
|
||||
namespace impl {
|
||||
class NumberStringBuilder;
|
||||
}
|
||||
}
|
||||
class FormattedStringBuilder;
|
||||
|
||||
/**
|
||||
* A plural aware formatter that is good for expressing a single quantity and
|
||||
|
@ -129,7 +124,7 @@ public:
|
|||
|
||||
/**
|
||||
* Formats a quantity and selects its plural form. The output is appended
|
||||
* to a NumberStringBuilder in order to retain field information.
|
||||
* to a FormattedStringBuilder in order to retain field information.
|
||||
*
|
||||
* @param quantity The number to format.
|
||||
* @param fmt The formatter to use to format the number.
|
||||
|
@ -144,7 +139,7 @@ public:
|
|||
double quantity,
|
||||
const NumberFormat& fmt,
|
||||
const PluralRules& rules,
|
||||
number::impl::NumberStringBuilder& output,
|
||||
FormattedStringBuilder& output,
|
||||
StandardPlural::Form& pluralForm,
|
||||
UErrorCode& status);
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
#include "standardplural.h"
|
||||
#include "unifiedcache.h"
|
||||
#include "util.h"
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "number_utypes.h"
|
||||
#include "number_modifiers.h"
|
||||
#include "formattedval_impl.h"
|
||||
|
@ -725,14 +725,14 @@ const RelativeDateTimeCacheData *LocaleCacheKey<RelativeDateTimeCacheData>::crea
|
|||
|
||||
|
||||
static constexpr number::impl::Field kRDTNumericField
|
||||
= number::impl::NumFieldUtils::compress<UFIELD_CATEGORY_RELATIVE_DATETIME, UDAT_REL_NUMERIC_FIELD>();
|
||||
= StringBuilderFieldUtils::compress<UFIELD_CATEGORY_RELATIVE_DATETIME, UDAT_REL_NUMERIC_FIELD>();
|
||||
|
||||
static constexpr number::impl::Field kRDTLiteralField
|
||||
= number::impl::NumFieldUtils::compress<UFIELD_CATEGORY_RELATIVE_DATETIME, UDAT_REL_LITERAL_FIELD>();
|
||||
= StringBuilderFieldUtils::compress<UFIELD_CATEGORY_RELATIVE_DATETIME, UDAT_REL_LITERAL_FIELD>();
|
||||
|
||||
class FormattedRelativeDateTimeData : public FormattedValueNumberStringBuilderImpl {
|
||||
class FormattedRelativeDateTimeData : public FormattedValueStringBuilderImpl {
|
||||
public:
|
||||
FormattedRelativeDateTimeData() : FormattedValueNumberStringBuilderImpl(kRDTNumericField) {}
|
||||
FormattedRelativeDateTimeData() : FormattedValueStringBuilderImpl(kRDTNumericField) {}
|
||||
virtual ~FormattedRelativeDateTimeData();
|
||||
};
|
||||
|
||||
|
|
|
@ -10,14 +10,12 @@
|
|||
#define UNISTR_FROM_STRING_EXPLICIT
|
||||
|
||||
#include "numparse_types.h"
|
||||
#include "numparse_stringsegment.h"
|
||||
#include "string_segment.h"
|
||||
#include "putilimp.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
using namespace icu;
|
||||
using namespace icu::numparse;
|
||||
using namespace icu::numparse::impl;
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
||||
StringSegment::StringSegment(const UnicodeString& str, bool ignoreCase)
|
||||
|
@ -143,4 +141,5 @@ bool StringSegment::operator==(const UnicodeString& other) const {
|
|||
}
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
134
icu4c/source/i18n/string_segment.h
Normal file
134
icu4c/source/i18n/string_segment.h
Normal file
|
@ -0,0 +1,134 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
#ifndef __NUMPARSE_STRINGSEGMENT_H__
|
||||
#define __NUMPARSE_STRINGSEGMENT_H__
|
||||
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/uniset.h"
|
||||
|
||||
U_NAMESPACE_BEGIN
|
||||
|
||||
|
||||
/**
|
||||
* A mutable UnicodeString wrapper with a variable offset and length and
|
||||
* support for case folding. The charAt, length, and subSequence methods all
|
||||
* operate relative to the fixed offset into the UnicodeString.
|
||||
*
|
||||
* Intended to be useful for parsing.
|
||||
*
|
||||
* CAUTION: Since this class is mutable, it must not be used anywhere that an
|
||||
* immutable object is required, like in a cache or as the key of a hash map.
|
||||
*
|
||||
* @author sffc (Shane Carr)
|
||||
*/
|
||||
// Exported as U_I18N_API for tests
|
||||
class U_I18N_API StringSegment : public UMemory {
|
||||
public:
|
||||
StringSegment(const UnicodeString& str, bool ignoreCase);
|
||||
|
||||
int32_t getOffset() const;
|
||||
|
||||
void setOffset(int32_t start);
|
||||
|
||||
/**
|
||||
* Equivalent to <code>setOffset(getOffset()+delta)</code>.
|
||||
*
|
||||
* <p>
|
||||
* This method is usually called by a Matcher to register that a char was consumed. If the char is
|
||||
* strong (it usually is, except for things like whitespace), follow this with a call to
|
||||
* {@link ParsedNumber#setCharsConsumed}. For more information on strong chars, see that method.
|
||||
*/
|
||||
void adjustOffset(int32_t delta);
|
||||
|
||||
/**
|
||||
* Adjusts the offset by the width of the current code point, either 1 or 2 chars.
|
||||
*/
|
||||
void adjustOffsetByCodePoint();
|
||||
|
||||
void setLength(int32_t length);
|
||||
|
||||
void resetLength();
|
||||
|
||||
int32_t length() const;
|
||||
|
||||
char16_t charAt(int32_t index) const;
|
||||
|
||||
UChar32 codePointAt(int32_t index) const;
|
||||
|
||||
UnicodeString toUnicodeString() const;
|
||||
|
||||
const UnicodeString toTempUnicodeString() const;
|
||||
|
||||
/**
|
||||
* Returns the first code point in the string segment, or -1 if the string starts with an invalid
|
||||
* code point.
|
||||
*
|
||||
* <p>
|
||||
* <strong>Important:</strong> Most of the time, you should use {@link #startsWith}, which handles case
|
||||
* folding logic, instead of this method.
|
||||
*/
|
||||
UChar32 getCodePoint() const;
|
||||
|
||||
/**
|
||||
* Returns true if the first code point of this StringSegment equals the given code point.
|
||||
*
|
||||
* <p>
|
||||
* This method will perform case folding if case folding is enabled for the parser.
|
||||
*/
|
||||
bool startsWith(UChar32 otherCp) const;
|
||||
|
||||
/**
|
||||
* Returns true if the first code point of this StringSegment is in the given UnicodeSet.
|
||||
*/
|
||||
bool startsWith(const UnicodeSet& uniset) const;
|
||||
|
||||
/**
|
||||
* Returns true if there is at least one code point of overlap between this StringSegment and the
|
||||
* given UnicodeString.
|
||||
*/
|
||||
bool startsWith(const UnicodeString& other) const;
|
||||
|
||||
/**
|
||||
* Returns the length of the prefix shared by this StringSegment and the given UnicodeString. For
|
||||
* example, if this string segment is "aab", and the char sequence is "aac", this method returns 2,
|
||||
* since the first 2 characters are the same.
|
||||
*
|
||||
* <p>
|
||||
* This method only returns offsets along code point boundaries.
|
||||
*
|
||||
* <p>
|
||||
* This method will perform case folding if case folding was enabled in the constructor.
|
||||
*
|
||||
* <p>
|
||||
* IMPORTANT: The given UnicodeString must not be empty! It is the caller's responsibility to check.
|
||||
*/
|
||||
int32_t getCommonPrefixLength(const UnicodeString& other);
|
||||
|
||||
/**
|
||||
* Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding is
|
||||
* enabled for the parser.
|
||||
*/
|
||||
int32_t getCaseSensitivePrefixLength(const UnicodeString& other);
|
||||
|
||||
bool operator==(const UnicodeString& other) const;
|
||||
|
||||
private:
|
||||
const UnicodeString& fStr;
|
||||
int32_t fStart;
|
||||
int32_t fEnd;
|
||||
bool fFoldCase;
|
||||
|
||||
int32_t getPrefixLengthInternal(const UnicodeString& other, bool foldCase);
|
||||
|
||||
static bool codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase);
|
||||
};
|
||||
|
||||
|
||||
U_NAMESPACE_END
|
||||
|
||||
#endif //__NUMPARSE_STRINGSEGMENT_H__
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
|
@ -85,6 +85,7 @@ U_NAMESPACE_BEGIN
|
|||
// Forward declarations:
|
||||
class IFixedDecimal;
|
||||
class FieldPositionIteratorHandler;
|
||||
class FormattedStringBuilder;
|
||||
|
||||
namespace numparse {
|
||||
namespace impl {
|
||||
|
@ -142,7 +143,6 @@ class MultiplierProducer;
|
|||
class RoundingImpl;
|
||||
class ScientificHandler;
|
||||
class Modifier;
|
||||
class NumberStringBuilder;
|
||||
class AffixPatternProvider;
|
||||
class NumberPropertyMapper;
|
||||
struct DecimalFormatProperties;
|
||||
|
@ -1343,7 +1343,7 @@ class U_I18N_API Padder : public UMemory {
|
|||
}
|
||||
|
||||
int32_t padAndApply(const impl::Modifier &mod1, const impl::Modifier &mod2,
|
||||
impl::NumberStringBuilder &string, int32_t leftIndex, int32_t rightIndex,
|
||||
FormattedStringBuilder &string, int32_t leftIndex, int32_t rightIndex,
|
||||
UErrorCode &status) const;
|
||||
|
||||
// To allow MacroProps/MicroProps to initialize empty instances:
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
* \file
|
||||
* \brief C API: StringSearch
|
||||
*
|
||||
* C Apis for an engine that provides language-sensitive text searching based
|
||||
* C APIs for an engine that provides language-sensitive text searching based
|
||||
* on the comparison rules defined in a <tt>UCollator</tt> data struct,
|
||||
* see <tt>ucol.h</tt>. This ensures that language eccentricity can be
|
||||
* handled, e.g. for the German collator, characters ß and SS will be matched
|
||||
|
@ -55,7 +55,7 @@
|
|||
* <p>
|
||||
* This search has APIs similar to that of other text iteration mechanisms
|
||||
* such as the break iterators in <tt>ubrk.h</tt>. Using these
|
||||
* APIs, it is easy to scan through text looking for all occurances of
|
||||
* APIs, it is easy to scan through text looking for all occurrences of
|
||||
* a given pattern. This search iterator allows changing of direction by
|
||||
* calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>.
|
||||
* Though a direction change can occur without calling <tt>reset</tt> first,
|
||||
|
@ -130,7 +130,7 @@
|
|||
* pos = usearch_next(search, &status))
|
||||
* {
|
||||
* printf("Found match at %d pos, length is %d\n", pos,
|
||||
* usearch_getMatchLength(search));
|
||||
* usearch_getMatchedLength(search));
|
||||
* }
|
||||
* }
|
||||
*
|
||||
|
@ -479,7 +479,7 @@ U_STABLE int32_t U_EXPORT2 usearch_getMatchedLength(
|
|||
* possible. If the buffer fits the matched text exactly, a null-termination
|
||||
* is not possible, then a U_STRING_NOT_TERMINATED_ERROR is set in status.
|
||||
* Pre-flighting can be either done with length = 0 or the API
|
||||
* <tt>usearch_getMatchLength</tt>.
|
||||
* <tt>usearch_getMatchedLength</tt>.
|
||||
* @param strsrch search iterator data struct
|
||||
* @param result UChar buffer to store the matched string
|
||||
* @param resultCapacity length of the result buffer
|
||||
|
@ -766,7 +766,7 @@ U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
|
|||
#ifndef U_HIDE_INTERNAL_API
|
||||
/**
|
||||
* Simple forward search for the pattern, starting at a specified index,
|
||||
* and using using a default set search options.
|
||||
* and using a default set of search options.
|
||||
*
|
||||
* This is an experimental function, and is not an official part of the
|
||||
* ICU API.
|
||||
|
@ -783,7 +783,7 @@ U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
|
|||
* are part of a combining sequence, as described below.
|
||||
*
|
||||
* A match will not include a partial combining sequence. Combining
|
||||
* character sequences are considered to be inseperable units,
|
||||
* character sequences are considered to be inseparable units,
|
||||
* and either match the pattern completely, or are considered to not match
|
||||
* at all. Thus, for example, an A followed by a combining accent mark will
|
||||
* not be found when searching for a plain (unaccented) A. (unless
|
||||
|
@ -792,7 +792,7 @@ U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
|
|||
* When beginning a search, the initial starting position, startIdx,
|
||||
* is assumed to be an acceptable match boundary with respect to
|
||||
* combining characters. A combining sequence that spans across the
|
||||
* starting point will not supress a match beginning at startIdx.
|
||||
* starting point will not suppress a match beginning at startIdx.
|
||||
*
|
||||
* Characters that expand to multiple collation elements
|
||||
* (German sharp-S becoming 'ss', or the composed forms of accented
|
||||
|
@ -843,7 +843,7 @@ U_INTERNAL UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
|
|||
* are part of a combining sequence, as described below.
|
||||
*
|
||||
* A match will not include a partial combining sequence. Combining
|
||||
* character sequences are considered to be inseperable units,
|
||||
* character sequences are considered to be inseparable units,
|
||||
* and either match the pattern completely, or are considered to not match
|
||||
* at all. Thus, for example, an A followed by a combining accent mark will
|
||||
* not be found when searching for a plain (unaccented) A. (unless
|
||||
|
@ -852,7 +852,7 @@ U_INTERNAL UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
|
|||
* When beginning a search, the initial starting position, startIdx,
|
||||
* is assumed to be an acceptable match boundary with respect to
|
||||
* combining characters. A combining sequence that spans across the
|
||||
* starting point will not supress a match beginning at startIdx.
|
||||
* starting point will not suppress a match beginning at startIdx.
|
||||
*
|
||||
* Characters that expand to multiple collation elements
|
||||
* (German sharp-S becoming 'ss', or the composed forms of accented
|
||||
|
|
|
@ -1351,7 +1351,7 @@ inline int getUnblockedAccentIndex(UChar *accents, int32_t *accentsindex)
|
|||
* @param destinationlength target array size, returning the appended length
|
||||
* @param source1 null-terminated first array
|
||||
* @param source2 second array
|
||||
* @param source2length length of seond array
|
||||
* @param source2length length of second array
|
||||
* @param source3 null-terminated third array
|
||||
* @param status error status if any
|
||||
* @return new destination array, destination if there was no new allocation
|
||||
|
@ -1560,7 +1560,7 @@ inline void cleanUpSafeText(const UStringSearch *strsrch, UChar *safetext,
|
|||
|
||||
/**
|
||||
* Takes the rearranged end accents and tries matching. If match failed at
|
||||
* a seperate preceding set of accents (seperated from the rearranged on by
|
||||
* a separate preceding set of accents (separated from the rearranged ones by
|
||||
* at least a base character) then we rearrange the preceding accents and
|
||||
* tries matching again.
|
||||
* We allow skipping of the ends of the accent set if the ces do not match.
|
||||
|
@ -2220,7 +2220,7 @@ int32_t doPreviousCanonicalSuffixMatch(UStringSearch *strsrch,
|
|||
|
||||
/**
|
||||
* Takes the rearranged start accents and tries matching. If match failed at
|
||||
* a seperate following set of accents (seperated from the rearranged on by
|
||||
* a separate following set of accents (separated from the rearranged ones by
|
||||
* at least a base character) then we rearrange the preceding accents and
|
||||
* tries matching again.
|
||||
* We allow skipping of the ends of the accent set if the ces do not match.
|
||||
|
@ -3852,7 +3852,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
|
|||
|
||||
#endif
|
||||
// Input parameter sanity check.
|
||||
// TODO: should input indicies clip to the text length
|
||||
// TODO: should input indices clip to the text length
|
||||
// in the same way that UText does.
|
||||
if(strsrch->pattern.cesLength == 0 ||
|
||||
startIdx < 0 ||
|
||||
|
@ -4014,7 +4014,7 @@ U_CAPI UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
|
|||
|
||||
// Check for the start of the match being within a Collation Element Expansion,
|
||||
// meaning that the first char of the match is only partially matched.
|
||||
// With exapnsions, the first CE will report the index of the source
|
||||
// With expansions, the first CE will report the index of the source
|
||||
// character, and all subsequent (expansions) CEs will report the source index of the
|
||||
// _following_ character.
|
||||
int32_t secondIx = firstCEI->highIndex;
|
||||
|
|
|
@ -932,7 +932,9 @@ group: double_conversion
|
|||
platform
|
||||
|
||||
group: number_representation
|
||||
number_decimalquantity.o number_stringbuilder.o numparse_stringsegment.o number_utils.o
|
||||
number_decimalquantity.o string_segment.o number_utils.o
|
||||
# TODO(ICU-20429) Move formatted_string_builder to its own unit.
|
||||
formatted_string_builder.o
|
||||
deps
|
||||
decnumber double_conversion
|
||||
# for trimming whitespace around fields
|
||||
|
|
|
@ -64,10 +64,10 @@ scientificnumberformattertest.o datadrivennumberformattestsuite.o \
|
|||
numberformattesttuple.o pluralmaptest.o \
|
||||
numbertest_affixutils.o numbertest_api.o numbertest_decimalquantity.o \
|
||||
numbertest_modifiers.o numbertest_patternmodifier.o numbertest_patternstring.o \
|
||||
numbertest_stringbuilder.o numbertest_stringsegment.o \
|
||||
string_segment_test.o \
|
||||
numbertest_parse.o numbertest_doubleconversion.o numbertest_skeletons.o \
|
||||
static_unisets_test.o numfmtdatadriventest.o numbertest_range.o erarulestest.o \
|
||||
formattedvaluetest.o
|
||||
formattedvaluetest.o formatted_string_builder_test.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
||||
|
|
|
@ -1695,6 +1695,49 @@ void DateIntervalFormatTest::testFormattedDateInterval() {
|
|||
UPRV_LENGTHOF(expectedFieldPositions));
|
||||
}
|
||||
|
||||
{
|
||||
const char16_t* message = u"FormattedDateInterval identical dates test: no span field";
|
||||
const char16_t* expectedString = u"July 20, 2018";
|
||||
LocalPointer<Calendar> input1(Calendar::createInstance("en-GB", status));
|
||||
input1->set(2018, 6, 20);
|
||||
FormattedDateInterval result = fmt->formatToValue(*input1, *input1, status);
|
||||
static const UFieldPositionWithCategory expectedFieldPositions[] = {
|
||||
// field, begin index, end index
|
||||
{UFIELD_CATEGORY_DATE, UDAT_MONTH_FIELD, 0, 4},
|
||||
{UFIELD_CATEGORY_DATE, UDAT_DATE_FIELD, 5, 7},
|
||||
{UFIELD_CATEGORY_DATE, UDAT_YEAR_FIELD, 9, 13}};
|
||||
checkMixedFormattedValue(
|
||||
message,
|
||||
result,
|
||||
expectedString,
|
||||
expectedFieldPositions,
|
||||
UPRV_LENGTHOF(expectedFieldPositions));
|
||||
}
|
||||
|
||||
// Test sample code
|
||||
{
|
||||
LocalPointer<Calendar> input1(Calendar::createInstance("en-GB", status));
|
||||
LocalPointer<Calendar> input2(Calendar::createInstance("en-GB", status));
|
||||
input1->set(2018, 6, 20);
|
||||
input2->set(2018, 7, 3);
|
||||
|
||||
// Let fmt be a DateIntervalFormat for locale en-US and skeleton dMMMMy
|
||||
// Let input1 be July 20, 2018 and input2 be August 3, 2018:
|
||||
FormattedDateInterval result = fmt->formatToValue(*input1, *input2, status);
|
||||
assertEquals("Expected output from format",
|
||||
u"July 20 \u2013 August 3, 2018", result.toString(status));
|
||||
ConstrainedFieldPosition cfpos;
|
||||
cfpos.constrainField(UFIELD_CATEGORY_DATE_INTERVAL_SPAN, 0);
|
||||
if (result.nextPosition(cfpos, status)) {
|
||||
assertEquals("Expect start index", 0, cfpos.getStart());
|
||||
assertEquals("Expect end index", 7, cfpos.getLimit());
|
||||
} else {
|
||||
// No such span: can happen if input dates are equal.
|
||||
}
|
||||
assertFalse("No more than one occurrence of the field",
|
||||
result.nextPosition(cfpos, status));
|
||||
}
|
||||
|
||||
// To test the fallback pattern behavior, make a custom DateIntervalInfo.
|
||||
DateIntervalInfo dtitvinf(status);
|
||||
dtitvinf.setFallbackIntervalPattern("<< {1} --- {0} >>", status);
|
||||
|
|
|
@ -6,7 +6,26 @@
|
|||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "putilimp.h"
|
||||
#include "numbertest.h"
|
||||
#include "intltest.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "formattedval_impl.h"
|
||||
|
||||
|
||||
class FormattedStringBuilderTest : public IntlTest {
|
||||
public:
|
||||
void testInsertAppendUnicodeString();
|
||||
void testSplice();
|
||||
void testInsertAppendCodePoint();
|
||||
void testCopy();
|
||||
void testFields();
|
||||
void testUnlimitedCapacity();
|
||||
void testCodePoints();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
|
||||
|
||||
private:
|
||||
void assertEqualsImpl(const UnicodeString &a, const FormattedStringBuilder &b);
|
||||
};
|
||||
|
||||
static const char16_t *EXAMPLE_STRINGS[] = {
|
||||
u"",
|
||||
|
@ -17,9 +36,9 @@ static const char16_t *EXAMPLE_STRINGS[] = {
|
|||
u"with combining characters like 🇦🇧🇨🇩",
|
||||
u"A very very very very very very very very very very long string to force heap"};
|
||||
|
||||
void NumberStringBuilderTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
|
||||
void FormattedStringBuilderTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
|
||||
if (exec) {
|
||||
logln("TestSuite NumberStringBuilderTest: ");
|
||||
logln("TestSuite FormattedStringBuilderTest: ");
|
||||
}
|
||||
TESTCASE_AUTO_BEGIN;
|
||||
TESTCASE_AUTO(testInsertAppendUnicodeString);
|
||||
|
@ -32,14 +51,14 @@ void NumberStringBuilderTest::runIndexedTest(int32_t index, UBool exec, const ch
|
|||
TESTCASE_AUTO_END;
|
||||
}
|
||||
|
||||
void NumberStringBuilderTest::testInsertAppendUnicodeString() {
|
||||
void FormattedStringBuilderTest::testInsertAppendUnicodeString() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString sb1;
|
||||
NumberStringBuilder sb2;
|
||||
FormattedStringBuilder sb2;
|
||||
for (const char16_t* strPtr : EXAMPLE_STRINGS) {
|
||||
UnicodeString str(strPtr);
|
||||
|
||||
NumberStringBuilder sb3;
|
||||
FormattedStringBuilder sb3;
|
||||
sb1.append(str);
|
||||
// Note: UNUM_FIELD_COUNT is like passing null in Java
|
||||
sb2.append(str, UNUM_FIELD_COUNT, status);
|
||||
|
@ -50,7 +69,7 @@ void NumberStringBuilderTest::testInsertAppendUnicodeString() {
|
|||
assertEqualsImpl(str, sb3);
|
||||
|
||||
UnicodeString sb4;
|
||||
NumberStringBuilder sb5;
|
||||
FormattedStringBuilder sb5;
|
||||
sb4.append(u"😇");
|
||||
sb4.append(str);
|
||||
sb4.append(u"xx");
|
||||
|
@ -68,7 +87,7 @@ void NumberStringBuilderTest::testInsertAppendUnicodeString() {
|
|||
assertEqualsImpl(sb4, sb5);
|
||||
|
||||
UnicodeString sb4cp(sb4);
|
||||
NumberStringBuilder sb5cp(sb5);
|
||||
FormattedStringBuilder sb5cp(sb5);
|
||||
sb4.append(sb4cp);
|
||||
sb5.append(sb5cp, status);
|
||||
assertSuccess("Appending again to sb5", status);
|
||||
|
@ -76,7 +95,7 @@ void NumberStringBuilderTest::testInsertAppendUnicodeString() {
|
|||
}
|
||||
}
|
||||
|
||||
void NumberStringBuilderTest::testSplice() {
|
||||
void FormattedStringBuilderTest::testSplice() {
|
||||
static const struct TestCase {
|
||||
const char16_t* input;
|
||||
const int32_t startThis;
|
||||
|
@ -94,7 +113,7 @@ void NumberStringBuilderTest::testSplice() {
|
|||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString sb1;
|
||||
NumberStringBuilder sb2;
|
||||
FormattedStringBuilder sb2;
|
||||
for (auto cas : cases) {
|
||||
for (const char16_t* replacementPtr : EXAMPLE_STRINGS) {
|
||||
UnicodeString replacement(replacementPtr);
|
||||
|
@ -125,14 +144,14 @@ void NumberStringBuilderTest::testSplice() {
|
|||
}
|
||||
}
|
||||
|
||||
void NumberStringBuilderTest::testInsertAppendCodePoint() {
|
||||
void FormattedStringBuilderTest::testInsertAppendCodePoint() {
|
||||
static const UChar32 cases[] = {
|
||||
0, 1, 60, 127, 128, 0x7fff, 0x8000, 0xffff, 0x10000, 0x1f000, 0x10ffff};
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString sb1;
|
||||
NumberStringBuilder sb2;
|
||||
FormattedStringBuilder sb2;
|
||||
for (UChar32 cas : cases) {
|
||||
NumberStringBuilder sb3;
|
||||
FormattedStringBuilder sb3;
|
||||
sb1.append(cas);
|
||||
sb2.appendCodePoint(cas, UNUM_FIELD_COUNT, status);
|
||||
assertSuccess("Appending to sb2", status);
|
||||
|
@ -147,7 +166,7 @@ void NumberStringBuilderTest::testInsertAppendCodePoint() {
|
|||
sb3.charAt(0));
|
||||
|
||||
UnicodeString sb4;
|
||||
NumberStringBuilder sb5;
|
||||
FormattedStringBuilder sb5;
|
||||
sb4.append(u"😇xx");
|
||||
sb4.insert(2, cas);
|
||||
sb5.append(u"😇xx", UNUM_FIELD_COUNT, status);
|
||||
|
@ -158,13 +177,13 @@ void NumberStringBuilderTest::testInsertAppendCodePoint() {
|
|||
}
|
||||
}
|
||||
|
||||
void NumberStringBuilderTest::testCopy() {
|
||||
void FormattedStringBuilderTest::testCopy() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
for (UnicodeString str : EXAMPLE_STRINGS) {
|
||||
NumberStringBuilder sb1;
|
||||
FormattedStringBuilder sb1;
|
||||
sb1.append(str, UNUM_FIELD_COUNT, status);
|
||||
assertSuccess("Appending to sb1 first time", status);
|
||||
NumberStringBuilder sb2(sb1);
|
||||
FormattedStringBuilder sb2(sb1);
|
||||
assertTrue("Content should equal itself", sb1.contentEquals(sb2));
|
||||
|
||||
sb1.append("12345", UNUM_FIELD_COUNT, status);
|
||||
|
@ -173,25 +192,28 @@ void NumberStringBuilderTest::testCopy() {
|
|||
}
|
||||
}
|
||||
|
||||
void NumberStringBuilderTest::testFields() {
|
||||
void FormattedStringBuilderTest::testFields() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
// Note: This is a C++11 for loop that calls the UnicodeString constructor on each iteration.
|
||||
for (UnicodeString str : EXAMPLE_STRINGS) {
|
||||
NumberStringBuilder sb;
|
||||
FormattedValueStringBuilderImpl sbi(0);
|
||||
FormattedStringBuilder& sb = sbi.getStringRef();
|
||||
sb.append(str, UNUM_FIELD_COUNT, status);
|
||||
assertSuccess("Appending to sb", status);
|
||||
sb.append(str, UNUM_CURRENCY_FIELD, status);
|
||||
assertSuccess("Appending to sb", status);
|
||||
assertEquals("Reference string copied twice", str.length() * 2, sb.length());
|
||||
for (int32_t i = 0; i < str.length(); i++) {
|
||||
assertEquals("Null field first", (Field) UNUM_FIELD_COUNT, sb.fieldAt(i));
|
||||
assertEquals("Currency field second", (Field) UNUM_CURRENCY_FIELD, sb.fieldAt(i + str.length()));
|
||||
assertEquals("Null field first",
|
||||
(FormattedStringBuilder::Field) UNUM_FIELD_COUNT, sb.fieldAt(i));
|
||||
assertEquals("Currency field second",
|
||||
(FormattedStringBuilder::Field) UNUM_CURRENCY_FIELD, sb.fieldAt(i + str.length()));
|
||||
}
|
||||
|
||||
// Very basic FieldPosition test. More robust tests happen in NumberFormatTest.
|
||||
// Let NumberFormatTest also take care of FieldPositionIterator material.
|
||||
FieldPosition fp(UNUM_CURRENCY_FIELD);
|
||||
sb.nextFieldPosition(fp, status);
|
||||
sbi.nextFieldPosition(fp, status);
|
||||
assertSuccess("Populating the FieldPosition", status);
|
||||
assertEquals("Currency start position", str.length(), fp.getBeginIndex());
|
||||
assertEquals("Currency end position", str.length() * 2, fp.getEndIndex());
|
||||
|
@ -200,17 +222,17 @@ void NumberStringBuilderTest::testFields() {
|
|||
sb.insertCodePoint(2, 100, UNUM_INTEGER_FIELD, status);
|
||||
assertSuccess("Inserting code point into sb", status);
|
||||
assertEquals("New length", str.length() * 2 + 1, sb.length());
|
||||
assertEquals("Integer field", (Field) UNUM_INTEGER_FIELD, sb.fieldAt(2));
|
||||
assertEquals("Integer field", (FormattedStringBuilder::Field) UNUM_INTEGER_FIELD, sb.fieldAt(2));
|
||||
}
|
||||
|
||||
NumberStringBuilder old(sb);
|
||||
FormattedStringBuilder old(sb);
|
||||
sb.append(old, status);
|
||||
assertSuccess("Appending to myself", status);
|
||||
int32_t numNull = 0;
|
||||
int32_t numCurr = 0;
|
||||
int32_t numInt = 0;
|
||||
for (int32_t i = 0; i < sb.length(); i++) {
|
||||
Field field = sb.fieldAt(i);
|
||||
FormattedStringBuilder::Field field = sb.fieldAt(i);
|
||||
assertEquals("Field should equal location in old", old.fieldAt(i % old.length()), field);
|
||||
if (field == UNUM_FIELD_COUNT) {
|
||||
numNull++;
|
||||
|
@ -228,9 +250,9 @@ void NumberStringBuilderTest::testFields() {
|
|||
}
|
||||
}
|
||||
|
||||
void NumberStringBuilderTest::testUnlimitedCapacity() {
|
||||
void FormattedStringBuilderTest::testUnlimitedCapacity() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
NumberStringBuilder builder;
|
||||
FormattedStringBuilder builder;
|
||||
// The builder should never fail upon repeated appends.
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
UnicodeString message("Iteration #");
|
||||
|
@ -242,9 +264,9 @@ void NumberStringBuilderTest::testUnlimitedCapacity() {
|
|||
}
|
||||
}
|
||||
|
||||
void NumberStringBuilderTest::testCodePoints() {
|
||||
void FormattedStringBuilderTest::testCodePoints() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
NumberStringBuilder nsb;
|
||||
FormattedStringBuilder nsb;
|
||||
assertEquals("First is -1 on empty string", -1, nsb.getFirstCodePoint());
|
||||
assertEquals("Last is -1 on empty string", -1, nsb.getLastCodePoint());
|
||||
assertEquals("Length is 0 on empty string", 0, nsb.codePointCount());
|
||||
|
@ -268,7 +290,7 @@ void NumberStringBuilderTest::testCodePoints() {
|
|||
assertEquals("Code point count is 2", 2, nsb.codePointCount());
|
||||
}
|
||||
|
||||
void NumberStringBuilderTest::assertEqualsImpl(const UnicodeString &a, const NumberStringBuilder &b) {
|
||||
void FormattedStringBuilderTest::assertEqualsImpl(const UnicodeString &a, const FormattedStringBuilder &b) {
|
||||
// TODO: Why won't this compile without the IntlTest:: qualifier?
|
||||
IntlTest::assertEquals("Lengths should be the same", a.length(), b.length());
|
||||
IntlTest::assertEquals("Code point counts should be the same", a.countChar32(), b.codePointCount());
|
||||
|
@ -285,4 +307,9 @@ void NumberStringBuilderTest::assertEqualsImpl(const UnicodeString &a, const Num
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
extern IntlTest *createFormattedStringBuilderTest() {
|
||||
return new FormattedStringBuilderTest();
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
|
@ -232,6 +232,7 @@
|
|||
</ClCompile>
|
||||
<ClCompile Include="dtptngts.cpp" />
|
||||
<ClCompile Include="fldset.cpp" />
|
||||
<ClCompile Include="formatted_string_builder_test.cpp" />
|
||||
<ClCompile Include="genderinfotest.cpp" />
|
||||
<ClCompile Include="incaltst.cpp" />
|
||||
<ClCompile Include="itformat.cpp" />
|
||||
|
@ -251,8 +252,7 @@
|
|||
<ClCompile Include="numbertest_modifiers.cpp" />
|
||||
<ClCompile Include="numbertest_patternmodifier.cpp" />
|
||||
<ClCompile Include="numbertest_patternstring.cpp" />
|
||||
<ClCompile Include="numbertest_stringbuilder.cpp" />
|
||||
<ClCompile Include="numbertest_stringsegment.cpp" />
|
||||
<ClCompile Include="string_segment_test.cpp" />
|
||||
<ClCompile Include="numbertest_parse.cpp" />
|
||||
<ClCompile Include="numbertest_doubleconversion.cpp" />
|
||||
<ClCompile Include="numbertest_skeletons.cpp" />
|
||||
|
|
|
@ -217,6 +217,9 @@
|
|||
<ClCompile Include="fldset.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="formatted_string_builder_test.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="genderinfotest.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
|
@ -274,10 +277,7 @@
|
|||
<ClCompile Include="numbertest_patternstring.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="numbertest_stringbuilder.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="numbertest_stringsegment.cpp">
|
||||
<ClCompile Include="string_segment_test.cpp">
|
||||
<Filter>formatting</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="numbertest_parse.cpp">
|
||||
|
|
|
@ -72,6 +72,8 @@ extern IntlTest *createMeasureFormatTest();
|
|||
extern IntlTest *createNumberFormatSpecificationTest();
|
||||
extern IntlTest *createScientificNumberFormatterTest();
|
||||
extern IntlTest *createFormattedValueTest();
|
||||
extern IntlTest *createFormattedStringBuilderTest();
|
||||
extern IntlTest *createStringSegmentTest();
|
||||
|
||||
|
||||
#define TESTCLASS(id, TestClass) \
|
||||
|
@ -227,6 +229,24 @@ void IntlTestFormat::runIndexedTest( int32_t index, UBool exec, const char* &nam
|
|||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
case 54:
|
||||
name = "FormattedStringBuilderTest";
|
||||
if (exec) {
|
||||
logln("FormattedStringBuilderTest test---");
|
||||
logln((UnicodeString)"");
|
||||
LocalPointer<IntlTest> test(createFormattedStringBuilderTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
case 55:
|
||||
name = "StringSegmentTest";
|
||||
if (exec) {
|
||||
logln("StringSegmentTest test---");
|
||||
logln((UnicodeString)"");
|
||||
LocalPointer<IntlTest> test(createStringSegmentTest());
|
||||
callTest(*test, par);
|
||||
}
|
||||
break;
|
||||
default: name = ""; break; //needed to end loop
|
||||
}
|
||||
if (exec) {
|
||||
|
|
|
@ -6,11 +6,11 @@
|
|||
#if !UCONFIG_NO_FORMATTING
|
||||
#pragma once
|
||||
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "intltest.h"
|
||||
#include "itformat.h"
|
||||
#include "number_affixutils.h"
|
||||
#include "numparse_stringsegment.h"
|
||||
#include "string_segment.h"
|
||||
#include "numrange_impl.h"
|
||||
#include "unicode/locid.h"
|
||||
#include "unicode/numberformatter.h"
|
||||
|
@ -71,6 +71,7 @@ class NumberFormatterApiTest : public IntlTestWithFieldPosition {
|
|||
void decimal();
|
||||
void scale();
|
||||
void locale();
|
||||
void skeletonUserGuideExamples();
|
||||
void formatTypes();
|
||||
void fieldPositionLogic();
|
||||
void fieldPositionCoverage();
|
||||
|
@ -173,7 +174,7 @@ class ModifiersTest : public IntlTest {
|
|||
UnicodeString expectedChars, UnicodeString expectedFields,
|
||||
UErrorCode &status);
|
||||
|
||||
void assertModifierEquals(const Modifier &mod, NumberStringBuilder &sb, int32_t expectedPrefixLength,
|
||||
void assertModifierEquals(const Modifier &mod, FormattedStringBuilder &sb, int32_t expectedPrefixLength,
|
||||
bool expectedStrong, UnicodeString expectedChars,
|
||||
UnicodeString expectedFields, UErrorCode &status);
|
||||
};
|
||||
|
@ -203,33 +204,6 @@ class PatternStringTest : public IntlTest {
|
|||
private:
|
||||
};
|
||||
|
||||
class NumberStringBuilderTest : public IntlTest {
|
||||
public:
|
||||
void testInsertAppendUnicodeString();
|
||||
void testSplice();
|
||||
void testInsertAppendCodePoint();
|
||||
void testCopy();
|
||||
void testFields();
|
||||
void testUnlimitedCapacity();
|
||||
void testCodePoints();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
|
||||
|
||||
private:
|
||||
void assertEqualsImpl(const UnicodeString &a, const NumberStringBuilder &b);
|
||||
};
|
||||
|
||||
class StringSegmentTest : public IntlTest {
|
||||
public:
|
||||
void testOffset();
|
||||
void testLength();
|
||||
void testCharAt();
|
||||
void testGetCodePoint();
|
||||
void testCommonPrefixLength();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
|
||||
};
|
||||
|
||||
class NumberParserTest : public IntlTest {
|
||||
public:
|
||||
void testBasic();
|
||||
|
@ -339,12 +313,10 @@ class NumberTest : public IntlTest {
|
|||
TESTCLASS(3, ModifiersTest);
|
||||
TESTCLASS(4, PatternModifierTest);
|
||||
TESTCLASS(5, PatternStringTest);
|
||||
TESTCLASS(6, NumberStringBuilderTest);
|
||||
TESTCLASS(7, DoubleConversionTest);
|
||||
TESTCLASS(8, StringSegmentTest);
|
||||
TESTCLASS(9, NumberParserTest);
|
||||
TESTCLASS(10, NumberSkeletonTest);
|
||||
TESTCLASS(11, NumberRangeFormatterTest);
|
||||
TESTCLASS(6, DoubleConversionTest);
|
||||
TESTCLASS(7, NumberParserTest);
|
||||
TESTCLASS(8, NumberSkeletonTest);
|
||||
TESTCLASS(9, NumberRangeFormatterTest);
|
||||
default: name = ""; break; // needed to end loop
|
||||
}
|
||||
}
|
||||
|
|
|
@ -217,7 +217,7 @@ void AffixUtilsTest::testUnescapeWithSymbolProvider() {
|
|||
NumericSymbolProvider provider;
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
NumberStringBuilder sb;
|
||||
FormattedStringBuilder sb;
|
||||
for (auto& cas : cases) {
|
||||
UnicodeString input(cas[0]);
|
||||
UnicodeString expected(cas[1]);
|
||||
|
@ -239,7 +239,7 @@ void AffixUtilsTest::testUnescapeWithSymbolProvider() {
|
|||
|
||||
UnicodeString AffixUtilsTest::unescapeWithDefaults(const SymbolProvider &defaultProvider,
|
||||
UnicodeString input, UErrorCode &status) {
|
||||
NumberStringBuilder nsb;
|
||||
FormattedStringBuilder nsb;
|
||||
int32_t length = AffixUtils::unescape(input, nsb, 0, defaultProvider, UNUM_FIELD_COUNT, status);
|
||||
assertEquals("Return value of unescape", nsb.length(), length);
|
||||
return nsb.toUnicodeString();
|
||||
|
|
|
@ -89,6 +89,7 @@ void NumberFormatterApiTest::runIndexedTest(int32_t index, UBool exec, const cha
|
|||
TESTCASE_AUTO(decimal);
|
||||
TESTCASE_AUTO(scale);
|
||||
TESTCASE_AUTO(locale);
|
||||
TESTCASE_AUTO(skeletonUserGuideExamples);
|
||||
TESTCASE_AUTO(formatTypes);
|
||||
TESTCASE_AUTO(fieldPositionLogic);
|
||||
TESTCASE_AUTO(fieldPositionCoverage);
|
||||
|
@ -2232,6 +2233,47 @@ void NumberFormatterApiTest::locale() {
|
|||
assertEquals("Locale withLocale()", u"1\u202f234", actual);
|
||||
}
|
||||
|
||||
void NumberFormatterApiTest::skeletonUserGuideExamples() {
|
||||
IcuTestErrorCode status(*this, "skeletonUserGuideExamples");
|
||||
|
||||
// Test the skeleton examples in userguide/format_parse/numbers/skeletons.md
|
||||
struct TestCase {
|
||||
const char16_t* skeleton;
|
||||
double input;
|
||||
const char16_t* expected;
|
||||
} cases[] = {
|
||||
{u"percent", 25, u"25%"},
|
||||
{u".00", 25, u"25.00"},
|
||||
{u"percent .00", 25, u"25.00%"},
|
||||
{u"scale/100", 0.3, u"30"},
|
||||
{u"percent scale/100", 0.3, u"30%"},
|
||||
{u"measure-unit/length-meter", 5, u"5 m"},
|
||||
{u"measure-unit/length-meter unit-width-full-name", 5, u"5 meters"},
|
||||
{u"currency/CAD", 10, u"CA$10.00"},
|
||||
{u"currency/CAD unit-width-narrow", 10, u"$10.00"},
|
||||
{u"compact-short", 5000, u"5K"},
|
||||
{u"compact-long", 5000, u"5 thousand"},
|
||||
{u"compact-short currency/CAD", 5000, u"CA$5K"},
|
||||
{u"", 5000, u"5,000"},
|
||||
{u"group-min2", 5000, u"5000"},
|
||||
{u"group-min2", 15000, u"15,000"},
|
||||
{u"sign-always", 60, u"+60"},
|
||||
{u"sign-always", 0, u"+0"},
|
||||
{u"sign-except-zero", 60, u"+60"},
|
||||
{u"sign-except-zero", 0, u"0"},
|
||||
{u"sign-accounting currency/CAD", -40, u"(CA$40.00)"}
|
||||
};
|
||||
|
||||
for (const auto& cas : cases) {
|
||||
status.setScope(cas.skeleton);
|
||||
FormattedNumber actual = NumberFormatter::forSkeleton(cas.skeleton, status)
|
||||
.locale("en-US")
|
||||
.formatDouble(cas.input, status);
|
||||
assertEquals(cas.skeleton, cas.expected, actual.toTempString(status));
|
||||
status.errIfFailureAndReset();
|
||||
}
|
||||
}
|
||||
|
||||
void NumberFormatterApiTest::formatTypes() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
LocalizedNumberFormatter formatter = NumberFormatter::withLocale(Locale::getEnglish());
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
#include "putilimp.h"
|
||||
#include "intltest.h"
|
||||
#include "number_stringbuilder.h"
|
||||
#include "formatted_string_builder.h"
|
||||
#include "number_modifiers.h"
|
||||
#include "numbertest.h"
|
||||
|
||||
|
@ -36,8 +36,8 @@ void ModifiersTest::testConstantAffixModifier() {
|
|||
|
||||
void ModifiersTest::testConstantMultiFieldModifier() {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
NumberStringBuilder prefix;
|
||||
NumberStringBuilder suffix;
|
||||
FormattedStringBuilder prefix;
|
||||
FormattedStringBuilder suffix;
|
||||
ConstantMultiFieldModifier mod1(prefix, suffix, false, true);
|
||||
assertModifierEquals(mod1, 0, true, u"|", u"n", status);
|
||||
assertSuccess("Spot 1", status);
|
||||
|
@ -87,7 +87,7 @@ void ModifiersTest::testSimpleModifier() {
|
|||
|
||||
// Test strange insertion positions
|
||||
for (int32_t j = 0; j < NUM_OUTPUTS; j++) {
|
||||
NumberStringBuilder output;
|
||||
FormattedStringBuilder output;
|
||||
output.append(outputs[j].baseString, UNUM_FIELD_COUNT, status);
|
||||
mod.apply(output, outputs[j].leftIndex, outputs[j].rightIndex, status);
|
||||
UnicodeString expected = expecteds[j][i];
|
||||
|
@ -105,8 +105,8 @@ void ModifiersTest::testCurrencySpacingEnabledModifier() {
|
|||
return;
|
||||
}
|
||||
|
||||
NumberStringBuilder prefix;
|
||||
NumberStringBuilder suffix;
|
||||
FormattedStringBuilder prefix;
|
||||
FormattedStringBuilder suffix;
|
||||
CurrencySpacingEnabledModifier mod1(prefix, suffix, false, true, symbols, status);
|
||||
assertSuccess("Spot 2", status);
|
||||
assertModifierEquals(mod1, 0, true, u"|", u"n", status);
|
||||
|
@ -120,15 +120,15 @@ void ModifiersTest::testCurrencySpacingEnabledModifier() {
|
|||
assertSuccess("Spot 6", status);
|
||||
|
||||
// Test the default currency spacing rules
|
||||
NumberStringBuilder sb;
|
||||
FormattedStringBuilder sb;
|
||||
sb.append("123", UNUM_INTEGER_FIELD, status);
|
||||
assertSuccess("Spot 7", status);
|
||||
NumberStringBuilder sb1(sb);
|
||||
FormattedStringBuilder sb1(sb);
|
||||
assertModifierEquals(mod2, sb1, 3, true, u"USD\u00A0123", u"$$$niii", status);
|
||||
assertSuccess("Spot 8", status);
|
||||
|
||||
// Compare with the unsafe code path
|
||||
NumberStringBuilder sb2(sb);
|
||||
FormattedStringBuilder sb2(sb);
|
||||
sb2.insert(0, "USD", UNUM_CURRENCY_FIELD, status);
|
||||
assertSuccess("Spot 9", status);
|
||||
CurrencySpacingEnabledModifier::applyCurrencySpacing(sb2, 0, 3, 6, 0, symbols, status);
|
||||
|
@ -149,14 +149,14 @@ void ModifiersTest::testCurrencySpacingEnabledModifier() {
|
|||
void ModifiersTest::assertModifierEquals(const Modifier &mod, int32_t expectedPrefixLength,
|
||||
bool expectedStrong, UnicodeString expectedChars,
|
||||
UnicodeString expectedFields, UErrorCode &status) {
|
||||
NumberStringBuilder sb;
|
||||
FormattedStringBuilder sb;
|
||||
sb.appendCodePoint('|', UNUM_FIELD_COUNT, status);
|
||||
assertModifierEquals(
|
||||
mod, sb, expectedPrefixLength, expectedStrong, expectedChars, expectedFields, status);
|
||||
|
||||
}
|
||||
|
||||
void ModifiersTest::assertModifierEquals(const Modifier &mod, NumberStringBuilder &sb,
|
||||
void ModifiersTest::assertModifierEquals(const Modifier &mod, FormattedStringBuilder &sb,
|
||||
int32_t expectedPrefixLength, bool expectedStrong,
|
||||
UnicodeString expectedChars, UnicodeString expectedFields,
|
||||
UErrorCode &status) {
|
||||
|
@ -171,7 +171,7 @@ void ModifiersTest::assertModifierEquals(const Modifier &mod, NumberStringBuilde
|
|||
}
|
||||
|
||||
UnicodeString debugString;
|
||||
debugString.append(u"<NumberStringBuilder [");
|
||||
debugString.append(u"<FormattedStringBuilder [");
|
||||
debugString.append(expectedChars);
|
||||
debugString.append(u"] [");
|
||||
debugString.append(expectedFields);
|
||||
|
|
|
@ -99,7 +99,7 @@ void PatternModifierTest::testPatternWithNoPlaceholder() {
|
|||
mod.setNumberProperties(1, StandardPlural::Form::COUNT);
|
||||
|
||||
// Unsafe Code Path
|
||||
NumberStringBuilder nsb;
|
||||
FormattedStringBuilder nsb;
|
||||
nsb.append(u"x123y", UNUM_FIELD_COUNT, status);
|
||||
assertSuccess("Spot 3", status);
|
||||
mod.apply(nsb, 1, 4, status);
|
||||
|
@ -141,21 +141,21 @@ void PatternModifierTest::testMutableEqualsImmutable() {
|
|||
DecimalQuantity fq;
|
||||
fq.setToInt(1);
|
||||
|
||||
NumberStringBuilder nsb1;
|
||||
FormattedStringBuilder nsb1;
|
||||
MicroProps micros1;
|
||||
mod.addToChain(&micros1);
|
||||
mod.processQuantity(fq, micros1, status);
|
||||
micros1.modMiddle->apply(nsb1, 0, 0, status);
|
||||
assertSuccess("Spot 3", status);
|
||||
|
||||
NumberStringBuilder nsb2;
|
||||
FormattedStringBuilder nsb2;
|
||||
MicroProps micros2;
|
||||
LocalPointer<ImmutablePatternModifier> immutable(mod.createImmutable(status));
|
||||
immutable->applyToMicros(micros2, fq, status);
|
||||
micros2.modMiddle->apply(nsb2, 0, 0, status);
|
||||
assertSuccess("Spot 4", status);
|
||||
|
||||
NumberStringBuilder nsb3;
|
||||
FormattedStringBuilder nsb3;
|
||||
MicroProps micros3;
|
||||
mod.addToChain(&micros3);
|
||||
mod.setPatternAttributes(UNUM_SIGN_ALWAYS, false);
|
||||
|
@ -168,14 +168,14 @@ void PatternModifierTest::testMutableEqualsImmutable() {
|
|||
}
|
||||
|
||||
UnicodeString PatternModifierTest::getPrefix(const MutablePatternModifier &mod, UErrorCode &status) {
|
||||
NumberStringBuilder nsb;
|
||||
FormattedStringBuilder nsb;
|
||||
mod.apply(nsb, 0, 0, status);
|
||||
int32_t prefixLength = mod.getPrefixLength();
|
||||
return UnicodeString(nsb.toUnicodeString(), 0, prefixLength);
|
||||
}
|
||||
|
||||
UnicodeString PatternModifierTest::getSuffix(const MutablePatternModifier &mod, UErrorCode &status) {
|
||||
NumberStringBuilder nsb;
|
||||
FormattedStringBuilder nsb;
|
||||
mod.apply(nsb, 0, 0, status);
|
||||
int32_t prefixLength = mod.getPrefixLength();
|
||||
return UnicodeString(nsb.toUnicodeString(), prefixLength, nsb.length() - prefixLength);
|
||||
|
|
|
@ -149,6 +149,8 @@ void NumberSkeletonTest::invalidTokens() {
|
|||
u"integer-width/xxx",
|
||||
u"integer-width/0+",
|
||||
u"integer-width/+0#",
|
||||
u"integer-width/+#",
|
||||
u"integer-width/+#0",
|
||||
u"scientific/foo"};
|
||||
|
||||
expectedErrorSkeleton(cases, UPRV_LENGTHOF(cases));
|
||||
|
|
|
@ -5,8 +5,19 @@
|
|||
|
||||
#if !UCONFIG_NO_FORMATTING
|
||||
|
||||
#include "numbertest.h"
|
||||
#include "numparse_stringsegment.h"
|
||||
#include "string_segment.h"
|
||||
#include "intltest.h"
|
||||
|
||||
class StringSegmentTest : public IntlTest {
|
||||
public:
|
||||
void testOffset();
|
||||
void testLength();
|
||||
void testCharAt();
|
||||
void testGetCodePoint();
|
||||
void testCommonPrefixLength();
|
||||
|
||||
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0);
|
||||
};
|
||||
|
||||
static const char16_t* SAMPLE_STRING = u"📻 radio 📻";
|
||||
|
||||
|
@ -101,4 +112,9 @@ void StringSegmentTest::testCommonPrefixLength() {
|
|||
assertEquals("", 0, segment.getCommonPrefixLength(u"foo"));
|
||||
}
|
||||
|
||||
|
||||
extern IntlTest *createStringSegmentTest() {
|
||||
return new StringSegmentTest();
|
||||
}
|
||||
|
||||
#endif
|
|
@ -14,9 +14,9 @@ top_builddir = ../..
|
|||
## All the flags and other definitions are included here.
|
||||
include $(top_builddir)/icudefs.mk
|
||||
|
||||
MIDDLE_SO_TARGET=
|
||||
|
||||
OUTPUTFILE=pkgdata.inc
|
||||
MIDDLE_SO_TARGET=
|
||||
PKGDATA_TRAILING_SPACE=" "
|
||||
|
||||
all : clean
|
||||
@echo GENCCODE_ASSEMBLY_TYPE=$(GENCCODE_ASSEMBLY) >> $(OUTPUTFILE)
|
||||
|
|
|
@ -205,10 +205,10 @@ main(int argc,
|
|||
"\t-c or --copyright include copyright notice\n");
|
||||
fprintf(stderr,
|
||||
"\t-e or --encoding encoding of source files\n"
|
||||
"\t-d of --destdir destination directory, followed by the path, defaults to %s\n"
|
||||
"\t-s or --sourcedir source directory for files followed by path, defaults to %s\n"
|
||||
"\t-d or --destdir destination directory, followed by the path, defaults to '%s'\n"
|
||||
"\t-s or --sourcedir source directory for files followed by path, defaults to '%s'\n"
|
||||
"\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
|
||||
"\t followed by path, defaults to %s\n",
|
||||
"\t followed by path, defaults to '%s'\n",
|
||||
u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory());
|
||||
fprintf(stderr,
|
||||
"\t-j or --write-java write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"
|
||||
|
|
|
@ -274,11 +274,11 @@ expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenV
|
|||
}
|
||||
}
|
||||
|
||||
static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment, UErrorCode *status)
|
||||
static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment,
|
||||
int32_t &stringLength, UErrorCode *status)
|
||||
{
|
||||
struct UString *tokenValue;
|
||||
char *result;
|
||||
uint32_t count;
|
||||
|
||||
expect(state, TOK_STRING, &tokenValue, comment, line, status);
|
||||
|
||||
|
@ -287,14 +287,13 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin
|
|||
return NULL;
|
||||
}
|
||||
|
||||
count = u_strlen(tokenValue->fChars);
|
||||
if(!uprv_isInvariantUString(tokenValue->fChars, count)) {
|
||||
if(!uprv_isInvariantUString(tokenValue->fChars, tokenValue->fLength)) {
|
||||
*status = U_INVALID_FORMAT_ERROR;
|
||||
error(*line, "invariant characters required for table keys, binary data, etc.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
result = static_cast<char *>(uprv_malloc(count+1));
|
||||
result = static_cast<char *>(uprv_malloc(tokenValue->fLength+1));
|
||||
|
||||
if (result == NULL)
|
||||
{
|
||||
|
@ -302,7 +301,8 @@ static char *getInvariantString(ParseState* state, uint32_t *line, struct UStrin
|
|||
return NULL;
|
||||
}
|
||||
|
||||
u_UCharsToChars(tokenValue->fChars, result, count+1);
|
||||
u_UCharsToChars(tokenValue->fChars, result, tokenValue->fLength+1);
|
||||
stringLength = tokenValue->fLength;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -1371,7 +1371,6 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
|
|||
int32_t value;
|
||||
UBool readToken = FALSE;
|
||||
char *stopstring;
|
||||
uint32_t len;
|
||||
struct UString memberComments;
|
||||
|
||||
IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
|
||||
|
@ -1404,7 +1403,8 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
|
|||
return result;
|
||||
}
|
||||
|
||||
string = getInvariantString(state, NULL, NULL, status);
|
||||
int32_t stringLength;
|
||||
string = getInvariantString(state, NULL, NULL, stringLength, status);
|
||||
|
||||
if (U_FAILURE(*status))
|
||||
{
|
||||
|
@ -1414,9 +1414,9 @@ parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct US
|
|||
|
||||
/* For handling illegal char in the Intvector */
|
||||
value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/
|
||||
len=(uint32_t)(stopstring-string);
|
||||
int32_t len = (int32_t)(stopstring-string);
|
||||
|
||||
if(len==uprv_strlen(string))
|
||||
if(len==stringLength)
|
||||
{
|
||||
result->add(value, *status);
|
||||
uprv_free(string);
|
||||
|
@ -1454,7 +1454,8 @@ static struct SResource *
|
|||
parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
|
||||
{
|
||||
uint32_t line;
|
||||
LocalMemory<char> string(getInvariantString(state, &line, NULL, status));
|
||||
int32_t stringLength;
|
||||
LocalMemory<char> string(getInvariantString(state, &line, NULL, stringLength, status));
|
||||
if (string.isNull() || U_FAILURE(*status))
|
||||
{
|
||||
return NULL;
|
||||
|
@ -1470,46 +1471,45 @@ parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UStri
|
|||
printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
|
||||
}
|
||||
|
||||
uint32_t count = (uint32_t)uprv_strlen(string.getAlias());
|
||||
if (count > 0){
|
||||
if((count % 2)==0){
|
||||
LocalMemory<uint8_t> value;
|
||||
if (value.allocateInsteadAndCopy(count) == NULL)
|
||||
{
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
LocalMemory<uint8_t> value;
|
||||
int32_t count = 0;
|
||||
if (stringLength > 0 && value.allocateInsteadAndCopy(stringLength) == NULL)
|
||||
{
|
||||
*status = U_MEMORY_ALLOCATION_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char toConv[3] = {'\0', '\0', '\0'};
|
||||
for (uint32_t i = 0; i < count; i += 2)
|
||||
{
|
||||
toConv[0] = string[i];
|
||||
toConv[1] = string[i + 1];
|
||||
|
||||
char *stopstring;
|
||||
value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
|
||||
uint32_t len=(uint32_t)(stopstring-toConv);
|
||||
|
||||
if(len!=2)
|
||||
{
|
||||
*status=U_INVALID_CHAR_FOUND;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return bin_open(state->bundle, tag, count >> 1, value.getAlias(), NULL, comment, status);
|
||||
char toConv[3] = {'\0', '\0', '\0'};
|
||||
for (int32_t i = 0; i < stringLength;)
|
||||
{
|
||||
// Skip spaces (which may have been line endings).
|
||||
char c0 = string[i++];
|
||||
if (c0 == ' ') { continue; }
|
||||
if (i == stringLength) {
|
||||
*status=U_INVALID_CHAR_FOUND;
|
||||
error(line, "Encountered invalid binary value (odd number of hex digits)");
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
toConv[0] = c0;
|
||||
toConv[1] = string[i++];
|
||||
|
||||
char *stopstring;
|
||||
value[count++] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16);
|
||||
uint32_t len=(uint32_t)(stopstring-toConv);
|
||||
|
||||
if(len!=2)
|
||||
{
|
||||
*status = U_INVALID_CHAR_FOUND;
|
||||
error(line, "Encountered invalid binary value (length is odd)");
|
||||
*status=U_INVALID_CHAR_FOUND;
|
||||
error(line, "Encountered invalid binary value (not all pairs of hex digits)");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
if (count == 0) {
|
||||
warning(startline, "Encountered empty binary value");
|
||||
return bin_open(state->bundle, tag, 0, NULL, "", comment, status);
|
||||
} else {
|
||||
return bin_open(state->bundle, tag, count, value.getAlias(), NULL, comment, status);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1520,9 +1520,9 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
|
|||
int32_t value;
|
||||
char *string;
|
||||
char *stopstring;
|
||||
uint32_t len;
|
||||
|
||||
string = getInvariantString(state, NULL, NULL, status);
|
||||
int32_t stringLength;
|
||||
string = getInvariantString(state, NULL, NULL, stringLength, status);
|
||||
|
||||
if (string == NULL || U_FAILURE(*status))
|
||||
{
|
||||
|
@ -1541,7 +1541,7 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
|
|||
printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
|
||||
}
|
||||
|
||||
if (uprv_strlen(string) <= 0)
|
||||
if (stringLength == 0)
|
||||
{
|
||||
warning(startline, "Encountered empty integer. Default value is 0.");
|
||||
}
|
||||
|
@ -1549,8 +1549,8 @@ parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UStr
|
|||
/* Allow integer support for hexdecimal, octal digit and decimal*/
|
||||
/* and handle illegal char in the integer*/
|
||||
value = uprv_strtoul(string, &stopstring, 0);
|
||||
len=(uint32_t)(stopstring-string);
|
||||
if(len==uprv_strlen(string))
|
||||
int32_t len = (int32_t)(stopstring-string);
|
||||
if(len==stringLength)
|
||||
{
|
||||
result = int_open(state->bundle, tag, value, comment, status);
|
||||
}
|
||||
|
@ -1567,7 +1567,8 @@ static struct SResource *
|
|||
parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
|
||||
{
|
||||
uint32_t line;
|
||||
LocalMemory<char> filename(getInvariantString(state, &line, NULL, status));
|
||||
int32_t stringLength;
|
||||
LocalMemory<char> filename(getInvariantString(state, &line, NULL, stringLength, status));
|
||||
if (U_FAILURE(*status))
|
||||
{
|
||||
return NULL;
|
||||
|
@ -1628,12 +1629,11 @@ parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UStr
|
|||
|
||||
UCHARBUF *ucbuf;
|
||||
char *fullname = NULL;
|
||||
int32_t count = 0;
|
||||
const char* cp = NULL;
|
||||
const UChar* uBuffer = NULL;
|
||||
|
||||
filename = getInvariantString(state, &line, NULL, status);
|
||||
count = (int32_t)uprv_strlen(filename);
|
||||
int32_t stringLength;
|
||||
filename = getInvariantString(state, &line, NULL, stringLength, status);
|
||||
|
||||
if (U_FAILURE(*status))
|
||||
{
|
||||
|
@ -1652,7 +1652,7 @@ parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UStr
|
|||
printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline);
|
||||
}
|
||||
|
||||
fullname = (char *) uprv_malloc(state->inputdirLength + count + 2);
|
||||
fullname = (char *) uprv_malloc(state->inputdirLength + stringLength + 2);
|
||||
/* test for NULL */
|
||||
if(fullname == NULL)
|
||||
{
|
||||
|
|
|
@ -504,7 +504,6 @@ main(int argc, char* argv[]) {
|
|||
if (o.files != NULL) {
|
||||
pkg_deleteList(o.files);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -544,6 +543,7 @@ normal_command_mode:
|
|||
int result = system(cmd);
|
||||
if (result != 0) {
|
||||
fprintf(stderr, "-- return status = %d\n", result);
|
||||
result = 1; // system() result code is platform specific.
|
||||
}
|
||||
|
||||
if (cmd != cmdBuffer && cmd != command) {
|
||||
|
|
|
@ -368,6 +368,7 @@
|
|||
<pathelement location="${icu4j.regiondata.jar}"/>
|
||||
<pathelement location="${icu4j.translit.jar}"/>
|
||||
<pathelement location="${icu4j.test-framework.jar}"/>
|
||||
<pathelement location="${icu4j.tools.jar}"/>
|
||||
<pathelement location="${icu4j.core-tests.jar}"/>
|
||||
<pathelement location="${icu4j.collate-tests.jar}"/>
|
||||
<pathelement location="${icu4j.charset-tests.jar}"/>
|
||||
|
@ -570,7 +571,7 @@
|
|||
</icu-junit>
|
||||
</target>
|
||||
|
||||
<target name="packagingCheck" depends="info, core, packaging-tests" description="Run packaging tests">
|
||||
<target name="packagingCheck" depends="info, core, langdata, regiondata, packaging-tests" description="Run packaging tests">
|
||||
<antcall target="_packagingCheckNoLangData"/>
|
||||
<antcall target="_packagingCheckNoRegionData"/>
|
||||
<antcall target="_packagingCheckNoLangNorRegionData"/>
|
||||
|
@ -1201,7 +1202,7 @@
|
|||
</ant>
|
||||
</target>
|
||||
|
||||
<target name="core-tests" depends="core, test-framework" description="Build core tests">
|
||||
<target name="core-tests" depends="core, test-framework, tools" description="Build core tests">
|
||||
<ant dir="${icu4j.core-tests.dir}" inheritAll="false">
|
||||
<reference refid="junit.jars"/>
|
||||
</ant>
|
||||
|
@ -1249,7 +1250,7 @@
|
|||
<ant dir="${icu4j.build-tools.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
<target name="tools" depends="core, core-tests, collate, translit, translit-tests" description="Build tool classes">
|
||||
<target name="tools" depends="core, collate, translit" description="Build tool classes">
|
||||
<ant dir="${icu4j.tools.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ class CharsetUTF7 extends CharsetICU {
|
|||
|
||||
public CharsetUTF7(String icuCanonicalName, String javaCanonicalName, String[] aliases) {
|
||||
super(icuCanonicalName, javaCanonicalName, aliases);
|
||||
maxBytesPerChar=4; /* max 3 bytes per code unit from UTF-7 (base64) */
|
||||
maxBytesPerChar=5; /* max 3 bytes per code unit from UTF-7 (base64) plus SIN SOUT */
|
||||
minBytesPerChar=1;
|
||||
maxCharsPerByte=1;
|
||||
|
||||
|
|
|
@ -1,22 +1,17 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number;
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.text.AttributedCharacterIterator;
|
||||
import java.text.AttributedString;
|
||||
import java.text.FieldPosition;
|
||||
import java.text.Format.Field;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import com.ibm.icu.impl.StaticUnicodeSets;
|
||||
import com.ibm.icu.text.ConstrainedFieldPosition;
|
||||
// NumberFormat is imported only for the toDebugString() implementation.
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* A StringBuilder optimized for number formatting. It implements the following key features beyond a
|
||||
* A StringBuilder optimized for formatting. It implements the following key features beyond a
|
||||
* normal JDK StringBuilder:
|
||||
*
|
||||
* <ol>
|
||||
|
@ -24,33 +19,37 @@ import com.ibm.icu.text.UnicodeSet;
|
|||
* <li>Keeps tracks of Fields in an efficient manner.
|
||||
* <li>String operations are fast-pathed to code point operations when possible.
|
||||
* </ol>
|
||||
*
|
||||
* See also FormattedValueStringBuilderImpl.
|
||||
*
|
||||
* @author sffc (Shane Carr)
|
||||
*/
|
||||
public class NumberStringBuilder implements CharSequence {
|
||||
public class FormattedStringBuilder implements CharSequence {
|
||||
|
||||
/** A constant, empty NumberStringBuilder. Do NOT call mutative operations on this. */
|
||||
public static final NumberStringBuilder EMPTY = new NumberStringBuilder();
|
||||
/** A constant, empty FormattedStringBuilder. Do NOT call mutative operations on this. */
|
||||
public static final FormattedStringBuilder EMPTY = new FormattedStringBuilder();
|
||||
|
||||
private char[] chars;
|
||||
private Field[] fields;
|
||||
private int zero;
|
||||
private int length;
|
||||
char[] chars;
|
||||
Field[] fields;
|
||||
int zero;
|
||||
int length;
|
||||
|
||||
public NumberStringBuilder() {
|
||||
public FormattedStringBuilder() {
|
||||
this(40);
|
||||
}
|
||||
|
||||
public NumberStringBuilder(int capacity) {
|
||||
public FormattedStringBuilder(int capacity) {
|
||||
chars = new char[capacity];
|
||||
fields = new Field[capacity];
|
||||
zero = capacity / 2;
|
||||
length = 0;
|
||||
}
|
||||
|
||||
public NumberStringBuilder(NumberStringBuilder source) {
|
||||
public FormattedStringBuilder(FormattedStringBuilder source) {
|
||||
copyFrom(source);
|
||||
}
|
||||
|
||||
public void copyFrom(NumberStringBuilder source) {
|
||||
public void copyFrom(FormattedStringBuilder source) {
|
||||
chars = Arrays.copyOf(source.chars, source.chars.length);
|
||||
fields = Arrays.copyOf(source.fields, source.fields.length);
|
||||
zero = source.zero;
|
||||
|
@ -101,7 +100,7 @@ public class NumberStringBuilder implements CharSequence {
|
|||
return Character.codePointBefore(chars, zero + index, zero);
|
||||
}
|
||||
|
||||
public NumberStringBuilder clear() {
|
||||
public FormattedStringBuilder clear() {
|
||||
zero = getCapacity() / 2;
|
||||
length = 0;
|
||||
return this;
|
||||
|
@ -237,20 +236,20 @@ public class NumberStringBuilder implements CharSequence {
|
|||
}
|
||||
|
||||
/**
|
||||
* Appends the contents of another {@link NumberStringBuilder} to the end of this instance.
|
||||
* Appends the contents of another {@link FormattedStringBuilder} to the end of this instance.
|
||||
*
|
||||
* @return The number of chars added, which is the length of the other {@link NumberStringBuilder}.
|
||||
* @return The number of chars added, which is the length of the other {@link FormattedStringBuilder}.
|
||||
*/
|
||||
public int append(NumberStringBuilder other) {
|
||||
public int append(FormattedStringBuilder other) {
|
||||
return insert(length, other);
|
||||
}
|
||||
|
||||
/**
|
||||
* Inserts the contents of another {@link NumberStringBuilder} into this instance at the given index.
|
||||
* Inserts the contents of another {@link FormattedStringBuilder} into this instance at the given index.
|
||||
*
|
||||
* @return The number of chars added, which is the length of the other {@link NumberStringBuilder}.
|
||||
* @return The number of chars added, which is the length of the other {@link FormattedStringBuilder}.
|
||||
*/
|
||||
public int insert(int index, NumberStringBuilder other) {
|
||||
public int insert(int index, FormattedStringBuilder other) {
|
||||
if (this == other) {
|
||||
throw new IllegalArgumentException("Cannot call insert/append on myself");
|
||||
}
|
||||
|
@ -365,14 +364,14 @@ public class NumberStringBuilder implements CharSequence {
|
|||
return chars.length;
|
||||
}
|
||||
|
||||
/** Note: this returns a NumberStringBuilder. Do not return publicly. */
|
||||
/** Note: this returns a FormattedStringBuilder. Do not return publicly. */
|
||||
@Override
|
||||
@Deprecated
|
||||
public CharSequence subSequence(int start, int end) {
|
||||
assert start >= 0;
|
||||
assert end <= length;
|
||||
assert end >= start;
|
||||
NumberStringBuilder other = new NumberStringBuilder(this);
|
||||
FormattedStringBuilder other = new FormattedStringBuilder(this);
|
||||
other.zero = zero + start;
|
||||
other.length = end - start;
|
||||
return other;
|
||||
|
@ -420,20 +419,22 @@ public class NumberStringBuilder implements CharSequence {
|
|||
*
|
||||
* <p>
|
||||
* For example, if the string is "-12.345", the debug string will be something like
|
||||
* "<NumberStringBuilder [-123.45] [-iii.ff]>"
|
||||
* "<FormattedStringBuilder [-123.45] [-iii.ff]>"
|
||||
*
|
||||
* @return A string for debugging purposes.
|
||||
*/
|
||||
public String toDebugString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("<NumberStringBuilder [");
|
||||
sb.append("<FormattedStringBuilder [");
|
||||
sb.append(this.toString());
|
||||
sb.append("] [");
|
||||
for (int i = zero; i < zero + length; i++) {
|
||||
if (fields[i] == null) {
|
||||
sb.append('n');
|
||||
} else {
|
||||
} else if (fieldToDebugChar.containsKey(fields[i])) {
|
||||
sb.append(fieldToDebugChar.get(fields[i]));
|
||||
} else {
|
||||
sb.append('?');
|
||||
}
|
||||
}
|
||||
sb.append("]>");
|
||||
|
@ -475,7 +476,7 @@ public class NumberStringBuilder implements CharSequence {
|
|||
* The instance to compare.
|
||||
* @return Whether the contents of this instance is currently equal to the given instance.
|
||||
*/
|
||||
public boolean contentEquals(NumberStringBuilder other) {
|
||||
public boolean contentEquals(FormattedStringBuilder other) {
|
||||
if (length != other.length)
|
||||
return false;
|
||||
for (int i = 0; i < length; i++) {
|
||||
|
@ -495,170 +496,4 @@ public class NumberStringBuilder implements CharSequence {
|
|||
public boolean equals(Object other) {
|
||||
throw new UnsupportedOperationException("Don't call #hashCode() or #equals() on a mutable.");
|
||||
}
|
||||
|
||||
public boolean nextFieldPosition(FieldPosition fp) {
|
||||
java.text.Format.Field rawField = fp.getFieldAttribute();
|
||||
|
||||
if (rawField == null) {
|
||||
// Backwards compatibility: read from fp.getField()
|
||||
if (fp.getField() == NumberFormat.INTEGER_FIELD) {
|
||||
rawField = NumberFormat.Field.INTEGER;
|
||||
} else if (fp.getField() == NumberFormat.FRACTION_FIELD) {
|
||||
rawField = NumberFormat.Field.FRACTION;
|
||||
} else {
|
||||
// No field is set
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(rawField instanceof NumberFormat.Field)) {
|
||||
throw new IllegalArgumentException(
|
||||
"You must pass an instance of com.ibm.icu.text.NumberFormat.Field as your FieldPosition attribute. You passed: "
|
||||
+ rawField.getClass().toString());
|
||||
}
|
||||
|
||||
ConstrainedFieldPosition cfpos = new ConstrainedFieldPosition();
|
||||
cfpos.constrainField(rawField);
|
||||
cfpos.setState(rawField, null, fp.getBeginIndex(), fp.getEndIndex());
|
||||
if (nextPosition(cfpos, null)) {
|
||||
fp.setBeginIndex(cfpos.getStart());
|
||||
fp.setEndIndex(cfpos.getLimit());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Special case: fraction should start after integer if fraction is not present
|
||||
if (rawField == NumberFormat.Field.FRACTION && fp.getEndIndex() == 0) {
|
||||
boolean inside = false;
|
||||
int i = zero;
|
||||
for (; i < zero + length; i++) {
|
||||
if (isIntOrGroup(fields[i]) || fields[i] == NumberFormat.Field.DECIMAL_SEPARATOR) {
|
||||
inside = true;
|
||||
} else if (inside) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
fp.setBeginIndex(i - zero);
|
||||
fp.setEndIndex(i - zero);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public AttributedCharacterIterator toCharacterIterator(Field numericField) {
|
||||
ConstrainedFieldPosition cfpos = new ConstrainedFieldPosition();
|
||||
AttributedString as = new AttributedString(toString());
|
||||
while (this.nextPosition(cfpos, numericField)) {
|
||||
// Backwards compatibility: field value = field
|
||||
as.addAttribute(cfpos.getField(), cfpos.getField(), cfpos.getStart(), cfpos.getLimit());
|
||||
}
|
||||
return as.getIterator();
|
||||
}
|
||||
|
||||
static class NullField extends Field {
|
||||
private static final long serialVersionUID = 1L;
|
||||
static final NullField END = new NullField("end");
|
||||
private NullField(String name) {
|
||||
super(name);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of nextPosition consistent with the contract of FormattedValue.
|
||||
*
|
||||
* @param cfpos
|
||||
* The argument passed to the public API.
|
||||
* @param numericField
|
||||
* Optional. If non-null, apply this field to the entire numeric portion of the string.
|
||||
* @return See FormattedValue#nextPosition.
|
||||
*/
|
||||
public boolean nextPosition(ConstrainedFieldPosition cfpos, Field numericField) {
|
||||
int fieldStart = -1;
|
||||
Field currField = null;
|
||||
for (int i = zero + cfpos.getLimit(); i <= zero + length; i++) {
|
||||
Field _field = (i < zero + length) ? fields[i] : NullField.END;
|
||||
// Case 1: currently scanning a field.
|
||||
if (currField != null) {
|
||||
if (currField != _field) {
|
||||
int end = i - zero;
|
||||
// Grouping separators can be whitespace; don't throw them out!
|
||||
if (currField != NumberFormat.Field.GROUPING_SEPARATOR) {
|
||||
end = trimBack(end);
|
||||
}
|
||||
if (end <= fieldStart) {
|
||||
// Entire field position is ignorable; skip.
|
||||
fieldStart = -1;
|
||||
currField = null;
|
||||
i--; // look at this index again
|
||||
continue;
|
||||
}
|
||||
int start = fieldStart;
|
||||
if (currField != NumberFormat.Field.GROUPING_SEPARATOR) {
|
||||
start = trimFront(start);
|
||||
}
|
||||
cfpos.setState(currField, null, start, end);
|
||||
return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
|
||||
if (cfpos.matchesField(NumberFormat.Field.INTEGER, null)
|
||||
&& i > zero
|
||||
// don't return the same field twice in a row:
|
||||
&& i - zero > cfpos.getLimit()
|
||||
&& isIntOrGroup(fields[i - 1])
|
||||
&& !isIntOrGroup(_field)) {
|
||||
int j = i - 1;
|
||||
for (; j >= zero && isIntOrGroup(fields[j]); j--) {}
|
||||
cfpos.setState(NumberFormat.Field.INTEGER, null, j - zero + 1, i - zero);
|
||||
return true;
|
||||
}
|
||||
// Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
|
||||
if (numericField != null
|
||||
&& cfpos.matchesField(numericField, null)
|
||||
&& i > zero
|
||||
// don't return the same field twice in a row:
|
||||
&& (i - zero > cfpos.getLimit() || cfpos.getField() != numericField)
|
||||
&& isNumericField(fields[i - 1])
|
||||
&& !isNumericField(_field)) {
|
||||
int j = i - 1;
|
||||
for (; j >= zero && isNumericField(fields[j]); j--) {}
|
||||
cfpos.setState(numericField, null, j - zero + 1, i - zero);
|
||||
return true;
|
||||
}
|
||||
// Special case: skip over INTEGER; will be coalesced later.
|
||||
if (_field == NumberFormat.Field.INTEGER) {
|
||||
_field = null;
|
||||
}
|
||||
// Case 2: no field starting at this position.
|
||||
if (_field == null || _field == NullField.END) {
|
||||
continue;
|
||||
}
|
||||
// Case 3: check for field starting at this position
|
||||
if (cfpos.matchesField(_field, null)) {
|
||||
fieldStart = i - zero;
|
||||
currField = _field;
|
||||
}
|
||||
}
|
||||
|
||||
assert currField == null;
|
||||
return false;
|
||||
}
|
||||
|
||||
private static boolean isIntOrGroup(Field field) {
|
||||
return field == NumberFormat.Field.INTEGER || field == NumberFormat.Field.GROUPING_SEPARATOR;
|
||||
}
|
||||
|
||||
private static boolean isNumericField(Field field) {
|
||||
return field == null || NumberFormat.Field.class.isAssignableFrom(field.getClass());
|
||||
}
|
||||
|
||||
private int trimBack(int limit) {
|
||||
return StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES)
|
||||
.spanBack(this, limit, UnicodeSet.SpanCondition.CONTAINED);
|
||||
}
|
||||
|
||||
private int trimFront(int start) {
|
||||
return StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES)
|
||||
.span(this, start, UnicodeSet.SpanCondition.CONTAINED);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,193 @@
|
|||
// © 2019 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl;
|
||||
|
||||
import java.text.AttributedCharacterIterator;
|
||||
import java.text.AttributedString;
|
||||
import java.text.FieldPosition;
|
||||
import java.text.Format.Field;
|
||||
|
||||
import com.ibm.icu.text.ConstrainedFieldPosition;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* Implementation of FormattedValue based on FormattedStringBuilder.
|
||||
*
|
||||
* The implementation currently revolves around numbers and number fields.
|
||||
* However, it can be generalized in the future when there is a need.
|
||||
*
|
||||
* In C++, this implements FormattedValue. In Java, it is a stateless
|
||||
* collection of static functions to avoid having to use nested objects.
|
||||
*
|
||||
* @author sffc (Shane Carr)
|
||||
*/
|
||||
public class FormattedValueStringBuilderImpl {
|
||||
|
||||
|
||||
public static boolean nextFieldPosition(FormattedStringBuilder self, FieldPosition fp) {
|
||||
java.text.Format.Field rawField = fp.getFieldAttribute();
|
||||
|
||||
if (rawField == null) {
|
||||
// Backwards compatibility: read from fp.getField()
|
||||
if (fp.getField() == NumberFormat.INTEGER_FIELD) {
|
||||
rawField = NumberFormat.Field.INTEGER;
|
||||
} else if (fp.getField() == NumberFormat.FRACTION_FIELD) {
|
||||
rawField = NumberFormat.Field.FRACTION;
|
||||
} else {
|
||||
// No field is set
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(rawField instanceof NumberFormat.Field)) {
|
||||
throw new IllegalArgumentException(
|
||||
"You must pass an instance of com.ibm.icu.text.NumberFormat.Field as your FieldPosition attribute. You passed: "
|
||||
+ rawField.getClass().toString());
|
||||
}
|
||||
|
||||
ConstrainedFieldPosition cfpos = new ConstrainedFieldPosition();
|
||||
cfpos.constrainField(rawField);
|
||||
cfpos.setState(rawField, null, fp.getBeginIndex(), fp.getEndIndex());
|
||||
if (nextPosition(self, cfpos, null)) {
|
||||
fp.setBeginIndex(cfpos.getStart());
|
||||
fp.setEndIndex(cfpos.getLimit());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Special case: fraction should start after integer if fraction is not present
|
||||
if (rawField == NumberFormat.Field.FRACTION && fp.getEndIndex() == 0) {
|
||||
boolean inside = false;
|
||||
int i = self.zero;
|
||||
for (; i < self.zero + self.length; i++) {
|
||||
if (isIntOrGroup(self.fields[i]) || self.fields[i] == NumberFormat.Field.DECIMAL_SEPARATOR) {
|
||||
inside = true;
|
||||
} else if (inside) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
fp.setBeginIndex(i - self.zero);
|
||||
fp.setEndIndex(i - self.zero);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public static AttributedCharacterIterator toCharacterIterator(FormattedStringBuilder self, Field numericField) {
|
||||
ConstrainedFieldPosition cfpos = new ConstrainedFieldPosition();
|
||||
AttributedString as = new AttributedString(self.toString());
|
||||
while (nextPosition(self, cfpos, numericField)) {
|
||||
// Backwards compatibility: field value = field
|
||||
as.addAttribute(cfpos.getField(), cfpos.getField(), cfpos.getStart(), cfpos.getLimit());
|
||||
}
|
||||
return as.getIterator();
|
||||
}
|
||||
|
||||
static class NullField extends Field {
|
||||
private static final long serialVersionUID = 1L;
|
||||
static final NullField END = new NullField("end");
|
||||
private NullField(String name) {
|
||||
super(name);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of nextPosition consistent with the contract of FormattedValue.
|
||||
*
|
||||
* @param cfpos
|
||||
* The argument passed to the public API.
|
||||
* @param numericField
|
||||
* Optional. If non-null, apply this field to the entire numeric portion of the string.
|
||||
* @return See FormattedValue#nextPosition.
|
||||
*/
|
||||
public static boolean nextPosition(FormattedStringBuilder self, ConstrainedFieldPosition cfpos, Field numericField) {
|
||||
int fieldStart = -1;
|
||||
Field currField = null;
|
||||
for (int i = self.zero + cfpos.getLimit(); i <= self.zero + self.length; i++) {
|
||||
Field _field = (i < self.zero + self.length) ? self.fields[i] : NullField.END;
|
||||
// Case 1: currently scanning a field.
|
||||
if (currField != null) {
|
||||
if (currField != _field) {
|
||||
int end = i - self.zero;
|
||||
// Grouping separators can be whitespace; don't throw them out!
|
||||
if (currField != NumberFormat.Field.GROUPING_SEPARATOR) {
|
||||
end = trimBack(self, end);
|
||||
}
|
||||
if (end <= fieldStart) {
|
||||
// Entire field position is ignorable; skip.
|
||||
fieldStart = -1;
|
||||
currField = null;
|
||||
i--; // look at this index again
|
||||
continue;
|
||||
}
|
||||
int start = fieldStart;
|
||||
if (currField != NumberFormat.Field.GROUPING_SEPARATOR) {
|
||||
start = trimFront(self, start);
|
||||
}
|
||||
cfpos.setState(currField, null, start, end);
|
||||
return true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
|
||||
if (cfpos.matchesField(NumberFormat.Field.INTEGER, null)
|
||||
&& i > self.zero
|
||||
// don't return the same field twice in a row:
|
||||
&& i - self.zero > cfpos.getLimit()
|
||||
&& isIntOrGroup(self.fields[i - 1])
|
||||
&& !isIntOrGroup(_field)) {
|
||||
int j = i - 1;
|
||||
for (; j >= self.zero && isIntOrGroup(self.fields[j]); j--) {}
|
||||
cfpos.setState(NumberFormat.Field.INTEGER, null, j - self.zero + 1, i - self.zero);
|
||||
return true;
|
||||
}
|
||||
// Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
|
||||
if (numericField != null
|
||||
&& cfpos.matchesField(numericField, null)
|
||||
&& i > self.zero
|
||||
// don't return the same field twice in a row:
|
||||
&& (i - self.zero > cfpos.getLimit() || cfpos.getField() != numericField)
|
||||
&& isNumericField(self.fields[i - 1])
|
||||
&& !isNumericField(_field)) {
|
||||
int j = i - 1;
|
||||
for (; j >= self.zero && isNumericField(self.fields[j]); j--) {}
|
||||
cfpos.setState(numericField, null, j - self.zero + 1, i - self.zero);
|
||||
return true;
|
||||
}
|
||||
// Special case: skip over INTEGER; will be coalesced later.
|
||||
if (_field == NumberFormat.Field.INTEGER) {
|
||||
_field = null;
|
||||
}
|
||||
// Case 2: no field starting at this position.
|
||||
if (_field == null || _field == NullField.END) {
|
||||
continue;
|
||||
}
|
||||
// Case 3: check for field starting at this position
|
||||
if (cfpos.matchesField(_field, null)) {
|
||||
fieldStart = i - self.zero;
|
||||
currField = _field;
|
||||
}
|
||||
}
|
||||
|
||||
assert currField == null;
|
||||
return false;
|
||||
}
|
||||
|
||||
private static boolean isIntOrGroup(Field field) {
|
||||
return field == NumberFormat.Field.INTEGER || field == NumberFormat.Field.GROUPING_SEPARATOR;
|
||||
}
|
||||
|
||||
private static boolean isNumericField(Field field) {
|
||||
return field == null || NumberFormat.Field.class.isAssignableFrom(field.getClass());
|
||||
}
|
||||
|
||||
private static int trimBack(FormattedStringBuilder self, int limit) {
|
||||
return StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES)
|
||||
.spanBack(self, limit, UnicodeSet.SpanCondition.CONTAINED);
|
||||
}
|
||||
|
||||
private static int trimFront(FormattedStringBuilder self, int start) {
|
||||
return StaticUnicodeSets.get(StaticUnicodeSets.Key.DEFAULT_IGNORABLES)
|
||||
.span(self, start, UnicodeSet.SpanCondition.CONTAINED);
|
||||
}
|
||||
}
|
|
@ -35,17 +35,15 @@ public final class IDNA2003 {
|
|||
private static final StringPrep namePrep = StringPrep.getInstance(StringPrep.RFC3491_NAMEPREP);
|
||||
|
||||
private static boolean startsWithPrefix(StringBuffer src){
|
||||
boolean startsWithPrefix = true;
|
||||
|
||||
if(src.length() < ACE_PREFIX.length){
|
||||
return false;
|
||||
}
|
||||
for(int i=0; i<ACE_PREFIX.length;i++){
|
||||
if(toASCIILower(src.charAt(i)) != ACE_PREFIX[i]){
|
||||
startsWithPrefix = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return startsWithPrefix;
|
||||
return true;
|
||||
}
|
||||
|
||||
private static char toASCIILower(char ch){
|
||||
|
@ -168,6 +166,7 @@ public final class IDNA2003 {
|
|||
while((ch = src.next())!= UCharacterIterator.DONE){
|
||||
if(ch> 0x7f){
|
||||
srcIsASCII = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
int failPos = -1;
|
||||
|
|
|
@ -6,14 +6,16 @@ import com.ibm.icu.lang.UCharacter;
|
|||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
/**
|
||||
* A mutable String wrapper with a variable offset and length and support for case folding.
|
||||
* <p>
|
||||
* The charAt, length, and subSequence methods all operate relative to the fixed offset into the String.
|
||||
* <p>
|
||||
* CAUTION: Since this class is mutable, it must not be used anywhere that an immutable object is
|
||||
* required, like in a cache or as the key of a hash map.
|
||||
* A mutable String wrapper with a variable offset and length and
|
||||
* support for case folding. The charAt, length, and subSequence methods all
|
||||
* operate relative to the fixed offset into the String.
|
||||
*
|
||||
* @author sffc
|
||||
* Intended to be useful for parsing.
|
||||
*
|
||||
* CAUTION: Since this class is mutable, it must not be used anywhere that an
|
||||
* immutable object is required, like in a cache or as the key of a hash map.
|
||||
*
|
||||
* @author sffc (Shane Carr)
|
||||
*/
|
||||
public class StringSegment implements CharSequence {
|
||||
private final String str;
|
||||
|
|
|
@ -4,16 +4,18 @@ package com.ibm.icu.impl.locale;
|
|||
|
||||
import java.util.Objects;
|
||||
|
||||
final class LSR {
|
||||
static final int REGION_INDEX_LIMIT = 1000 + 26 * 26;
|
||||
public final class LSR {
|
||||
public static final int REGION_INDEX_LIMIT = 1001 + 26 * 26;
|
||||
|
||||
final String language;
|
||||
final String script;
|
||||
final String region;
|
||||
public static final boolean DEBUG_OUTPUT = false;
|
||||
|
||||
public final String language;
|
||||
public final String script;
|
||||
public final String region;
|
||||
/** Index for region, negative if ill-formed. @see indexForRegion */
|
||||
final int regionIndex;
|
||||
|
||||
LSR(String language, String script, String region) {
|
||||
public LSR(String language, String script, String region) {
|
||||
this.language = language;
|
||||
this.script = script;
|
||||
this.region = region;
|
||||
|
@ -21,27 +23,27 @@ final class LSR {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns a non-negative index for a well-formed region code.
|
||||
* Returns a positive index (>0) for a well-formed region code.
|
||||
* Do not rely on a particular region->index mapping; it may change.
|
||||
* Returns -1 for ill-formed strings.
|
||||
* Returns 0 for ill-formed strings.
|
||||
*/
|
||||
static final int indexForRegion(String region) {
|
||||
public static final int indexForRegion(String region) {
|
||||
if (region.length() == 2) {
|
||||
int a = region.charAt(0) - 'A';
|
||||
if (a < 0 || 25 < a) { return -1; }
|
||||
if (a < 0 || 25 < a) { return 0; }
|
||||
int b = region.charAt(1) - 'A';
|
||||
if (b < 0 || 25 < b) { return -1; }
|
||||
return 26 * a + b + 1000;
|
||||
if (b < 0 || 25 < b) { return 0; }
|
||||
return 26 * a + b + 1001;
|
||||
} else if (region.length() == 3) {
|
||||
int a = region.charAt(0) - '0';
|
||||
if (a < 0 || 9 < a) { return -1; }
|
||||
if (a < 0 || 9 < a) { return 0; }
|
||||
int b = region.charAt(1) - '0';
|
||||
if (b < 0 || 9 < b) { return -1; }
|
||||
if (b < 0 || 9 < b) { return 0; }
|
||||
int c = region.charAt(2) - '0';
|
||||
if (c < 0 || 9 < c) { return -1; }
|
||||
return (10 * a + b) * 10 + c;
|
||||
if (c < 0 || 9 < c) { return 0; }
|
||||
return (10 * a + b) * 10 + c + 1;
|
||||
}
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -324,7 +324,11 @@ public class LanguageTag {
|
|||
if (_variants.isEmpty()) {
|
||||
_variants = new ArrayList<String>(3);
|
||||
}
|
||||
_variants.add(s);
|
||||
// Ignore repeated variant
|
||||
s = s.toUpperCase();
|
||||
if (!_variants.contains(s)) {
|
||||
_variants.add(s);
|
||||
}
|
||||
sts._parseLength = itr.currentEnd();
|
||||
itr.next();
|
||||
}
|
||||
|
@ -343,7 +347,7 @@ public class LanguageTag {
|
|||
String s = itr.current();
|
||||
if (isExtensionSingleton(s)) {
|
||||
int start = itr.currentStart();
|
||||
String singleton = s;
|
||||
String singleton = s.toLowerCase();
|
||||
StringBuilder sb = new StringBuilder(singleton);
|
||||
|
||||
itr.next();
|
||||
|
@ -367,7 +371,14 @@ public class LanguageTag {
|
|||
if (_extensions.size() == 0) {
|
||||
_extensions = new ArrayList<String>(4);
|
||||
}
|
||||
_extensions.add(sb.toString());
|
||||
// Ignore the extension if it is already in _extensions.
|
||||
boolean alreadyHas = false;
|
||||
for (String extension : _extensions) {
|
||||
alreadyHas |= extension.charAt(0) == sb.charAt(0);
|
||||
}
|
||||
if (!alreadyHas) {
|
||||
_extensions.add(sb.toString());
|
||||
}
|
||||
found = true;
|
||||
} else {
|
||||
break;
|
||||
|
|
|
@ -2,11 +2,20 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.LinkedHashMap;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.MissingResourceException;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.UResource;
|
||||
import com.ibm.icu.util.BytesTrie;
|
||||
import com.ibm.icu.util.LocaleMatcher.FavorSubtag;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
|
@ -14,9 +23,21 @@ import com.ibm.icu.util.ULocale;
|
|||
* Mostly but not only the data for mapping locales to their maximized forms.
|
||||
*/
|
||||
public class LocaleDistance {
|
||||
/** Distance value bit flag, set by the builder. */
|
||||
public static final int DISTANCE_SKIP_SCRIPT = 0x80;
|
||||
/** Distance value bit flag, set by trieNext(). */
|
||||
private static final int DISTANCE_IS_FINAL = 0x100;
|
||||
private static final int DISTANCE_IS_FINAL_OR_SKIP_SCRIPT =
|
||||
DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT;
|
||||
// Indexes into array of distances.
|
||||
public static final int IX_DEF_LANG_DISTANCE = 0;
|
||||
public static final int IX_DEF_SCRIPT_DISTANCE = 1;
|
||||
public static final int IX_DEF_REGION_DISTANCE = 2;
|
||||
public static final int IX_MIN_REGION_DISTANCE = 3;
|
||||
public static final int IX_LIMIT = 4;
|
||||
private static final int ABOVE_THRESHOLD = 100;
|
||||
|
||||
private static final boolean DEBUG_OUTPUT = false;
|
||||
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
|
||||
|
||||
// The trie maps each dlang+slang+dscript+sscript+dregion+sregion
|
||||
// (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
|
||||
|
@ -28,7 +49,7 @@ public class LocaleDistance {
|
|||
* Maps each region to zero or more single-character partitions.
|
||||
*/
|
||||
private final byte[] regionToPartitionsIndex;
|
||||
private final String[][] partitionArrays;
|
||||
private final String[] partitionArrays;
|
||||
|
||||
/**
|
||||
* Used to get the paradigm region for a cluster, if there is one.
|
||||
|
@ -38,49 +59,127 @@ public class LocaleDistance {
|
|||
private final int defaultLanguageDistance;
|
||||
private final int defaultScriptDistance;
|
||||
private final int defaultRegionDistance;
|
||||
private final int minRegionDistance;
|
||||
private final int defaultDemotionPerDesiredLocale;
|
||||
|
||||
// TODO: Load prebuilt data from a resource bundle
|
||||
// to avoid the dependency on the builder code.
|
||||
// VisibleForTesting
|
||||
public static final LocaleDistance INSTANCE = LocaleDistanceBuilder.build();
|
||||
public static final class Data {
|
||||
public byte[] trie;
|
||||
public byte[] regionToPartitionsIndex;
|
||||
public String[] partitionArrays;
|
||||
public Set<LSR> paradigmLSRs;
|
||||
public int[] distances;
|
||||
|
||||
public Data(byte[] trie,
|
||||
byte[] regionToPartitionsIndex, String[] partitionArrays,
|
||||
Set<LSR> paradigmLSRs, int[] distances) {
|
||||
this.trie = trie;
|
||||
this.regionToPartitionsIndex = regionToPartitionsIndex;
|
||||
this.partitionArrays = partitionArrays;
|
||||
this.paradigmLSRs = paradigmLSRs;
|
||||
this.distances = distances;
|
||||
}
|
||||
|
||||
private static UResource.Value getValue(UResource.Table table,
|
||||
String key, UResource.Value value) {
|
||||
if (!table.findValue(key, value)) {
|
||||
throw new MissingResourceException(
|
||||
"langInfo.res missing data", "", "match/" + key);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public static Data load() throws MissingResourceException {
|
||||
ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BASE_NAME, "langInfo",
|
||||
ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
|
||||
UResource.Value value = langInfo.getValueWithFallback("match");
|
||||
UResource.Table matchTable = value.getTable();
|
||||
|
||||
ByteBuffer buffer = getValue(matchTable, "trie", value).getBinary();
|
||||
byte[] trie = new byte[buffer.remaining()];
|
||||
buffer.get(trie);
|
||||
|
||||
buffer = getValue(matchTable, "regionToPartitions", value).getBinary();
|
||||
byte[] regionToPartitions = new byte[buffer.remaining()];
|
||||
buffer.get(regionToPartitions);
|
||||
if (regionToPartitions.length < LSR.REGION_INDEX_LIMIT) {
|
||||
throw new MissingResourceException(
|
||||
"langInfo.res binary data too short", "", "match/regionToPartitions");
|
||||
}
|
||||
|
||||
String[] partitions = getValue(matchTable, "partitions", value).getStringArray();
|
||||
|
||||
Set<LSR> paradigmLSRs;
|
||||
if (matchTable.findValue("paradigms", value)) {
|
||||
String[] paradigms = value.getStringArray();
|
||||
paradigmLSRs = new HashSet<>(paradigms.length / 3);
|
||||
for (int i = 0; i < paradigms.length; i += 3) {
|
||||
paradigmLSRs.add(new LSR(paradigms[i], paradigms[i + 1], paradigms[i + 2]));
|
||||
}
|
||||
} else {
|
||||
paradigmLSRs = Collections.emptySet();
|
||||
}
|
||||
|
||||
int[] distances = getValue(matchTable, "distances", value).getIntVector();
|
||||
if (distances.length < IX_LIMIT) {
|
||||
throw new MissingResourceException(
|
||||
"langInfo.res intvector too short", "", "match/distances");
|
||||
}
|
||||
|
||||
return new Data(trie, regionToPartitions, partitions, paradigmLSRs, distances);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other) { return true; }
|
||||
if (!getClass().equals(other.getClass())) { return false; }
|
||||
Data od = (Data)other;
|
||||
return Arrays.equals(trie, od.trie) &&
|
||||
Arrays.equals(regionToPartitionsIndex, od.regionToPartitionsIndex) &&
|
||||
Arrays.equals(partitionArrays, od.partitionArrays) &&
|
||||
paradigmLSRs.equals(od.paradigmLSRs) &&
|
||||
Arrays.equals(distances, od.distances);
|
||||
}
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public static final LocaleDistance INSTANCE = new LocaleDistance(Data.load());
|
||||
|
||||
private LocaleDistance(Data data) {
|
||||
this.trie = new BytesTrie(data.trie, 0);
|
||||
this.regionToPartitionsIndex = data.regionToPartitionsIndex;
|
||||
this.partitionArrays = data.partitionArrays;
|
||||
this.paradigmLSRs = data.paradigmLSRs;
|
||||
defaultLanguageDistance = data.distances[IX_DEF_LANG_DISTANCE];
|
||||
defaultScriptDistance = data.distances[IX_DEF_SCRIPT_DISTANCE];
|
||||
defaultRegionDistance = data.distances[IX_DEF_REGION_DISTANCE];
|
||||
this.minRegionDistance = data.distances[IX_MIN_REGION_DISTANCE];
|
||||
|
||||
LSR en = new LSR("en", "Latn", "US");
|
||||
LSR enGB = new LSR("en", "Latn", "GB");
|
||||
defaultDemotionPerDesiredLocale = getBestIndexAndDistance(en, new LSR[] { enGB },
|
||||
50, FavorSubtag.LANGUAGE) & 0xff;
|
||||
|
||||
LocaleDistance(BytesTrie trie,
|
||||
byte[] regionToPartitionsIndex, String[][] partitionArrays,
|
||||
Set<LSR> paradigmLSRs) {
|
||||
this.trie = trie;
|
||||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("*** locale distance");
|
||||
System.out.println("defaultLanguageDistance=" + defaultLanguageDistance);
|
||||
System.out.println("defaultScriptDistance=" + defaultScriptDistance);
|
||||
System.out.println("defaultRegionDistance=" + defaultRegionDistance);
|
||||
testOnlyPrintDistanceTable();
|
||||
}
|
||||
this.regionToPartitionsIndex = regionToPartitionsIndex;
|
||||
this.partitionArrays = partitionArrays;
|
||||
this.paradigmLSRs = paradigmLSRs;
|
||||
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
BytesTrie.Result result = iter.next('*');
|
||||
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
defaultLanguageDistance = iter.getValue();
|
||||
result = iter.next('*');
|
||||
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
defaultScriptDistance = iter.getValue();
|
||||
result = iter.next('*');
|
||||
assert result.hasValue();
|
||||
defaultRegionDistance = iter.getValue();
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public int testOnlyDistance(ULocale desired, ULocale supported,
|
||||
int threshold, DistanceOption distanceOption) {
|
||||
int threshold, FavorSubtag favorSubtag) {
|
||||
LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
|
||||
LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
|
||||
return getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR },
|
||||
threshold, distanceOption) & 0xff;
|
||||
threshold, favorSubtag) & 0xff;
|
||||
}
|
||||
|
||||
public enum DistanceOption {REGION_FIRST, SCRIPT_FIRST}
|
||||
// NOTE: Replaced "NORMAL" with "REGION_FIRST". By default, scripts have greater weight
|
||||
// than regions, so they might be considered the "normal" case.
|
||||
|
||||
/**
|
||||
* Finds the supported LSR with the smallest distance from the desired one.
|
||||
* Equivalent LSR subtags must be normalized into a canonical form.
|
||||
|
@ -89,14 +188,13 @@ public class LocaleDistance {
|
|||
* (negative if none has a distance below the threshold),
|
||||
* and its distance (0..ABOVE_THRESHOLD) in bits 7..0.
|
||||
*/
|
||||
int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
|
||||
int threshold, DistanceOption distanceOption) {
|
||||
public int getBestIndexAndDistance(LSR desired, LSR[] supportedLsrs,
|
||||
int threshold, FavorSubtag favorSubtag) {
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
// Look up the desired language only once for all supported LSRs.
|
||||
// Its "distance" is either a match point value of 0, or a non-match negative value.
|
||||
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
|
||||
// Set wantValue=true so that iter reads & skips the match point value.
|
||||
int desLangDistance = trieNext(iter, desired.language, true, true);
|
||||
int desLangDistance = trieNext(iter, desired.language, false);
|
||||
long desLangState = desLangDistance >= 0 && supportedLsrs.length > 1 ? iter.getState64() : 0;
|
||||
// Index of the supported LSR with the lowest distance.
|
||||
int bestIndex = -1;
|
||||
|
@ -105,26 +203,31 @@ public class LocaleDistance {
|
|||
boolean star = false;
|
||||
int distance = desLangDistance;
|
||||
if (distance >= 0) {
|
||||
assert (distance & DISTANCE_IS_FINAL) == 0;
|
||||
if (slIndex != 0) {
|
||||
iter.resetToState64(desLangState);
|
||||
}
|
||||
distance = trieNext(iter, supported.language, true, true);
|
||||
distance = trieNext(iter, supported.language, true);
|
||||
}
|
||||
// Note: The data builder verifies that there are no rules with "any" (*) language and
|
||||
// real (non *) script or region subtags.
|
||||
// This means that if the lookup for either language fails we can use
|
||||
// the default distances without further lookups.
|
||||
if (distance < 0) { // <*, *>
|
||||
int flags;
|
||||
if (distance >= 0) {
|
||||
flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
|
||||
distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT;
|
||||
} else { // <*, *>
|
||||
if (desired.language.equals(supported.language)) {
|
||||
distance = 0;
|
||||
} else {
|
||||
distance = defaultLanguageDistance;
|
||||
}
|
||||
flags = 0;
|
||||
star = true;
|
||||
}
|
||||
assert 0 <= distance && distance <= 100;
|
||||
boolean scriptFirst = distanceOption == DistanceOption.SCRIPT_FIRST;
|
||||
if (scriptFirst) {
|
||||
if (favorSubtag == FavorSubtag.SCRIPT) {
|
||||
distance >>= 2;
|
||||
}
|
||||
if (distance >= threshold) {
|
||||
|
@ -132,18 +235,17 @@ public class LocaleDistance {
|
|||
}
|
||||
|
||||
int scriptDistance;
|
||||
if (star) {
|
||||
if (star || flags != 0) {
|
||||
if (desired.script.equals(supported.script)) {
|
||||
scriptDistance = 0;
|
||||
} else {
|
||||
scriptDistance = defaultScriptDistance;
|
||||
}
|
||||
} else {
|
||||
scriptDistance = getDesSuppDistance(iter, iter.getState64(),
|
||||
desired.script, supported.script, false);
|
||||
}
|
||||
if (scriptFirst) {
|
||||
scriptDistance >>= 1;
|
||||
scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(),
|
||||
desired.script, supported.script);
|
||||
flags = scriptDistance & DISTANCE_IS_FINAL;
|
||||
scriptDistance &= ~DISTANCE_IS_FINAL;
|
||||
}
|
||||
distance += scriptDistance;
|
||||
if (distance >= threshold) {
|
||||
|
@ -152,27 +254,24 @@ public class LocaleDistance {
|
|||
|
||||
if (desired.region.equals(supported.region)) {
|
||||
// regionDistance = 0
|
||||
} else if (star) {
|
||||
} else if (star || (flags & DISTANCE_IS_FINAL) != 0) {
|
||||
distance += defaultRegionDistance;
|
||||
} else {
|
||||
long startState = iter.getState64();
|
||||
int remainingThreshold = threshold - distance;
|
||||
if (minRegionDistance >= remainingThreshold) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// From here on we know the regions are not equal.
|
||||
// Map each region to zero or more partitions. (zero = one empty string)
|
||||
// Map each region to zero or more partitions. (zero = one non-matching string)
|
||||
// (Each array of single-character partition strings is encoded as one string.)
|
||||
// If either side has more than one, then we find the maximum distance.
|
||||
// This could be optimized by adding some more structure, but probably not worth it.
|
||||
final String[] desiredPartitions = partitionsForRegion(desired);
|
||||
final String[] supportedPartitions = partitionsForRegion(supported);
|
||||
int regionDistance;
|
||||
|
||||
if (desiredPartitions.length > 1 || supportedPartitions.length > 1) {
|
||||
regionDistance = getRegionPartitionsDistance(iter, startState,
|
||||
desiredPartitions, supportedPartitions, threshold - distance);
|
||||
} else {
|
||||
regionDistance = getDesSuppDistance(iter, startState,
|
||||
desiredPartitions[0], supportedPartitions[0], true);
|
||||
}
|
||||
distance += regionDistance;
|
||||
distance += getRegionPartitionsDistance(
|
||||
iter, iter.getState64(),
|
||||
partitionsForRegion(desired),
|
||||
partitionsForRegion(supported),
|
||||
remainingThreshold);
|
||||
}
|
||||
if (distance < threshold) {
|
||||
if (distance == 0) {
|
||||
|
@ -185,105 +284,144 @@ public class LocaleDistance {
|
|||
return bestIndex >= 0 ? (bestIndex << 8) | threshold : 0xffffff00 | ABOVE_THRESHOLD;
|
||||
}
|
||||
|
||||
private int getRegionPartitionsDistance(BytesTrie iter, long startState,
|
||||
String[] desiredPartitions, String[] supportedPartitions, int threshold) {
|
||||
int regionDistance = -1;
|
||||
for (String dp : desiredPartitions) {
|
||||
for (String sp : supportedPartitions) {
|
||||
if (regionDistance >= 0) { // no need to reset in first iteration
|
||||
iter.resetToState64(startState);
|
||||
}
|
||||
int d = getDesSuppDistance(iter, startState, dp, sp, true);
|
||||
if (regionDistance < d) {
|
||||
if (d >= threshold) {
|
||||
return d;
|
||||
}
|
||||
regionDistance = d;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert regionDistance >= 0;
|
||||
return regionDistance;
|
||||
}
|
||||
|
||||
// Modified from
|
||||
// DistanceTable#getDistance(desired, supported, Output distanceTable, starEquals).
|
||||
private static final int getDesSuppDistance(BytesTrie iter, long startState,
|
||||
String desired, String supported, boolean finalSubtag) {
|
||||
private static final int getDesSuppScriptDistance(BytesTrie iter, long startState,
|
||||
String desired, String supported) {
|
||||
// Note: The data builder verifies that there are no <*, supported> or <desired, *> rules.
|
||||
int distance = trieNext(iter, desired, false, true);
|
||||
int distance = trieNext(iter, desired, false);
|
||||
if (distance >= 0) {
|
||||
distance = trieNext(iter, supported, true, !finalSubtag);
|
||||
distance = trieNext(iter, supported, true);
|
||||
}
|
||||
if (distance < 0) {
|
||||
BytesTrie.Result result = iter.resetToState64(startState).next('*'); // <*, *>
|
||||
assert finalSubtag ? result.hasValue() : result == BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
if (!finalSubtag && desired.equals(supported)) {
|
||||
distance = 0; // same language or script
|
||||
assert result.hasValue();
|
||||
if (desired.equals(supported)) {
|
||||
distance = 0; // same script
|
||||
} else {
|
||||
distance = iter.getValue();
|
||||
assert distance >= 0;
|
||||
}
|
||||
if (result == BytesTrie.Result.FINAL_VALUE) {
|
||||
distance |= DISTANCE_IS_FINAL;
|
||||
}
|
||||
}
|
||||
return distance;
|
||||
}
|
||||
|
||||
private static final int trieNext(BytesTrie iter, String s, boolean wantValue, boolean wantNext) {
|
||||
private static final int getRegionPartitionsDistance(BytesTrie iter, long startState,
|
||||
String desiredPartitions, String supportedPartitions, int threshold) {
|
||||
int desLength = desiredPartitions.length();
|
||||
int suppLength = supportedPartitions.length();
|
||||
if (desLength == 1 && suppLength == 1) {
|
||||
BytesTrie.Result result = iter.next(desiredPartitions.charAt(0) | 0x80);
|
||||
if (result.hasNext()) {
|
||||
result = iter.next(supportedPartitions.charAt(0) | 0x80);
|
||||
if (result.hasValue()) {
|
||||
return iter.getValue();
|
||||
}
|
||||
}
|
||||
return getFallbackRegionDistance(iter, startState);
|
||||
}
|
||||
|
||||
int regionDistance = 0;
|
||||
// Fall back to * only once, not for each pair of partition strings.
|
||||
boolean star = false;
|
||||
for (int di = 0;;) {
|
||||
// Look up each desired-partition string only once,
|
||||
// not for each (desired, supported) pair.
|
||||
BytesTrie.Result result = iter.next(desiredPartitions.charAt(di++) | 0x80);
|
||||
if (result.hasNext()) {
|
||||
long desState = suppLength > 1 ? iter.getState64() : 0;
|
||||
for (int si = 0;;) {
|
||||
result = iter.next(supportedPartitions.charAt(si++) | 0x80);
|
||||
int d;
|
||||
if (result.hasValue()) {
|
||||
d = iter.getValue();
|
||||
} else if (star) {
|
||||
d = 0;
|
||||
} else {
|
||||
d = getFallbackRegionDistance(iter, startState);
|
||||
star = true;
|
||||
}
|
||||
if (d >= threshold) {
|
||||
return d;
|
||||
} else if (regionDistance < d) {
|
||||
regionDistance = d;
|
||||
}
|
||||
if (si < suppLength) {
|
||||
iter.resetToState64(desState);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (!star) {
|
||||
int d = getFallbackRegionDistance(iter, startState);
|
||||
if (d >= threshold) {
|
||||
return d;
|
||||
} else if (regionDistance < d) {
|
||||
regionDistance = d;
|
||||
}
|
||||
star = true;
|
||||
}
|
||||
if (di < desLength) {
|
||||
iter.resetToState64(startState);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return regionDistance;
|
||||
}
|
||||
|
||||
private static final int getFallbackRegionDistance(BytesTrie iter, long startState) {
|
||||
BytesTrie.Result result = iter.resetToState64(startState).next('*'); // <*, *>
|
||||
assert result.hasValue();
|
||||
int distance = iter.getValue();
|
||||
assert distance >= 0;
|
||||
return distance;
|
||||
}
|
||||
|
||||
private static final int trieNext(BytesTrie iter, String s, boolean wantValue) {
|
||||
if (s.isEmpty()) {
|
||||
return -1; // no empty subtags in the distance data
|
||||
}
|
||||
BytesTrie.Result result;
|
||||
int end = s.length() - 1;
|
||||
for (int i = 0;; ++i) {
|
||||
for (int i = 0, end = s.length() - 1;; ++i) {
|
||||
int c = s.charAt(i);
|
||||
assert c <= 0x7f;
|
||||
if (i < end) {
|
||||
result = iter.next(c);
|
||||
if (!result.hasNext()) {
|
||||
if (!iter.next(c).hasNext()) {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
// last character of this subtag
|
||||
result = iter.next(c | 0x80);
|
||||
break;
|
||||
BytesTrie.Result result = iter.next(c | 0x80);
|
||||
if (wantValue) {
|
||||
if (result.hasValue()) {
|
||||
int value = iter.getValue();
|
||||
if (result == BytesTrie.Result.FINAL_VALUE) {
|
||||
value |= DISTANCE_IS_FINAL;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
} else {
|
||||
if (result.hasNext()) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (wantValue) {
|
||||
if (wantNext) {
|
||||
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
|
||||
return iter.getValue();
|
||||
}
|
||||
} else {
|
||||
if (result.hasValue()) {
|
||||
return iter.getValue();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (wantNext) {
|
||||
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
if (result.hasValue()) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return testOnlyGetDistanceTable(true).toString();
|
||||
return testOnlyGetDistanceTable().toString();
|
||||
}
|
||||
|
||||
private String[] partitionsForRegion(LSR lsr) {
|
||||
// ill-formed region -> one empty string
|
||||
int pIndex = lsr.regionIndex >= 0 ? regionToPartitionsIndex[lsr.regionIndex] : 0;
|
||||
private String partitionsForRegion(LSR lsr) {
|
||||
// ill-formed region -> one non-matching string
|
||||
int pIndex = regionToPartitionsIndex[lsr.regionIndex];
|
||||
return partitionArrays[pIndex];
|
||||
}
|
||||
|
||||
boolean isParadigmLSR(LSR lsr) {
|
||||
public boolean isParadigmLSR(LSR lsr) {
|
||||
return paradigmLSRs.contains(lsr);
|
||||
}
|
||||
|
||||
|
@ -296,48 +434,50 @@ public class LocaleDistance {
|
|||
return defaultRegionDistance;
|
||||
}
|
||||
|
||||
public int getDefaultDemotionPerDesiredLocale() {
|
||||
return defaultDemotionPerDesiredLocale;
|
||||
}
|
||||
|
||||
// TODO: When we build data offline,
|
||||
// write test code to compare the loaded table with the builder output.
|
||||
// Fail if different, with instructions for how to update the data file.
|
||||
// VisibleForTesting
|
||||
public Map<String, Integer> testOnlyGetDistanceTable(boolean skipIntermediateMatchPoints) {
|
||||
Map<String, Integer> map = new LinkedHashMap<>();
|
||||
public Map<String, Integer> testOnlyGetDistanceTable() {
|
||||
Map<String, Integer> map = new TreeMap<>();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (BytesTrie.Entry entry : trie) {
|
||||
sb.setLength(0);
|
||||
int numSubtags = 0;
|
||||
int length = entry.bytesLength();
|
||||
for (int i = 0; i < length; ++i) {
|
||||
byte b = entry.byteAt(i);
|
||||
if (b == '*') {
|
||||
// One * represents a (desired, supported) = (ANY, ANY) pair.
|
||||
sb.append("*-*-");
|
||||
numSubtags += 2;
|
||||
} else {
|
||||
if (b >= 0) {
|
||||
sb.append((char) b);
|
||||
} else { // end of subtag
|
||||
sb.append((char) (b & 0x7f)).append('-');
|
||||
++numSubtags;
|
||||
}
|
||||
}
|
||||
}
|
||||
assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
|
||||
if (!skipIntermediateMatchPoints || (numSubtags & 1) == 0) {
|
||||
sb.setLength(sb.length() - 1);
|
||||
String s = sb.toString();
|
||||
if (!skipIntermediateMatchPoints && s.endsWith("*-*")) {
|
||||
// Re-insert single-ANY match points to show consistent structure
|
||||
// for the test code.
|
||||
map.put(s.substring(0, s.length() - 2), 0);
|
||||
}
|
||||
map.put(s, entry.value);
|
||||
}
|
||||
sb.setLength(sb.length() - 1);
|
||||
map.put(sb.toString(), entry.value);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public void testOnlyPrintDistanceTable() {
|
||||
for (Map.Entry<String, Integer> mapping : testOnlyGetDistanceTable(true).entrySet()) {
|
||||
System.out.println(mapping);
|
||||
for (Map.Entry<String, Integer> mapping : testOnlyGetDistanceTable().entrySet()) {
|
||||
String suffix = "";
|
||||
int value = mapping.getValue();
|
||||
if ((value & DISTANCE_SKIP_SCRIPT) != 0) {
|
||||
value &= ~DISTANCE_SKIP_SCRIPT;
|
||||
suffix = " skip script";
|
||||
}
|
||||
System.out.println(mapping.getKey() + '=' + value + suffix);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,11 +2,18 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.MissingResourceException;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.ibm.icu.impl.ICUData;
|
||||
import com.ibm.icu.impl.ICUResourceBundle;
|
||||
import com.ibm.icu.impl.UResource;
|
||||
import com.ibm.icu.util.BytesTrie;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
|
@ -15,27 +22,93 @@ public final class XLikelySubtags {
|
|||
private static final String PSEUDO_BIDI_PREFIX = "+"; // -XB, -PSBIDI
|
||||
private static final String PSEUDO_CRACKED_PREFIX = ","; // -XC, -PSCRACK
|
||||
|
||||
private static final boolean DEBUG_OUTPUT = false;
|
||||
public static final int SKIP_SCRIPT = 1;
|
||||
|
||||
// TODO: Load prebuilt data from a resource bundle
|
||||
// to avoid the dependency on the builder code.
|
||||
static final XLikelySubtags INSTANCE = new XLikelySubtags(LikelySubtagsBuilder.build());
|
||||
private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
|
||||
|
||||
static final class Data {
|
||||
private final Map<String, String> languageAliases;
|
||||
private final Map<String, String> regionAliases;
|
||||
private final BytesTrie trie;
|
||||
private final LSR[] lsrs;
|
||||
// VisibleForTesting
|
||||
public static final class Data {
|
||||
public final Map<String, String> languageAliases;
|
||||
public final Map<String, String> regionAliases;
|
||||
public final byte[] trie;
|
||||
public final LSR[] lsrs;
|
||||
|
||||
Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
|
||||
BytesTrie trie, LSR[] lsrs) {
|
||||
public Data(Map<String, String> languageAliases, Map<String, String> regionAliases,
|
||||
byte[] trie, LSR[] lsrs) {
|
||||
this.languageAliases = languageAliases;
|
||||
this.regionAliases = regionAliases;
|
||||
this.trie = trie;
|
||||
this.lsrs = lsrs;
|
||||
}
|
||||
|
||||
private static UResource.Value getValue(UResource.Table table,
|
||||
String key, UResource.Value value) {
|
||||
if (!table.findValue(key, value)) {
|
||||
throw new MissingResourceException(
|
||||
"langInfo.res missing data", "", "likely/" + key);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public static Data load() throws MissingResourceException {
|
||||
ICUResourceBundle langInfo = ICUResourceBundle.getBundleInstance(
|
||||
ICUData.ICU_BASE_NAME, "langInfo",
|
||||
ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
|
||||
UResource.Value value = langInfo.getValueWithFallback("likely");
|
||||
UResource.Table likelyTable = value.getTable();
|
||||
|
||||
Map<String, String> languageAliases;
|
||||
if (likelyTable.findValue("languageAliases", value)) {
|
||||
String[] pairs = value.getStringArray();
|
||||
languageAliases = new HashMap<>(pairs.length / 2);
|
||||
for (int i = 0; i < pairs.length; i += 2) {
|
||||
languageAliases.put(pairs[i], pairs[i + 1]);
|
||||
}
|
||||
} else {
|
||||
languageAliases = Collections.emptyMap();
|
||||
}
|
||||
|
||||
Map<String, String> regionAliases;
|
||||
if (likelyTable.findValue("regionAliases", value)) {
|
||||
String[] pairs = value.getStringArray();
|
||||
regionAliases = new HashMap<>(pairs.length / 2);
|
||||
for (int i = 0; i < pairs.length; i += 2) {
|
||||
regionAliases.put(pairs[i], pairs[i + 1]);
|
||||
}
|
||||
} else {
|
||||
regionAliases = Collections.emptyMap();
|
||||
}
|
||||
|
||||
ByteBuffer buffer = getValue(likelyTable, "trie", value).getBinary();
|
||||
byte[] trie = new byte[buffer.remaining()];
|
||||
buffer.get(trie);
|
||||
|
||||
String[] lsrSubtags = getValue(likelyTable, "lsrs", value).getStringArray();
|
||||
LSR[] lsrs = new LSR[lsrSubtags.length / 3];
|
||||
for (int i = 0, j = 0; i < lsrSubtags.length; i += 3, ++j) {
|
||||
lsrs[j] = new LSR(lsrSubtags[i], lsrSubtags[i + 1], lsrSubtags[i + 2]);
|
||||
}
|
||||
|
||||
return new Data(languageAliases, regionAliases, trie, lsrs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (this == other) { return true; }
|
||||
if (!getClass().equals(other.getClass())) { return false; }
|
||||
Data od = (Data)other;
|
||||
return
|
||||
languageAliases.equals(od.languageAliases) &&
|
||||
regionAliases.equals(od.regionAliases) &&
|
||||
Arrays.equals(trie, od.trie) &&
|
||||
Arrays.equals(lsrs, od.lsrs);
|
||||
}
|
||||
}
|
||||
|
||||
// VisibleForTesting
|
||||
public static final XLikelySubtags INSTANCE = new XLikelySubtags(Data.load());
|
||||
|
||||
private final Map<String, String> languageAliases;
|
||||
private final Map<String, String> regionAliases;
|
||||
|
||||
|
@ -46,30 +119,35 @@ public final class XLikelySubtags {
|
|||
private final long trieUndState;
|
||||
private final long trieUndZzzzState;
|
||||
private final int defaultLsrIndex;
|
||||
private final long[] trieFirstLetterStates = new long[26];
|
||||
private final LSR[] lsrs;
|
||||
|
||||
private XLikelySubtags(XLikelySubtags.Data data) {
|
||||
languageAliases = data.languageAliases;
|
||||
regionAliases = data.regionAliases;
|
||||
trie = data.trie;
|
||||
trie = new BytesTrie(data.trie, 0);
|
||||
lsrs = data.lsrs;
|
||||
|
||||
// Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**").
|
||||
BytesTrie.Result result = trie.next('*');
|
||||
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
int value = trie.getValue();
|
||||
assert value == 0;
|
||||
assert result.hasNext();
|
||||
trieUndState = trie.getState64();
|
||||
result = trie.next('*');
|
||||
assert result == BytesTrie.Result.INTERMEDIATE_VALUE;
|
||||
value = trie.getValue();
|
||||
assert value == 0;
|
||||
assert result.hasNext();
|
||||
trieUndZzzzState = trie.getState64();
|
||||
result = trie.next('*');
|
||||
assert result.hasValue();
|
||||
defaultLsrIndex = trie.getValue();
|
||||
trie.reset();
|
||||
|
||||
for (char c = 'a'; c <= 'z'; ++c) {
|
||||
result = trie.next(c);
|
||||
if (result == BytesTrie.Result.NO_VALUE) {
|
||||
trieFirstLetterStates[c - 'a'] = trie.getState64();
|
||||
}
|
||||
trie.reset();
|
||||
}
|
||||
|
||||
if (DEBUG_OUTPUT) {
|
||||
System.out.println("*** likely subtags");
|
||||
for (Map.Entry<String, LSR> mapping : getTable().entrySet()) {
|
||||
|
@ -78,24 +156,53 @@ public final class XLikelySubtags {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implementation of LocaleMatcher.canonicalize(ULocale).
|
||||
*/
|
||||
public ULocale canonicalize(ULocale locale) {
|
||||
String lang = locale.getLanguage();
|
||||
String lang2 = languageAliases.get(lang);
|
||||
String region = locale.getCountry();
|
||||
String region2 = regionAliases.get(region);
|
||||
if (lang2 != null || region2 != null) {
|
||||
return new ULocale(
|
||||
lang2 == null ? lang : lang2,
|
||||
locale.getScript(),
|
||||
region2 == null ? region : region2);
|
||||
}
|
||||
return locale;
|
||||
}
|
||||
|
||||
private static String getCanonical(Map<String, String> aliases, String alias) {
|
||||
String canonical = aliases.get(alias);
|
||||
return canonical == null ? alias : canonical;
|
||||
}
|
||||
|
||||
LSR makeMaximizedLsrFrom(ULocale locale) {
|
||||
// VisibleForTesting
|
||||
public LSR makeMaximizedLsrFrom(ULocale locale) {
|
||||
String name = locale.getName();
|
||||
if (name.startsWith("@x=")) {
|
||||
// Private use language tag x-subtag-subtag...
|
||||
return new LSR(name, "", "");
|
||||
}
|
||||
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
locale.getVariant());
|
||||
}
|
||||
|
||||
public LSR makeMaximizedLsrFrom(Locale locale) {
|
||||
String tag = locale.toLanguageTag();
|
||||
if (tag.startsWith("x-")) {
|
||||
// Private use language tag x-subtag-subtag...
|
||||
return new LSR(tag, "", "");
|
||||
}
|
||||
return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
|
||||
locale.getVariant());
|
||||
}
|
||||
|
||||
private LSR makeMaximizedLsr(String language, String script, String region, String variant) {
|
||||
// Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
|
||||
// They should match only themselves,
|
||||
// not other locales with what looks like the same language and script subtags.
|
||||
String language = locale.getLanguage();
|
||||
String script = locale.getScript();
|
||||
String region = locale.getCountry();
|
||||
if (region.length() == 2 && region.charAt(0) == 'X') {
|
||||
switch (region.charAt(1)) {
|
||||
case 'A':
|
||||
|
@ -112,7 +219,6 @@ public final class XLikelySubtags {
|
|||
}
|
||||
}
|
||||
|
||||
String variant = locale.getVariant();
|
||||
if (variant.startsWith("PS")) {
|
||||
switch (variant) {
|
||||
case "PSACCENT":
|
||||
|
@ -130,7 +236,7 @@ public final class XLikelySubtags {
|
|||
}
|
||||
|
||||
language = getCanonical(languageAliases, language);
|
||||
// script is ok
|
||||
// (We have no script mappings.)
|
||||
region = getCanonical(regionAliases, region);
|
||||
return INSTANCE.maximize(language, script, region);
|
||||
}
|
||||
|
@ -139,14 +245,31 @@ public final class XLikelySubtags {
|
|||
* Raw access to addLikelySubtags. Input must be in canonical format, e.g. "en", not "eng" or "EN".
|
||||
*/
|
||||
private LSR maximize(String language, String script, String region) {
|
||||
int retainOldMask = 0;
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
// language lookup
|
||||
if (language.equals("und")) {
|
||||
language = "";
|
||||
}
|
||||
if (script.equals("Zzzz")) {
|
||||
script = "";
|
||||
}
|
||||
if (region.equals("ZZ")) {
|
||||
region = "";
|
||||
}
|
||||
if (!script.isEmpty() && !region.isEmpty() && !language.isEmpty()) {
|
||||
return new LSR(language, script, region); // already maximized
|
||||
}
|
||||
|
||||
int retainOldMask = 0;
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
long state;
|
||||
int value = trieNext(iter, language, false);
|
||||
int value;
|
||||
// Small optimization: Array lookup for first language letter.
|
||||
int c0;
|
||||
if (language.length() >= 2 && 0 <= (c0 = language.charAt(0) - 'a') && c0 <= 25 &&
|
||||
(state = trieFirstLetterStates[c0]) != 0) {
|
||||
value = trieNext(iter.resetToState64(state), language, 1);
|
||||
} else {
|
||||
value = trieNext(iter, language, 0);
|
||||
}
|
||||
if (value >= 0) {
|
||||
if (!language.isEmpty()) {
|
||||
retainOldMask |= 4;
|
||||
|
@ -157,45 +280,54 @@ public final class XLikelySubtags {
|
|||
iter.resetToState64(trieUndState); // "und" ("*")
|
||||
state = 0;
|
||||
}
|
||||
// script lookup
|
||||
if (script.equals("Zzzz")) {
|
||||
script = "";
|
||||
}
|
||||
value = trieNext(iter, script, false);
|
||||
if (value >= 0) {
|
||||
|
||||
if (value > 0) {
|
||||
// Intermediate or final value from just language.
|
||||
if (value == SKIP_SCRIPT) {
|
||||
value = 0;
|
||||
}
|
||||
if (!script.isEmpty()) {
|
||||
retainOldMask |= 2;
|
||||
}
|
||||
state = iter.getState64();
|
||||
} else {
|
||||
retainOldMask |= 2;
|
||||
if (state == 0) {
|
||||
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
|
||||
} else {
|
||||
iter.resetToState64(state);
|
||||
value = trieNext(iter, "", false);
|
||||
assert value == 0;
|
||||
value = trieNext(iter, script, 0);
|
||||
if (value >= 0) {
|
||||
if (!script.isEmpty()) {
|
||||
retainOldMask |= 2;
|
||||
}
|
||||
state = iter.getState64();
|
||||
} else {
|
||||
retainOldMask |= 2;
|
||||
if (state == 0) {
|
||||
iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**")
|
||||
} else {
|
||||
iter.resetToState64(state);
|
||||
value = trieNext(iter, "", 0);
|
||||
assert value >= 0;
|
||||
state = iter.getState64();
|
||||
}
|
||||
}
|
||||
}
|
||||
// region lookup
|
||||
if (region.equals("ZZ")) {
|
||||
region = "";
|
||||
}
|
||||
value = trieNext(iter, region, true);
|
||||
if (value >= 0) {
|
||||
|
||||
if (value > 0) {
|
||||
// Final value from just language or language+script.
|
||||
if (!region.isEmpty()) {
|
||||
retainOldMask |= 1;
|
||||
}
|
||||
} else {
|
||||
retainOldMask |= 1;
|
||||
if (state == 0) {
|
||||
value = defaultLsrIndex;
|
||||
value = trieNext(iter, region, 0);
|
||||
if (value >= 0) {
|
||||
if (!region.isEmpty()) {
|
||||
retainOldMask |= 1;
|
||||
}
|
||||
} else {
|
||||
iter.resetToState64(state);
|
||||
value = trieNext(iter, "", true);
|
||||
if (value < 0) { // TODO: should never happen?! just assert value >= 0?
|
||||
return null;
|
||||
retainOldMask |= 1;
|
||||
if (state == 0) {
|
||||
value = defaultLsrIndex;
|
||||
} else {
|
||||
iter.resetToState64(state);
|
||||
value = trieNext(iter, "", 0);
|
||||
assert value > 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -220,34 +352,34 @@ public final class XLikelySubtags {
|
|||
return new LSR(language, script, region);
|
||||
}
|
||||
|
||||
private static final int trieNext(BytesTrie iter, String s, boolean finalSubtag) {
|
||||
private static final int trieNext(BytesTrie iter, String s, int i) {
|
||||
BytesTrie.Result result;
|
||||
if (s.isEmpty()) {
|
||||
result = iter.next('*');
|
||||
} else {
|
||||
int end = s.length() - 1;
|
||||
for (int i = 0;; ++i) {
|
||||
result = iter.next(s.charAt(i));
|
||||
for (;; ++i) {
|
||||
int c = s.charAt(i);
|
||||
if (i < end) {
|
||||
if (!result.hasNext()) {
|
||||
if (!iter.next(c).hasNext()) {
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
// last character of this subtag
|
||||
result = iter.next(c | 0x80);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!finalSubtag) {
|
||||
if (result == BytesTrie.Result.INTERMEDIATE_VALUE) {
|
||||
return 0; // value should be 0, don't care
|
||||
}
|
||||
} else {
|
||||
if (result.hasValue()) {
|
||||
return iter.getValue();
|
||||
}
|
||||
switch (result) {
|
||||
case NO_MATCH: return -1;
|
||||
case NO_VALUE: return 0;
|
||||
case INTERMEDIATE_VALUE:
|
||||
assert iter.getValue() == SKIP_SCRIPT;
|
||||
return SKIP_SCRIPT;
|
||||
case FINAL_VALUE: return iter.getValue();
|
||||
default: return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn,
|
||||
|
@ -263,11 +395,16 @@ public final class XLikelySubtags {
|
|||
|
||||
// value00 = lookup(result.language, "", "")
|
||||
BytesTrie iter = new BytesTrie(trie);
|
||||
int value = trieNext(iter, result.language, false);
|
||||
int value = trieNext(iter, result.language, 0);
|
||||
assert value >= 0;
|
||||
value = trieNext(iter, "", false);
|
||||
assert value >= 0;
|
||||
value = trieNext(iter, "", true);
|
||||
if (value == 0) {
|
||||
value = trieNext(iter, "", 0);
|
||||
assert value >= 0;
|
||||
if (value == 0) {
|
||||
value = trieNext(iter, "", 0);
|
||||
}
|
||||
}
|
||||
assert value > 0;
|
||||
LSR value00 = lsrs[value];
|
||||
boolean favorRegionOk = false;
|
||||
if (result.script.equals(value00.script)) { //script is default
|
||||
|
@ -292,26 +429,24 @@ public final class XLikelySubtags {
|
|||
}
|
||||
|
||||
private Map<String, LSR> getTable() {
|
||||
Map<String, LSR> map = new LinkedHashMap<>();
|
||||
Set<String> prefixes = new HashSet<>();
|
||||
Map<String, LSR> map = new TreeMap<>();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (BytesTrie.Entry entry : trie) {
|
||||
sb.setLength(0);
|
||||
int length = entry.bytesLength();
|
||||
for (int i = 0; i < length;) {
|
||||
byte b = entry.byteAt(i++);
|
||||
sb.append((char) b);
|
||||
if (i < length && prefixes.contains(sb.toString())) {
|
||||
sb.append('-');
|
||||
if (b == '*') {
|
||||
sb.append("*-");
|
||||
} else if (b >= 0) {
|
||||
sb.append((char) b);
|
||||
} else { // end of subtag
|
||||
sb.append((char) (b & 0x7f)).append('-');
|
||||
}
|
||||
}
|
||||
String s = sb.toString();
|
||||
if (entry.value == 0) {
|
||||
// intermediate match point
|
||||
prefixes.add(s);
|
||||
} else {
|
||||
map.put(s, lsrs[entry.value]);
|
||||
}
|
||||
assert sb.length() > 0 && sb.charAt(sb.length() - 1) == '-';
|
||||
sb.setLength(sb.length() - 1);
|
||||
map.put(sb.toString(), lsrs[entry.value]);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
|
|
@ -1,681 +0,0 @@
|
|||
// © 2017 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.locale;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import com.ibm.icu.impl.locale.LocaleDistance.DistanceOption;
|
||||
import com.ibm.icu.util.LocalePriorityList;
|
||||
import com.ibm.icu.util.Output;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
|
||||
/**
|
||||
* Immutable class that picks best match between user's desired locales and application's supported locales.
|
||||
* @author markdavis
|
||||
*/
|
||||
public final class XLocaleMatcher {
|
||||
private static final LSR UND_LSR = new LSR("und","","");
|
||||
private static final ULocale UND_LOCALE = new ULocale("und");
|
||||
private static final Iterator<ULocale> NULL_ITERATOR = null;
|
||||
|
||||
// Activates debugging output to stderr with details of GetBestMatch.
|
||||
private static final boolean TRACE_MATCHER = false;
|
||||
|
||||
// List of indexes, optimized for one or two.
|
||||
private static final class Indexes {
|
||||
// Some indexes without further object creation and auto-boxing.
|
||||
int first, second = -1;
|
||||
// We could turn the List into an int array + length and manage its growth.
|
||||
List<Integer> remaining;
|
||||
|
||||
Indexes(int firstIndex) {
|
||||
first = firstIndex;
|
||||
}
|
||||
void add(int i) {
|
||||
if (second < 0) {
|
||||
second = i;
|
||||
} else {
|
||||
if (remaining == null) {
|
||||
remaining = new ArrayList<>();
|
||||
}
|
||||
remaining.add(i);
|
||||
}
|
||||
}
|
||||
int getFirst() { return first; }
|
||||
int get(int i) { // returns -1 when i >= length
|
||||
if (i == 0) {
|
||||
return first;
|
||||
} else if (i == 1) {
|
||||
return second;
|
||||
} else if (remaining != null && (i -= 2) < remaining.size()) {
|
||||
return remaining.get(i);
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Make public, and add public methods that return it.
|
||||
private static final class Result {
|
||||
private Result(ULocale desired, ULocale supported,
|
||||
/* Locale jdesired, */ Locale jsupported,
|
||||
int desIndex, int suppIndex) {
|
||||
desiredLocale = desired;
|
||||
supportedLocale = supported;
|
||||
// desiredJavaLocale = jdesired;
|
||||
supportedJavaLocale = jsupported;
|
||||
desiredIndex = desIndex;
|
||||
supportedIndex = suppIndex;
|
||||
}
|
||||
|
||||
ULocale desiredLocale;
|
||||
ULocale supportedLocale;
|
||||
// Locale desiredJavaLocale;
|
||||
Locale supportedJavaLocale;
|
||||
int desiredIndex;
|
||||
@SuppressWarnings("unused") // unused until public, for other wrappers
|
||||
int supportedIndex;
|
||||
}
|
||||
|
||||
// normally the default values, but can be set via constructor
|
||||
|
||||
private final int thresholdDistance;
|
||||
private final int demotionPerAdditionalDesiredLocale;
|
||||
private final DistanceOption distanceOption;
|
||||
|
||||
// built based on application's supported languages in constructor
|
||||
|
||||
private final ULocale[] supportedLocales;
|
||||
private final Locale[] supportedJavaLocales;
|
||||
private final Map<ULocale, Integer> supportedToIndex;
|
||||
private final Map<LSR, Indexes> supportedLsrToIndexes;
|
||||
// Array versions of the supportedLsrToIndexes keys and values.
|
||||
// The distance lookup loops over the supportedLsrs and returns the index of the best match.
|
||||
private final LSR[] supportedLsrs;
|
||||
private final Indexes[] supportedIndexes;
|
||||
private final ULocale defaultLocale;
|
||||
private final Locale defaultJavaLocale;
|
||||
private final int defaultLocaleIndex;
|
||||
|
||||
public static class Builder {
|
||||
/**
|
||||
* Supported locales. A Set, to avoid duplicates.
|
||||
* Maintains iteration order for consistent matching behavior (first best match wins).
|
||||
*/
|
||||
private Set<ULocale> supportedLocales;
|
||||
private int thresholdDistance = -1;
|
||||
private int demotionPerAdditionalDesiredLocale = -1;;
|
||||
private ULocale defaultLocale;
|
||||
private DistanceOption distanceOption;
|
||||
/**
|
||||
* @param locales the languagePriorityList to set
|
||||
* @return this Builder object
|
||||
*/
|
||||
public Builder setSupportedLocales(String locales) {
|
||||
return setSupportedLocales(LocalePriorityList.add(locales).build());
|
||||
}
|
||||
public Builder setSupportedLocales(Iterable<ULocale> locales) {
|
||||
supportedLocales = new LinkedHashSet<>(); // maintain order
|
||||
for (ULocale locale : locales) {
|
||||
supportedLocales.add(locale);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Builder setSupportedLocales(Collection<ULocale> locales) {
|
||||
supportedLocales = new LinkedHashSet<>(locales); // maintain order
|
||||
return this;
|
||||
}
|
||||
public Builder setSupportedJavaLocales(Collection<Locale> locales) {
|
||||
supportedLocales = new LinkedHashSet<>(locales.size()); // maintain order
|
||||
for (Locale locale : locales) {
|
||||
supportedLocales.add(ULocale.forLocale(locale));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Builder addSupportedLocale(ULocale locale) {
|
||||
if (supportedLocales == null) {
|
||||
supportedLocales = new LinkedHashSet<>();
|
||||
}
|
||||
supportedLocales.add(locale);
|
||||
return this;
|
||||
}
|
||||
public Builder addSupportedLocale(Locale locale) {
|
||||
return addSupportedLocale(ULocale.forLocale(locale));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param thresholdDistance the thresholdDistance to set, with -1 = default
|
||||
* @return this Builder object
|
||||
*/
|
||||
public Builder setThresholdDistance(int thresholdDistance) {
|
||||
if (thresholdDistance > 100) {
|
||||
thresholdDistance = 100;
|
||||
}
|
||||
this.thresholdDistance = thresholdDistance;
|
||||
return this;
|
||||
}
|
||||
/**
|
||||
* @param demotionPerAdditionalDesiredLocale the demotionPerAdditionalDesiredLocale to set, with -1 = default
|
||||
* @return this Builder object
|
||||
*/
|
||||
public Builder setDemotionPerAdditionalDesiredLocale(int demotionPerAdditionalDesiredLocale) {
|
||||
this.demotionPerAdditionalDesiredLocale = demotionPerAdditionalDesiredLocale;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the default language, with null = default = first supported language
|
||||
* @param defaultLocale the default language
|
||||
* @return this Builder object
|
||||
*/
|
||||
public Builder setDefaultLanguage(ULocale defaultLocale) {
|
||||
this.defaultLocale = defaultLocale;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* If true, then the language differences are smaller than than script differences.
|
||||
* This is used in situations (such as maps) where it is better to fall back to the same script than a similar language.
|
||||
* @param distanceOption the distance option
|
||||
* @return this Builder object
|
||||
*/
|
||||
public Builder setDistanceOption(DistanceOption distanceOption) {
|
||||
this.distanceOption = distanceOption;
|
||||
return this;
|
||||
}
|
||||
|
||||
public XLocaleMatcher build() {
|
||||
return new XLocaleMatcher(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder().append("{XLocaleMatcher.Builder");
|
||||
if (!supportedLocales.isEmpty()) {
|
||||
s.append(" supported={").append(supportedLocales.toString()).append('}');
|
||||
}
|
||||
if (defaultLocale != null) {
|
||||
s.append(" default=").append(defaultLocale.toString());
|
||||
}
|
||||
if (distanceOption != null) {
|
||||
s.append(" distance=").append(distanceOption.toString());
|
||||
}
|
||||
if (thresholdDistance >= 0) {
|
||||
s.append(String.format(" threshold=%d", thresholdDistance));
|
||||
}
|
||||
if (demotionPerAdditionalDesiredLocale >= 0) {
|
||||
s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
|
||||
}
|
||||
return s.append('}').toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a builder used in chaining parameters for building a Locale Matcher.
|
||||
* @return this Builder object
|
||||
*/
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
/**
 * Convenience constructor: builds a matcher whose supported locales are
 * parsed from a language priority list string; all other settings are defaults.
 */
public XLocaleMatcher(String supportedLocales) {
    this(new Builder().setSupportedLocales(supportedLocales));
}
|
||||
/**
 * Convenience constructor: builds a matcher whose supported locales come
 * from the given priority list; all other settings are defaults.
 */
public XLocaleMatcher(LocalePriorityList supportedLocales) {
    this(new Builder().setSupportedLocales(supportedLocales));
}
|
||||
/**
 * Convenience constructor: builds a matcher whose supported locales are
 * the given set; all other settings are defaults.
 */
public XLocaleMatcher(Set<ULocale> supportedLocales) {
    this(new Builder().setSupportedLocales(supportedLocales));
}
|
||||
|
||||
/**
|
||||
* Creates a locale matcher with the given Builder parameters.
|
||||
*/
|
||||
private XLocaleMatcher(Builder builder) {
|
||||
thresholdDistance = builder.thresholdDistance < 0 ?
|
||||
LocaleDistance.INSTANCE.getDefaultScriptDistance() : builder.thresholdDistance;
|
||||
// Store the supported locales in input order,
|
||||
// so that when different types are used (e.g., java.util.Locale)
|
||||
// we can return those by parallel index.
|
||||
int supportedLocalesLength = builder.supportedLocales.size();
|
||||
supportedLocales = new ULocale[supportedLocalesLength];
|
||||
supportedJavaLocales = new Locale[supportedLocalesLength];
|
||||
supportedToIndex = new HashMap<>(supportedLocalesLength);
|
||||
// We need an unordered map from LSR to first supported locale with that LSR,
|
||||
// and an ordered list of (LSR, Indexes).
|
||||
// We use a LinkedHashMap for both,
|
||||
// and insert the supported locales in the following order:
|
||||
// 1. First supported locale.
|
||||
// 2. Priority locales in builder order.
|
||||
// 3. Remaining locales in builder order.
|
||||
supportedLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
|
||||
Map<LSR, Indexes> otherLsrToIndexes = null;
|
||||
LSR firstLSR = null;
|
||||
int i = 0;
|
||||
for (ULocale locale : builder.supportedLocales) {
|
||||
supportedLocales[i] = locale;
|
||||
supportedJavaLocales[i] = locale.toLocale();
|
||||
// supportedToIndex.putIfAbsent(locale, i)
|
||||
Integer oldIndex = supportedToIndex.get(locale);
|
||||
if (oldIndex == null) {
|
||||
supportedToIndex.put(locale, i);
|
||||
}
|
||||
LSR lsr = getMaximalLsrOrUnd(locale);
|
||||
if (i == 0) {
|
||||
firstLSR = lsr;
|
||||
supportedLsrToIndexes.put(lsr, new Indexes(0));
|
||||
} else if (lsr.equals(firstLSR) || LocaleDistance.INSTANCE.isParadigmLSR(lsr)) {
|
||||
addIndex(supportedLsrToIndexes, lsr, i);
|
||||
} else {
|
||||
if (otherLsrToIndexes == null) {
|
||||
otherLsrToIndexes = new LinkedHashMap<>(supportedLocalesLength);
|
||||
}
|
||||
addIndex(otherLsrToIndexes, lsr, i);
|
||||
}
|
||||
++i;
|
||||
}
|
||||
if (otherLsrToIndexes != null) {
|
||||
supportedLsrToIndexes.putAll(otherLsrToIndexes);
|
||||
}
|
||||
int numSuppLsrs = supportedLsrToIndexes.size();
|
||||
supportedLsrs = supportedLsrToIndexes.keySet().toArray(new LSR[numSuppLsrs]);
|
||||
supportedIndexes = supportedLsrToIndexes.values().toArray(new Indexes[numSuppLsrs]);
|
||||
ULocale def;
|
||||
Locale jdef = null;
|
||||
int idef = -1;
|
||||
if (builder.defaultLocale != null) {
|
||||
def = builder.defaultLocale;
|
||||
} else if (supportedLocalesLength > 0) {
|
||||
def = supportedLocales[0]; // first language
|
||||
jdef = supportedJavaLocales[0];
|
||||
idef = 0;
|
||||
} else {
|
||||
def = null;
|
||||
}
|
||||
if (jdef == null && def != null) {
|
||||
jdef = def.toLocale();
|
||||
}
|
||||
defaultLocale = def;
|
||||
defaultJavaLocale = jdef;
|
||||
defaultLocaleIndex = idef;
|
||||
demotionPerAdditionalDesiredLocale = builder.demotionPerAdditionalDesiredLocale < 0 ?
|
||||
LocaleDistance.INSTANCE.getDefaultRegionDistance() + 1 :
|
||||
builder.demotionPerAdditionalDesiredLocale;
|
||||
distanceOption = builder.distanceOption;
|
||||
}
|
||||
|
||||
private static final void addIndex(Map<LSR, Indexes> lsrToIndexes, LSR lsr, int i) {
|
||||
Indexes indexes = lsrToIndexes.get(lsr);
|
||||
if (indexes == null) {
|
||||
lsrToIndexes.put(lsr, new Indexes(i));
|
||||
} else {
|
||||
indexes.add(i);
|
||||
}
|
||||
}
|
||||
|
||||
private static final LSR getMaximalLsrOrUnd(ULocale locale) {
|
||||
if (locale.equals(UND_LOCALE)) {
|
||||
return UND_LSR;
|
||||
} else {
|
||||
return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
|
||||
}
|
||||
}
|
||||
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(ULocale ulocale) {
|
||||
return getBestMatch(ulocale, NULL_ITERATOR).supportedLocale;
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(String languageList) {
|
||||
return getBestMatch(LocalePriorityList.add(languageList).build(), null);
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(ULocale... locales) {
|
||||
return getBestMatch(Arrays.asList(locales), null);
|
||||
}
|
||||
/** Convenience method */
|
||||
public ULocale getBestMatch(Iterable<ULocale> desiredLocales) {
|
||||
return getBestMatch(desiredLocales, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the best match between the desired languages and supported languages
|
||||
* @param desiredLocales Typically the supplied user's languages, in order of preference, with best first.
|
||||
* @param outputBestDesired The one of the desired languages that matched best (can be null).
|
||||
* Set to null if the best match was not below the threshold distance.
|
||||
* @return the best match.
|
||||
*/
|
||||
public ULocale getBestMatch(Iterable<ULocale> desiredLocales, Output<ULocale> outputBestDesired) {
|
||||
Iterator<ULocale> desiredIter = desiredLocales.iterator();
|
||||
if (!desiredIter.hasNext()) {
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = null;
|
||||
}
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
|
||||
}
|
||||
return defaultLocale;
|
||||
}
|
||||
ULocale desiredLocale = desiredIter.next();
|
||||
return getBestMatch(desiredLocale, desiredIter, outputBestDesired);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param desiredLocale First desired locale.
|
||||
* @param remainingIter Remaining desired locales, null or empty if none.
|
||||
* @param outputBestDesired If not null,
|
||||
* will be set to the desired locale that matches the best supported one.
|
||||
* @return the best supported locale.
|
||||
*/
|
||||
private ULocale getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter,
|
||||
Output<ULocale> outputBestDesired) {
|
||||
Result result = getBestMatch(desiredLocale, remainingIter);
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = result.desiredLocale;
|
||||
}
|
||||
return result.supportedLocale;
|
||||
}
|
||||
|
||||
private Result getBestMatch(ULocale desiredLocale, Iterator<ULocale> remainingIter) {
|
||||
int desiredIndex = 0;
|
||||
int bestDesiredIndex = -1;
|
||||
ULocale bestDesiredLocale = null;
|
||||
int bestSupportedLsrIndex = 0;
|
||||
for (int bestDistance = thresholdDistance; bestDistance > 0;
|
||||
bestDistance -= demotionPerAdditionalDesiredLocale) {
|
||||
// Quick check for exact locale match.
|
||||
Integer supportedIndex = supportedToIndex.get(desiredLocale);
|
||||
if (supportedIndex != null) {
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s: desired=supported\n", desiredLocale);
|
||||
}
|
||||
int suppIndex = supportedIndex;
|
||||
return new Result(desiredLocale, supportedLocales[suppIndex],
|
||||
supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
|
||||
}
|
||||
// Quick check for exact maximized LSR.
|
||||
LSR desiredLSR = getMaximalLsrOrUnd(desiredLocale);
|
||||
Indexes indexes = supportedLsrToIndexes.get(desiredLSR);
|
||||
if (indexes != null) {
|
||||
// If this is a supported LSR, return the first locale.
|
||||
// We already know the exact locale isn't there.
|
||||
int suppIndex = indexes.getFirst();
|
||||
ULocale result = supportedLocales[suppIndex];
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s: desiredLSR=supportedLSR\n", result);
|
||||
}
|
||||
return new Result(desiredLocale, result,
|
||||
supportedJavaLocales[suppIndex], desiredIndex, suppIndex);
|
||||
}
|
||||
int bestIndexAndDistance = LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
desiredLSR, supportedLsrs, bestDistance, distanceOption);
|
||||
if (bestIndexAndDistance >= 0) {
|
||||
bestDistance = bestIndexAndDistance & 0xff;
|
||||
bestDesiredIndex = desiredIndex;
|
||||
bestDesiredLocale = desiredLocale;
|
||||
bestSupportedLsrIndex = bestIndexAndDistance >> 8;
|
||||
if (bestDistance == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (remainingIter == null || !remainingIter.hasNext()) {
|
||||
break;
|
||||
}
|
||||
desiredLocale = remainingIter.next();
|
||||
++desiredIndex;
|
||||
}
|
||||
if (bestDesiredIndex < 0) {
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning default %s: no good match\n", defaultLocale);
|
||||
}
|
||||
return new Result(null, defaultLocale, defaultJavaLocale, -1, defaultLocaleIndex);
|
||||
}
|
||||
// Pick exact match if there is one.
|
||||
// The length of the list is normally 1.
|
||||
Indexes bestSupportedIndexes = supportedIndexes[bestSupportedLsrIndex];
|
||||
int suppIndex;
|
||||
for (int i = 0; (suppIndex = bestSupportedIndexes.get(i)) >= 0; ++i) {
|
||||
ULocale locale = supportedLocales[suppIndex];
|
||||
if (bestDesiredLocale.equals(locale)) {
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s: desired=best matching supported language\n",
|
||||
bestDesiredLocale);
|
||||
}
|
||||
return new Result(bestDesiredLocale, locale,
|
||||
supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
|
||||
}
|
||||
}
|
||||
// Otherwise return the first of the supported languages that share the best-matching LSR.
|
||||
suppIndex = bestSupportedIndexes.getFirst();
|
||||
ULocale result = supportedLocales[suppIndex];
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning %s: first best matching supported language\n", result);
|
||||
}
|
||||
return new Result(bestDesiredLocale, result,
|
||||
supportedJavaLocales[suppIndex], bestDesiredIndex, suppIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the best match between the desired languages and supported languages
|
||||
* @param desiredLocale the supplied user's language.
|
||||
* @param outputBestDesired The one of the desired languages that matched best.
|
||||
* Set to null if the best match was not below the threshold distance.
|
||||
* @return the best match.
|
||||
*/
|
||||
public ULocale getBestMatch(ULocale desiredLocale, Output<ULocale> outputBestDesired) {
|
||||
return getBestMatch(desiredLocale, null, outputBestDesired);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts Locales to ULocales on the fly.
|
||||
*/
|
||||
private static final class LocalesWrapper implements Iterator<ULocale> {
|
||||
private Iterator<Locale> locales;
|
||||
// Cache locales to avoid conversion of the result.
|
||||
private Locale first, second;
|
||||
private List<Locale> remaining;
|
||||
|
||||
LocalesWrapper(Iterator<Locale> locales) {
|
||||
this.locales = locales;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return locales.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ULocale next() {
|
||||
Locale locale = locales.next();
|
||||
if (first == null) {
|
||||
first = locale;
|
||||
} else if (second == null) {
|
||||
second = locale;
|
||||
} else {
|
||||
if (remaining == null) {
|
||||
remaining = new ArrayList<>();
|
||||
}
|
||||
remaining.add(locale);
|
||||
}
|
||||
return ULocale.forLocale(locale);
|
||||
}
|
||||
|
||||
Locale getJavaLocale(int i) {
|
||||
if (i == 0) {
|
||||
return first;
|
||||
} else if (i == 1) {
|
||||
return second;
|
||||
} else {
|
||||
// TODO: test code coverage
|
||||
return remaining.get(i - 2);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
||||
public Locale getBestJavaMatch(Iterable<Locale> desiredLocales, Output<Locale> outputBestDesired) {
|
||||
Iterator<Locale> desiredIter = desiredLocales.iterator();
|
||||
if (!desiredIter.hasNext()) {
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = null;
|
||||
}
|
||||
if (TRACE_MATCHER) {
|
||||
System.err.printf("Returning default %s: no desired languages\n", defaultLocale);
|
||||
}
|
||||
return defaultJavaLocale;
|
||||
}
|
||||
LocalesWrapper wrapper = new LocalesWrapper(desiredIter);
|
||||
ULocale desiredLocale = wrapper.next();
|
||||
Result result = getBestMatch(desiredLocale, NULL_ITERATOR);
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = result.desiredIndex >= 0 ?
|
||||
wrapper.getJavaLocale(result.desiredIndex) : null;
|
||||
}
|
||||
return result.supportedJavaLocale;
|
||||
}
|
||||
|
||||
public Locale getBestJavaMatch(Locale desiredLocale, Output<Locale> outputBestDesired) {
|
||||
ULocale desiredULocale = ULocale.forLocale(desiredLocale);
|
||||
Result result = getBestMatch(desiredULocale, NULL_ITERATOR);
|
||||
if (outputBestDesired != null) {
|
||||
outputBestDesired.value = result.desiredIndex >= 0 ? desiredLocale : null;
|
||||
}
|
||||
return result.supportedJavaLocale;
|
||||
}
|
||||
|
||||
/** Combine features of the desired locale into those of the supported, and return result. */
|
||||
public static ULocale combine(ULocale bestSupported, ULocale bestDesired) {
|
||||
// for examples of extensions, variants, see
|
||||
// http://unicode.org/repos/cldr/tags/latest/common/bcp47/
|
||||
// http://unicode.org/repos/cldr/tags/latest/common/validity/variant.xml
|
||||
|
||||
if (!bestSupported.equals(bestDesired) && bestDesired != null) {
|
||||
// add region, variants, extensions
|
||||
ULocale.Builder b = new ULocale.Builder().setLocale(bestSupported);
|
||||
|
||||
// copy the region from the desired, if there is one
|
||||
String region = bestDesired.getCountry();
|
||||
if (!region.isEmpty()) {
|
||||
b.setRegion(region);
|
||||
}
|
||||
|
||||
// copy the variants from desired, if there is one
|
||||
// note that this will override any subvariants. Eg "sco-ulster-fonipa" + "…-fonupa" => "sco-fonupa" (nuking ulster)
|
||||
String variants = bestDesired.getVariant();
|
||||
if (!variants.isEmpty()) {
|
||||
b.setVariant(variants);
|
||||
}
|
||||
|
||||
// copy the extensions from desired, if there are any
|
||||
// note that this will override any subkeys. Eg "th-u-nu-latn-ca-buddhist" + "…-u-nu-native" => "th-u-nu-native" (nuking calendar)
|
||||
for (char extensionKey : bestDesired.getExtensionKeys()) {
|
||||
b.setExtension(extensionKey, bestDesired.getExtension(extensionKey));
|
||||
}
|
||||
bestSupported = b.build();
|
||||
}
|
||||
return bestSupported;
|
||||
}
|
||||
|
||||
/** Returns the distance between the two languages. The values are not necessarily symmetric.
|
||||
* @param desired A locale desired by the user
|
||||
* @param supported A locale supported by a program.
|
||||
* @return A return of 0 is a complete match, and 100 is a failure case (above the thresholdDistance).
|
||||
* A language is first maximized with add likely subtags, then compared.
|
||||
*/
|
||||
public int distance(ULocale desired, ULocale supported) {
|
||||
return LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired),
|
||||
new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported) },
|
||||
thresholdDistance, distanceOption) & 0xff;
|
||||
}
|
||||
|
||||
/** Convenience method */
|
||||
public int distance(String desiredLanguage, String supportedLanguage) {
|
||||
return LocaleDistance.INSTANCE.getBestIndexAndDistance(
|
||||
XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(desiredLanguage)),
|
||||
new LSR[] { XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(new ULocale(supportedLanguage)) },
|
||||
thresholdDistance, distanceOption) & 0xff;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder().append("{XLocaleMatcher");
|
||||
if (supportedLocales.length > 0) {
|
||||
s.append(" supported={").append(supportedLocales[0].toString());
|
||||
for (int i = 1; i < supportedLocales.length; ++i) {
|
||||
s.append(", ").append(supportedLocales[1].toString());
|
||||
}
|
||||
s.append('}');
|
||||
}
|
||||
s.append(" default=").append(Objects.toString(defaultLocale));
|
||||
if (distanceOption != null) {
|
||||
s.append(" distance=").append(distanceOption.toString());
|
||||
}
|
||||
if (thresholdDistance >= 0) {
|
||||
s.append(String.format(" threshold=%d", thresholdDistance));
|
||||
}
|
||||
s.append(String.format(" demotion=%d", demotionPerAdditionalDesiredLocale));
|
||||
return s.append('}').toString();
|
||||
}
|
||||
|
||||
/** Return the inverse of the distance: that is, 1-distance(desired, supported) */
|
||||
public double match(ULocale desired, ULocale supported) {
|
||||
return (100-distance(desired, supported))/100.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a fraction between 0 and 1, where 1 means that the languages are a
|
||||
* perfect match, and 0 means that they are completely different. This is (100-distance(desired, supported))/100.0.
|
||||
* <br>Note that
|
||||
* the precise values may change over time; no code should be made dependent
|
||||
* on the values remaining constant.
|
||||
* @param desired Desired locale
|
||||
* @param desiredMax Maximized locale (using likely subtags)
|
||||
* @param supported Supported locale
|
||||
* @param supportedMax Maximized locale (using likely subtags)
|
||||
* @return value between 0 and 1, inclusive.
|
||||
* @deprecated Use the form with 2 parameters instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public double match(ULocale desired, ULocale desiredMax, ULocale supported, ULocale supportedMax) {
|
||||
return match(desired, supported);
|
||||
}
|
||||
|
||||
/**
|
||||
* Canonicalize a locale (language). Note that for now, it is canonicalizing
|
||||
* according to CLDR conventions (he vs iw, etc), since that is what is needed
|
||||
* for likelySubtags.
|
||||
* @param ulocale language/locale code
|
||||
* @return ULocale with remapped subtags.
|
||||
* @stable ICU 4.4
|
||||
*/
|
||||
public ULocale canonicalize(ULocale ulocale) {
|
||||
// TODO
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the thresholdDistance. Any distance above this value is treated as a match failure.
|
||||
*/
|
||||
public int getThresholdDistance() {
|
||||
return thresholdDistance;
|
||||
}
|
||||
}
|
|
@ -2,6 +2,7 @@
|
|||
// License & terms of use: http://www.unicode.org/copyright.html#License
|
||||
package com.ibm.icu.impl.number;
|
||||
|
||||
import com.ibm.icu.impl.FormattedStringBuilder;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
||||
|
@ -290,7 +291,7 @@ public class AffixUtils {
|
|||
/**
|
||||
* Executes the unescape state machine. Replaces the unquoted characters "-", "+", "%", "‰", and "¤"
|
||||
* with the corresponding symbols provided by the {@link SymbolProvider}, and inserts the result into
|
||||
* the NumberStringBuilder at the requested location.
|
||||
* the FormattedStringBuilder at the requested location.
|
||||
*
|
||||
* <p>
|
||||
* Example input: "'-'¤x"; example output: "-$x"
|
||||
|
@ -298,16 +299,16 @@ public class AffixUtils {
|
|||
* @param affixPattern
|
||||
* The original string to be unescaped.
|
||||
* @param output
|
||||
* The NumberStringBuilder to mutate with the result.
|
||||
* The FormattedStringBuilder to mutate with the result.
|
||||
* @param position
|
||||
* The index into the NumberStringBuilder to insert the the string.
|
||||
* The index into the FormattedStringBuilder to insert the string.
|
||||
* @param provider
|
||||
* An object to generate locale symbols.
|
||||
* @return The length of the string added to affixPattern.
|
||||
*/
|
||||
public static int unescape(
|
||||
CharSequence affixPattern,
|
||||
NumberStringBuilder output,
|
||||
FormattedStringBuilder output,
|
||||
int position,
|
||||
SymbolProvider provider,
|
||||
NumberFormat.Field field) {
|
||||
|
|
|
@ -4,6 +4,8 @@ package com.ibm.icu.impl.number;
|
|||
|
||||
import java.text.Format.Field;
|
||||
|
||||
import com.ibm.icu.impl.FormattedStringBuilder;
|
||||
|
||||
/**
|
||||
* The canonical implementation of {@link Modifier}, containing a prefix and suffix string.
|
||||
*/
|
||||
|
@ -52,7 +54,7 @@ public class ConstantAffixModifier implements Modifier {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int apply(NumberStringBuilder output, int leftIndex, int rightIndex) {
|
||||
public int apply(FormattedStringBuilder output, int leftIndex, int rightIndex) {
|
||||
// Insert the suffix first since inserting the prefix will change the rightIndex
|
||||
int length = output.insert(rightIndex, suffix, field);
|
||||
length += output.insert(leftIndex, prefix, field);
|
||||
|
|
|
@ -5,14 +5,16 @@ package com.ibm.icu.impl.number;
|
|||
import java.text.Format.Field;
|
||||
import java.util.Arrays;
|
||||
|
||||
import com.ibm.icu.impl.FormattedStringBuilder;
|
||||
|
||||
/**
|
||||
* An implementation of {@link Modifier} that allows for multiple types of fields in the same modifier.
|
||||
* Constructed based on the contents of two {@link NumberStringBuilder} instances (one for the prefix,
|
||||
* Constructed based on the contents of two {@link FormattedStringBuilder} instances (one for the prefix,
|
||||
* one for the suffix).
|
||||
*/
|
||||
public class ConstantMultiFieldModifier implements Modifier {
|
||||
|
||||
// NOTE: In Java, these are stored as array pointers. In C++, the NumberStringBuilder is stored by
|
||||
// NOTE: In Java, these are stored as array pointers. In C++, the FormattedStringBuilder is stored by
|
||||
// value and is treated internally as immutable.
|
||||
protected final char[] prefixChars;
|
||||
protected final char[] suffixChars;
|
||||
|
@ -25,16 +27,16 @@ public class ConstantMultiFieldModifier implements Modifier {
|
|||
private final Parameters parameters;
|
||||
|
||||
public ConstantMultiFieldModifier(
|
||||
NumberStringBuilder prefix,
|
||||
NumberStringBuilder suffix,
|
||||
FormattedStringBuilder prefix,
|
||||
FormattedStringBuilder suffix,
|
||||
boolean overwrite,
|
||||
boolean strong) {
|
||||
this(prefix, suffix, overwrite, strong, null);
|
||||
}
|
||||
|
||||
public ConstantMultiFieldModifier(
|
||||
NumberStringBuilder prefix,
|
||||
NumberStringBuilder suffix,
|
||||
FormattedStringBuilder prefix,
|
||||
FormattedStringBuilder suffix,
|
||||
boolean overwrite,
|
||||
boolean strong,
|
||||
Parameters parameters) {
|
||||
|
@ -48,7 +50,7 @@ public class ConstantMultiFieldModifier implements Modifier {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int apply(NumberStringBuilder output, int leftIndex, int rightIndex) {
|
||||
public int apply(FormattedStringBuilder output, int leftIndex, int rightIndex) {
|
||||
int length = output.insert(leftIndex, prefixChars, prefixFields);
|
||||
if (overwrite) {
|
||||
length += output.splice(leftIndex + length, rightIndex + length, "", 0, 0, null);
|
||||
|
@ -109,7 +111,7 @@ public class ConstantMultiFieldModifier implements Modifier {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
NumberStringBuilder temp = new NumberStringBuilder();
|
||||
FormattedStringBuilder temp = new FormattedStringBuilder();
|
||||
apply(temp, 0, 0);
|
||||
int prefixLength = getPrefixLength();
|
||||
return String.format("<ConstantMultiFieldModifier prefix:'%s' suffix:'%s'>",
|
||||
|
|
|
@ -4,6 +4,7 @@ package com.ibm.icu.impl.number;
|
|||
|
||||
import java.text.Format.Field;
|
||||
|
||||
import com.ibm.icu.impl.FormattedStringBuilder;
|
||||
import com.ibm.icu.text.DecimalFormatSymbols;
|
||||
import com.ibm.icu.text.NumberFormat;
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
|
@ -30,8 +31,8 @@ public class CurrencySpacingEnabledModifier extends ConstantMultiFieldModifier {
|
|||
|
||||
/** Safe code path */
|
||||
public CurrencySpacingEnabledModifier(
|
||||
NumberStringBuilder prefix,
|
||||
NumberStringBuilder suffix,
|
||||
FormattedStringBuilder prefix,
|
||||
FormattedStringBuilder suffix,
|
||||
boolean overwrite,
|
||||
boolean strong,
|
||||
DecimalFormatSymbols symbols) {
|
||||
|
@ -73,7 +74,7 @@ public class CurrencySpacingEnabledModifier extends ConstantMultiFieldModifier {
|
|||
|
||||
/** Safe code path */
|
||||
@Override
|
||||
public int apply(NumberStringBuilder output, int leftIndex, int rightIndex) {
|
||||
public int apply(FormattedStringBuilder output, int leftIndex, int rightIndex) {
|
||||
// Currency spacing logic
|
||||
int length = 0;
|
||||
if (rightIndex - leftIndex > 0
|
||||
|
@ -96,7 +97,7 @@ public class CurrencySpacingEnabledModifier extends ConstantMultiFieldModifier {
|
|||
|
||||
/** Unsafe code path */
|
||||
public static int applyCurrencySpacing(
|
||||
NumberStringBuilder output,
|
||||
FormattedStringBuilder output,
|
||||
int prefixStart,
|
||||
int prefixLen,
|
||||
int suffixStart,
|
||||
|
@ -117,7 +118,7 @@ public class CurrencySpacingEnabledModifier extends ConstantMultiFieldModifier {
|
|||
|
||||
/** Unsafe code path */
|
||||
private static int applyCurrencySpacingAffix(
|
||||
NumberStringBuilder output,
|
||||
FormattedStringBuilder output,
|
||||
int index,
|
||||
byte affix,
|
||||
DecimalFormatSymbols symbols) {
|
||||
|
|
|
@ -4,6 +4,7 @@ package com.ibm.icu.impl.number;
|
|||
|
||||
import java.text.Format.Field;
|
||||
|
||||
import com.ibm.icu.impl.FormattedStringBuilder;
|
||||
import com.ibm.icu.impl.StandardPlural;
|
||||
|
||||
/**
|
||||
|
@ -29,7 +30,7 @@ public interface Modifier {
|
|||
* number is being formatted.
|
||||
* @return The number of characters (UTF-16 code units) that were added to the string builder.
|
||||
*/
|
||||
public int apply(NumberStringBuilder output, int leftIndex, int rightIndex);
|
||||
public int apply(FormattedStringBuilder output, int leftIndex, int rightIndex);
|
||||
|
||||
/**
|
||||
* Gets the length of the prefix. This information can be used in combination with {@link #apply} to
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue