Merge pull request #42 from hugovdm/UnitPreferences

UnitPreferences class in unitsdata.cpp
This commit is contained in:
Shane F. Carr 2020-05-12 13:41:30 -05:00 committed by GitHub
commit 24494d985e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 459 additions and 12 deletions

View file

@ -323,6 +323,11 @@ public:
* @return the array pointer
*/
T *getAlias() const {
    // Returns the stored pointer unchanged.
    return ptr;
}
/**
 * Read-only access to the array, without any change of ownership.
 * @return the array pointer, as a pointer to const elements
 */
const T *getConstAlias() const {
    return ptr;
}
/**
* Returns the array limit. Simple convenience method.
* @return getAlias()+getCapacity()
@ -775,10 +780,14 @@ public:
return this->fCount;
}
T** getAlias() {
T** getAlias() const {
return this->fPool.getAlias();
}
// Read-only counterpart of getAlias(): exposes the pool's pointer array so
// that neither the pointers nor the pointed-to items can be modified.
const T *const *getConstAlias() const {
    const T *const *items = this->fPool.getConstAlias();
    return items;
}
/**
* Array item access (read-only).
* No index bounds check.

View file

@ -6,6 +6,7 @@
#if !UCONFIG_NO_FORMATTING
#include "cstring.h"
#include "number_decimalquantity.h"
#include "resource.h"
#include "unitsdata.h"
#include "uresimp.h"
@ -15,6 +16,8 @@ U_NAMESPACE_BEGIN
namespace {
using number::impl::DecimalQuantity;
void trimSpaces(CharString& factor, UErrorCode& status){
CharString trimmed;
for (int i = 0 ; i < factor.length(); i++) {
@ -41,20 +44,18 @@ class ConversionRateDataSink : public ResourceSink {
explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
/**
* Adds the conversion rate information found in value to the output vector.
* Method for use by `ures_getAllItemsWithFallback`. Adds the unit
* conversion rates that are found in `value` to the output vector.
*
* Each call to put() collects a ConversionRateInfo instance for the
* specified source unit identifier into the vector passed to the
* constructor, but only if an identical instance isn't already present.
*
* @param source The source unit identifier.
* @param value A resource containing conversion rate info (the base unit
* and factor, and possibly an offset).
* @param source This string must be "convertUnits": the resource that this
* class supports reading.
* @param value The "convertUnits" resource, containing unit conversion rate
* information.
* @param noFallback Ignored.
* @param status The standard ICU error code output parameter.
*/
void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
if (U_FAILURE(status)) return;
if (U_FAILURE(status)) { return; }
if (uprv_strcmp(source, "convertUnits") != 0) {
// This is very strict, however it is the cheapest way to be sure
// that with `value`, we're looking at the convertUnits table.
@ -79,7 +80,7 @@ class ConversionRateDataSink : public ResourceSink {
offset = value.getUnicodeString(status);
}
}
if (U_FAILURE(status)) return;
if (U_FAILURE(status)) { return; }
if (baseUnit.isBogus() || factor.isBogus()) {
// We could not find a usable conversion rate: bad resource.
status = U_MISSING_RESOURCE_ERROR;
@ -106,8 +107,257 @@ class ConversionRateDataSink : public ResourceSink {
MaybeStackVector<ConversionRateInfo> *outVector;
};
/**
 * Builds a metadata record for one category/usage/region combination.
 *
 * @param category The category string appended to this->category.
 * @param usage The usage string appended to this->usage.
 * @param region The region string appended to this->region.
 * @param prefsOffset Stored as-is in this->prefsOffset.
 * @param prefsCount Stored as-is in this->prefsCount.
 * @param status The standard ICU error code output parameter; it is handed to
 *     each of the three string appends.
 */
UnitPreferenceMetadata::UnitPreferenceMetadata(const char *category, const char *usage,
                                               const char *region, int32_t prefsOffset,
                                               int32_t prefsCount, UErrorCode &status)
    : prefsOffset(prefsOffset), prefsCount(prefsCount) {
    // The string members start out empty, so appending is how they get their value.
    this->category.append(category, status);
    this->usage.append(usage, status);
    this->region.append(region, status);
}
/**
 * Three-level ordering: category first, then usage, then region, each compared
 * with uprv_strcmp.
 *
 * @param other The instance to compare against.
 * @return The result of the first comparison that is non-zero, or zero when
 *     category, usage and region all compare equal.
 */
int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
    const int32_t byCategory = uprv_strcmp(category.data(), other.category.data());
    if (byCategory != 0) { return byCategory; }

    const int32_t byUsage = uprv_strcmp(usage.data(), other.usage.data());
    if (byUsage != 0) { return byUsage; }

    return uprv_strcmp(region.data(), other.region.data());
}
/**
 * Same ordering as the three-key compareTo(other), but additionally reports
 * how far down the category/usage/region hierarchy the two instances agree.
 *
 * The flags are only ever set to true here, never cleared: callers that want
 * a fresh answer must reset them before calling.
 *
 * @param other The instance to compare against.
 * @param foundCategory Set to true when the categories compare equal.
 * @param foundUsage Set to true when category and usage both compare equal.
 * @param foundRegion Set to true when category, usage and region all compare
 *     equal.
 * @return The result of the first comparison that is non-zero, or zero when
 *     all three keys compare equal.
 */
int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
                                          bool *foundUsage, bool *foundRegion) const {
    const int32_t byCategory = uprv_strcmp(category.data(), other.category.data());
    if (byCategory != 0) { return byCategory; }
    *foundCategory = true;

    const int32_t byUsage = uprv_strcmp(usage.data(), other.usage.data());
    if (byUsage != 0) { return byUsage; }
    *foundUsage = true;

    const int32_t byRegion = uprv_strcmp(region.data(), other.region.data());
    if (byRegion == 0) { *foundRegion = true; }
    return byRegion;
}
// Strict ordering on (category, usage, region), defined via compareTo():
// true exactly when `a` compares less than `b`.
bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
    const int32_t order = a.compareTo(b);
    return order < 0;
}
/**
* A ResourceSink that collects unit preferences information.
*
* This class is for use by ures_getAllItemsWithFallback.
*/
class UnitPreferencesSink : public ResourceSink {
public:
/**
* Constructor.
* @param outPrefs The vector to which UnitPreference instances are to be
* added. This vector must outlive the use of the ResourceSink.
* @param outMetadata The vector to which UnitPreferenceMetadata instances
* are to be added. This vector must outlive the use of the ResourceSink.
*/
explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
: preferences(outPrefs), metadata(outMetadata) {}
/**
* Method for use by `ures_getAllItemsWithFallback`. Adds the unit
* preferences info that are found in `value` to the output vector.
*
* @param source This string must be "unitPreferenceData": the resource that
* this class supports reading.
* @param value The "unitPreferenceData" resource, containing unit
* preferences data.
* @param noFallback Ignored.
* @param status The standard ICU error code output parameter. Note: if an
* error is returned, outPrefs and outMetadata may be inconsistent.
*/
void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
if (U_FAILURE(status)) { return; }
if (uprv_strcmp(key, "unitPreferenceData") != 0) {
// This is very strict, however it is the cheapest way to be sure
// that with `value`, we're looking at the convertUnits table.
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
// The unitPreferenceData structure (see data/misc/units.txt) contains a
// hierarchy of category/usage/region, within which are a set of
// preferences. Hence three for-loops and another loop for the
// preferences themselves:
ResourceTable unitPreferenceDataTable = value.getTable(status);
const char *category;
for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
ResourceTable categoryTable = value.getTable(status);
const char *usage;
for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
ResourceTable regionTable = value.getTable(status);
const char *region;
for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
// `value` now contains the set of preferences for
// category/usage/region.
ResourceArray unitPrefs = value.getArray(status);
if (U_FAILURE(status)) { return; }
int32_t prefLen = unitPrefs.getSize();
// Update metadata for this set of preferences.
UnitPreferenceMetadata *meta = metadata->emplaceBack(
category, usage, region, preferences->length(), prefLen, status);
if (!meta) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
if (U_FAILURE(status)) { return; }
if (metadata->length() > 1) {
// Verify that unit preferences are sorted and
// without duplicates.
if (!(*(*metadata)[metadata->length() - 2] <
*(*metadata)[metadata->length() - 1])) {
status = U_INVALID_FORMAT_ERROR;
return;
}
}
// Collect the individual preferences.
for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
UnitPreference *up = preferences->emplaceBack();
if (!up) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
ResourceTable unitPref = value.getTable(status);
if (U_FAILURE(status)) { return; }
for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
if (uprv_strcmp(key, "unit") == 0) {
int32_t length;
const UChar *u = value.getString(length, status);
up->unit.appendInvariantChars(u, length, status);
} else if (uprv_strcmp(key, "geq") == 0) {
int32_t length;
const UChar *g = value.getString(length, status);
CharString geq;
geq.appendInvariantChars(g, length, status);
DecimalQuantity dq;
dq.setToDecNumber(geq.data(), status);
up->geq = dq.toDouble();
} else if (uprv_strcmp(key, "skeleton") == 0) {
int32_t length;
const UChar *s = value.getString(length, status);
up->skeleton.appendInvariantChars(s, length, status);
}
}
}
}
}
}
}
private:
MaybeStackVector<UnitPreference> *preferences;
MaybeStackVector<UnitPreferenceMetadata> *metadata;
};
/**
 * Binary search over `metadata` for an exact category/usage/region match.
 *
 * `metadata` is expected to be sorted according to
 * UnitPreferenceMetadata::compareTo().
 *
 * @param metadata The list of UnitPreferenceMetadata instances to search.
 * @param category The category to search for.
 * @param usage The usage to search for.
 * @param region The region to search for.
 * @param foundCategory Output: true if any probed entry had a matching
 *     category. Always assigned, including when an error is returned.
 * @param foundUsage Output: true if any probed entry matched both category and
 *     usage. Always assigned, including when an error is returned.
 * @param foundRegion Output: true if an entry matched category, usage and
 *     region. Always assigned, including when an error is returned.
 * @param status The standard ICU error code output parameter.
 * @return The index of the exactly matching entry, or -1 if there is none or
 *     an error occurred.
 */
int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata, const char *category,
                     const char *usage, const char *region, bool *foundCategory, bool *foundUsage,
                     bool *foundRegion, UErrorCode &status) {
    // Reset the flags before anything can return, so that callers never read
    // indeterminate or stale values after a failure.
    *foundCategory = false;
    *foundUsage = false;
    *foundRegion = false;

    UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
    if (U_FAILURE(status)) { return -1; }

    int32_t start = 0;
    int32_t end = metadata->length();
    while (start < end) {
        // Written as start + half-width so the sum cannot overflow.
        int32_t mid = start + (end - start) / 2;
        // compareTo() only ever sets the flags to true, so they accumulate
        // over all probed entries.
        int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
        if (cmp < 0) {
            start = mid + 1;
        } else if (cmp > 0) {
            end = mid;
        } else {
            return mid;
        }
    }
    return -1;
}
/**
 * Finds the UnitPreferenceMetadata instance that matches the given category,
 * usage and region: if missing, region falls back to "001", and usage falls
 * back to "default".
 *
 * This is implemented as a sequence of binary searches: when the exact
 * combination is not found, the search is repeated with the fallback usage
 * and/or region.
 *
 * @param metadata The full list of UnitPreferenceMetadata instances.
 * @param category The category to search for. If category is not known, it can
 *     be resolved from the baseunit of the input (for supported unit
 *     categories).
 *     TODO(hugovdm): implement the unit->category lookup (via "unitQuantities"
 *     in the units resource bundle).
 * @param usage The usage for which formatting preferences is needed. If the
 *     given usage is not known, this function automatically falls back to
 *     "default" usage.
 * @param region The region for which preferences are needed. If there are no
 *     region-specific preferences, this function automatically falls back to
 *     the "001" region (global).
 * @param status The standard ICU error code output parameter.
 *   * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
 *   * If fallback to "default" or "001" didn't resolve, status will be
 *     U_MISSING_RESOURCE_ERROR.
 *   * A failure reported by a search itself is passed through unchanged.
 * @return The index into the metadata vector which represents the appropriate
 *     preferences. If appropriate preferences are not found, -1 is returned.
 */
int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
                                   const char *category, const char *usage, const char *region,
                                   UErrorCode &status) {
    if (U_FAILURE(status)) { return -1; }
    bool foundCategory = false;
    bool foundUsage = false;
    bool foundRegion = false;
    int32_t idx = binarySearch(metadata, category, usage, region, &foundCategory, &foundUsage,
                               &foundRegion, status);
    if (U_FAILURE(status)) { return -1; }
    if (idx >= 0) { return idx; }
    if (!foundCategory) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return idx;
    }
    U_ASSERT(foundCategory);
    if (!foundUsage) {
        if (uprv_strcmp(usage, "default") != 0) {
            usage = "default";
            idx = binarySearch(metadata, category, usage, region, &foundCategory, &foundUsage,
                               &foundRegion, status);
            // Return the search's own failure as-is, rather than replacing it
            // with U_MISSING_RESOURCE_ERROR below.
            if (U_FAILURE(status)) { return -1; }
        }
        if (!foundUsage) {
            status = U_MISSING_RESOURCE_ERROR;
            return idx;
        }
    }
    U_ASSERT(foundCategory);
    U_ASSERT(foundUsage);
    if (!foundRegion) {
        if (uprv_strcmp(region, "001") != 0) {
            region = "001";
            idx = binarySearch(metadata, category, usage, region, &foundCategory, &foundUsage,
                               &foundRegion, status);
            // As above: keep the search's own failure code.
            if (U_FAILURE(status)) { return -1; }
        }
        if (!foundRegion) {
            status = U_MISSING_RESOURCE_ERROR;
            return idx;
        }
    }
    U_ASSERT(foundCategory);
    U_ASSERT(foundUsage);
    U_ASSERT(foundRegion);
    U_ASSERT(idx >= 0);
    return idx;
}
} // namespace
// TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
ConversionRateDataSink sink(&result);
@ -124,6 +374,28 @@ const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece sou
return nullptr;
}
/**
 * Opens the "units" resource bundle and feeds its "unitPreferenceData"
 * resource through a UnitPreferencesSink that writes into unitPrefs_ and
 * metadata_.
 *
 * @param status The standard ICU error code output parameter.
 */
U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
    UnitPreferencesSink collector(&unitPrefs_, &metadata_);
    LocalUResourceBundlePointer bundle(ures_openDirect(nullptr, "units", &status));
    ures_getAllItemsWithFallback(bundle.getAlias(), "unitPreferenceData", collector, status);
}
// TODO: make outPreferences const?
//
// TODO: consider replacing `UnitPreference **&outPreferences` with slice class
// of some kind.
/**
 * Looks up the preferences for category/usage/region, with the fallback rules
 * of getPreferenceMetadataIndex().
 *
 * @param category The category to look up.
 * @param usage The usage to look up.
 * @param region The region to look up.
 * @param outPreferences Output: set to point at the first matching entry
 *     inside unitPrefs_ (no copy is made). Set to nullptr on failure.
 * @param preferenceCount Output: the number of entries available through
 *     outPreferences. Set to 0 on failure.
 * @param status The standard ICU error code output parameter.
 */
void U_I18N_API UnitPreferences::getPreferencesFor(const char *category, const char *usage,
                                                   const char *region,
                                                   const UnitPreference *const *&outPreferences,
                                                   int32_t &preferenceCount, UErrorCode &status) const {
    // Give the outputs well-defined values up front, so that a caller who
    // forgets to check `status` does not read indeterminate data.
    outPreferences = nullptr;
    preferenceCount = 0;

    int32_t idx = getPreferenceMetadataIndex(&metadata_, category, usage, region, status);
    if (U_FAILURE(status)) { return; }
    U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
    const UnitPreferenceMetadata *m = metadata_[idx];
    outPreferences = unitPrefs_.getConstAlias() + m->prefsOffset;
    preferenceCount = m->prefsCount;
}
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -76,6 +76,102 @@ class U_I18N_API ConversionRates {
MaybeStackVector<ConversionRateInfo> conversionInfo_;
};
// One output unit preference, as found in the unitPreferenceData information
// of the units resources. A sequence of these specifies the output unit
// preferences.
struct U_I18N_API UnitPreference : public UMemory {
    // The unit of this preference.
    CharString unit;
    // Numeric threshold of this preference; defaults to 1.
    // NOTE(review): presumably "greater than or equal" - confirm against the
    // CLDR unit preferences specification.
    double geq;
    // The skeleton of this preference.
    CharString skeleton;

    UnitPreference() : geq(1.0) {}
};
namespace {
/**
 * Metadata about the preferences in UnitPreferences::unitPrefs_.
 *
 * UnitPreferenceMetadata lives in the anonymous namespace, because it should
 * only be useful to internal code and unit testing code.
 *
 * NOTE(review): an anonymous namespace in a header gives every translation
 * unit that includes it its own distinct copy of this class; consider a named
 * internal namespace instead.
 */
class U_I18N_API UnitPreferenceMetadata : public UMemory {
  public:
    // Creates an empty record: empty strings, and an offset and count of zero
    // (so the integer members never hold indeterminate values).
    UnitPreferenceMetadata() : prefsOffset(0), prefsCount(0) {}

    // Creates a record for the given category/usage/region; `status` receives
    // any error from copying the strings.
    UnitPreferenceMetadata(const char *category, const char *usage, const char *region,
                           int32_t prefsOffset, int32_t prefsCount, UErrorCode &status);

    // Unit category (e.g. "length", "mass", "electric-capacitance").
    CharString category;
    // Usage (e.g. "road", "vehicle-fuel", "blood-glucose"). Every category
    // should have an entry for "default" usage. TODO(hugovdm): add a test for
    // this.
    CharString usage;
    // Region code (e.g. "US", "CZ", "001"). Every usage should have an entry
    // for the "001" region ("world"). TODO(hugovdm): add a test for this.
    CharString region;
    // Offset into the UnitPreferences::unitPrefs_ list where the relevant
    // preferences are found.
    int32_t prefsOffset;
    // The number of preferences that form this set.
    int32_t prefsCount;

    // Orders by category, then usage, then region. Returns a negative, zero
    // or positive value, like strcmp.
    int32_t compareTo(const UnitPreferenceMetadata &other) const;
    // Same ordering as above; additionally sets each flag to true when this
    // instance and `other` agree down to that level of the
    // category/usage/region hierarchy. Flags are never reset to false.
    int32_t compareTo(const UnitPreferenceMetadata &other, bool *foundCategory, bool *foundUsage,
                      bool *foundRegion) const;
};
} // namespace
/**
 * Unit Preferences information for various locales and usages.
 *
 * The data is held as a flat list of preferences (unitPrefs_) plus an index
 * (metadata_) that records which slice of that list belongs to which
 * category/usage/region combination.
 *
 * TODO(hugovdm): add a function to look up the category based on the input
 * unit.
 */
class U_I18N_API UnitPreferences {
public:
/**
 * Constructor, loads all the preference data.
 *
 * @param status Receives status.
 */
UnitPreferences(UErrorCode &status);
/**
 * Returns the set of unit preferences in the particular category that best
 * matches the specified usage and region.
 *
 * If region can't be found, falls back to global (001). If usage can't be
 * found, falls back to "default". The preferences are not copied: the
 * result is a pointer into this object's internal list.
 * NOTE(review): the returned pointer presumably stays valid only as long as
 * this UnitPreferences instance is alive - confirm.
 *
 * @param category The category within which to look up usage and region.
 * (TODO(hugovdm): improve docs on how to find the category, once the lookup
 * function is added.)
 * @param usage The usage parameter. (TODO(hugovdm): improve this
 * documentation. Add reference to some list of usages we support.) If the
 * given usage is not found, the method automatically falls back to
 * "default".
 * @param region The region whose preferences are desired. If there are no
 * specific preferences for the requested region, the method automatically
 * falls back to region "001" ("world").
 * @param outPreferences Output: receives a pointer to the first of the
 * matching preferences. Only meaningful when status indicates success.
 * @param preferenceCount Output: receives the number of preferences
 * available via outPreferences. Only meaningful when status indicates
 * success.
 * @param status Receives status.
 *
 * TODO: maybe replace `UnitPreference **&outPreferences` with a slice class?
 */
void getPreferencesFor(const char *category, const char *usage, const char *region,
const UnitPreference *const *&outPreferences, int32_t &preferenceCount,
UErrorCode &status) const;
protected:
// Metadata about the sets of preferences, this is the index for looking up
// preferences in the unitPrefs_ list.
MaybeStackVector<UnitPreferenceMetadata> metadata_;
// All the preferences as a flat list: which usage and region preferences
// are associated with is stored in `metadata_`.
MaybeStackVector<UnitPreference> unitPrefs_;
};
U_NAMESPACE_END
#endif //__GETUNITSDATA_H__

View file

@ -1075,7 +1075,7 @@ group: units
group: unitsformatter
unitsdata.o unitconverter.o
deps
resourcebundle units_extra double_conversion
resourcebundle units_extra double_conversion number_representation
group: decnumber
decContext.o decNumber.o

View file

@ -13,6 +13,7 @@ class UnitsDataTest : public IntlTest {
void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = NULL);
void testGetAllConversionRates();
void testGetPreferences();
};
// Factory for the UnitsDataTest suite: returns a newly allocated instance.
extern IntlTest *createUnitsDataTest() {
    IntlTest *suite = new UnitsDataTest();
    return suite;
}
@ -21,6 +22,7 @@ void UnitsDataTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
if (exec) { logln("TestSuite UnitsDataTest: "); }
TESTCASE_AUTO_BEGIN;
TESTCASE_AUTO(testGetAllConversionRates);
TESTCASE_AUTO(testGetPreferences);
TESTCASE_AUTO_END;
}
@ -40,4 +42,72 @@ void UnitsDataTest::testGetAllConversionRates() {
}
}
/**
 * Test-only subclass of UnitPreferences that exposes the protected data
 * members for inspection.
 */
class UnitPreferencesOpenedUp : public UnitPreferences {
  public:
    UnitPreferencesOpenedUp(UErrorCode &status) : UnitPreferences(status) {}

    // Read-only access to the metadata index held by the base class.
    const MaybeStackVector<UnitPreferenceMetadata> *getInternalMetadata() const {
        return &this->metadata_;
    }

    // Read-only access to the flat preference list held by the base class.
    const MaybeStackVector<UnitPreference> *getInternalUnitPrefs() const {
        return &this->unitPrefs_;
    }
};
/**
* This test is dependent upon CLDR Data: when the preferences change, the test
* may fail: see the constants for expected Max/Min unit identifiers, for US and
* World, and for Roads and default lengths.
*/
void UnitsDataTest::testGetPreferences() {
const char* USRoadMax = "mile";
const char* USRoadMin = "foot";
const char* USLenMax = "mile";
const char* USLenMin = "inch";
const char* WorldRoadMax = "kilometer";
const char* WorldRoadMin = "meter";
const char* WorldLenMax = "kilometer";
const char* WorldLenMin = "centimeter";
struct TestCase {
const char *name;
const char *category;
const char *usage;
const char *region;
const char *expectedBiggest;
const char *expectedSmallest;
} testCases[]{
{"US road", "length", "road", "US", USRoadMax, USRoadMin},
{"001 road", "length", "road", "001", WorldRoadMax, WorldRoadMin},
{"US lengths", "length", "default", "US", USLenMax, USLenMin},
{"001 lengths", "length", "default", "001", WorldLenMax, WorldLenMin},
{"XX road falls back to 001", "length", "road", "XX", WorldRoadMax, WorldRoadMin},
{"XX default falls back to 001", "length", "default", "XX", WorldLenMax, WorldLenMin},
{"Unknown usage US", "length", "foobar", "US", USLenMax, USLenMin},
{"Unknown usage 001", "length", "foobar", "XX", WorldLenMax, WorldLenMin},
};
IcuTestErrorCode status(*this, "testGetPreferences");
UnitPreferencesOpenedUp preferences(status);
auto *metadata = preferences.getInternalMetadata();
auto *unitPrefs = preferences.getInternalUnitPrefs();
assertTrue(UnicodeString("Metadata count: ") + metadata->length() + " > 200",
metadata->length() > 200);
assertTrue(UnicodeString("Preferences count: ") + unitPrefs->length() + " > 250",
unitPrefs->length() > 250);
for (const auto &t : testCases) {
logln(t.name);
const UnitPreference *const *prefs;
int32_t prefsCount;
preferences.getPreferencesFor(t.category, t.usage, t.region, prefs, prefsCount, status);
if (status.errIfFailureAndReset("getPreferencesFor(\"%s\", \"%s\", \"%s\", ...", t.category,
t.usage, t.region)) {
continue;
}
if (prefsCount > 0) {
assertEquals(UnicodeString(t.name) + " - max unit", t.expectedBiggest,
prefs[0]->unit.data());
assertEquals(UnicodeString(t.name) + " - min unit", t.expectedSmallest,
prefs[prefsCount - 1]->unit.data());
} else {
errln(UnicodeString(t.name) + ": failed to find preferences");
}
status.errIfFailureAndReset("testCase '%s'", t.name);
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */