ICU-22879 Provide collator based predicates for use with STL algorithms.

This commit is contained in:
Fredrik Roubert 2024-09-11 15:59:23 +02:00 committed by Fredrik Roubert
parent 6020a98fbb
commit b160feaac2
4 changed files with 420 additions and 0 deletions

View file

@ -196,6 +196,12 @@ value, such as `ucol_greater`, `ucol_greaterOrEqual`, `ucol_equal` (in C)
`Collator::greater`, `Collator::greaterOrEqual`, `Collator::equal` (in C++) and
`Collator.equals` (in Java).
As of ICU 76 there are also C++ convenience functions and templates that create
standard-library-compliant comparison function objects, which use a collator to
perform comparisons (instead of using the comparison operators on the strings
being compared), such as `Collator::less()` for a C++ API `Collator` or
`icu::header::collator::less` constructed from a C API `UCollator`.
### Examples
**C:**
@ -238,6 +244,48 @@ delete coll;
}
```
**C++:** (as of ICU 76)
```c++
icu::ErrorCode status;
icu::Locale locale = icu::Locale::forLanguageTag("sv", status);
icu::LocalPointer<Collator> collator(icu::Collator::createInstance(locale, status), status);
status.assertSuccess(); // Override ErrorCode::handleFailure() to handle failure.
std::vector<std::string> utf8{
"Arnold", "Øystein", "Ingrid", "Åke", "Olof", "İsmail", "Örjan",
};
std::sort(utf8.begin(), utf8.end(), collator->less());
std::vector<UnicodeString> utf16{
u"Arnold", u"Øystein", u"Ingrid", u"Åke", u"Olof", u"İsmail", u"Örjan",
};
std::sort(utf16.begin(), utf16.end(), collator->less());
```
**C++:** (calling the ICU C API, as of ICU 76)
```c++
UErrorCode errorCode = U_ZERO_ERROR;
icu::LocalUCollatorPointer ucollator(ucol_open("sv", &errorCode));
assert(U_SUCCESS(errorCode));
assert(ucollator.isValid());

// One function object, built from the UCollator, is used for both sorts below.
const icu::header::collator::less byCollation(ucollator.getAlias());

std::vector<std::string> utf8{
    "Arnold", "Øystein", "Ingrid", "Åke", "Olof", "İsmail", "Örjan",
};
std::sort(utf8.begin(), utf8.end(), byCollation);

std::vector<std::u16string> utf16{
    u"Arnold", u"Øystein", u"Ingrid", u"Åke", u"Olof", u"İsmail", u"Örjan",
};
std::sort(utf16.begin(), utf16.end(), byCollation);
```
**Java:**
```java

View file

@ -58,12 +58,18 @@
#if !UCONFIG_NO_COLLATION
#include <functional>
#include <string_view>
#include <type_traits>
#include "unicode/char16ptr.h"
#include "unicode/uobject.h"
#include "unicode/ucol.h"
#include "unicode/unorm.h"
#include "unicode/locid.h"
#include "unicode/uniset.h"
#include "unicode/umisc.h"
#include "unicode/unistr.h"
#include "unicode/uiter.h"
#include "unicode/stringpiece.h"
@ -588,6 +594,52 @@ public:
*/
UBool equals(const UnicodeString& source, const UnicodeString& target) const;
#ifndef U_HIDE_DRAFT_API
/**
* Creates a comparison function object that uses this collator.
* Like <code>std::equal_to</code> but uses the collator instead of <code>operator==</code>.
* @draft ICU 76
*/
inline auto equal_to() const { return Predicate<std::equal_to, UCOL_EQUAL>(*this); }
/**
* Creates a comparison function object that uses this collator.
* Like <code>std::greater</code> but uses the collator instead of <code>operator&gt;</code>.
* @draft ICU 76
*/
inline auto greater() const { return Predicate<std::equal_to, UCOL_GREATER>(*this); }
/**
* Creates a comparison function object that uses this collator.
* Like <code>std::less</code> but uses the collator instead of <code>operator&lt;</code>.
* @draft ICU 76
*/
inline auto less() const { return Predicate<std::equal_to, UCOL_LESS>(*this); }
/**
* Creates a comparison function object that uses this collator.
* Like <code>std::not_equal_to</code> but uses the collator instead of <code>operator!=</code>.
* @draft ICU 76
*/
inline auto not_equal_to() const { return Predicate<std::not_equal_to, UCOL_EQUAL>(*this); }
/**
* Creates a comparison function object that uses this collator.
* Like <code>std::greater_equal</code> but uses the collator instead of <code>operator&gt;=</code>.
* @draft ICU 76
*/
inline auto greater_equal() const { return Predicate<std::not_equal_to, UCOL_LESS>(*this); }
/**
* Creates a comparison function object that uses this collator.
* Like <code>std::less_equal</code> but uses the collator instead of <code>operator&lt;=</code>.
* @draft ICU 76
*/
inline auto less_equal() const { return Predicate<std::not_equal_to, UCOL_GREATER>(*this); }
#endif // U_HIDE_DRAFT_API
#ifndef U_FORCE_HIDE_DEPRECATED_API
/**
* Determines the minimum strength that will be used in comparison or
@ -1210,6 +1262,47 @@ private:
friend class ICUCollatorService;
static Collator* makeInstance(const Locale& desiredLocale,
UErrorCode& status);
#ifndef U_HIDE_DRAFT_API
/**
 * Function object for performing comparisons using a Collator.
 *
 * Each call operator asks the collator for the ordering of its two arguments
 * and returns <code>Compare&lt;UCollationResult&gt;{}(ordering, result)</code>.
 * For example <code>Predicate&lt;std::equal_to, UCOL_LESS&gt;</code> is true when
 * the ordering is UCOL_LESS, and <code>Predicate&lt;std::not_equal_to, UCOL_LESS&gt;</code>
 * is true when it is anything else.
 *
 * The collator is referenced, not copied, so it has to stay alive for as long
 * as the predicate is used. It is held through a pointer (not a reference
 * member) so that the predicate keeps its implicit copy assignment operator,
 * which containers need when they assign or swap their comparison object.
 *
 * The UErrorCode filled in by the collator is dropped: the call operators
 * return bool and have no way to report it.
 * @internal
 */
template <template <typename...> typename Compare, UCollationResult result>
class Predicate {
public:
    /** Binds the predicate to parent; parent is not copied. */
    explicit Predicate(const Collator& parent) : collator(&parent) {}

    /** Compares two arguments that both satisfy ConvertibleToU16StringView. */
    template <
        typename T, typename U,
        typename = std::enable_if_t<ConvertibleToU16StringView<T> && ConvertibleToU16StringView<U>>>
    bool operator()(const T& lhs, const U& rhs) const {
        UErrorCode status = U_ZERO_ERROR;
        return compare(
            collator->compare(
                UnicodeString::readOnlyAlias(lhs),
                UnicodeString::readOnlyAlias(rhs),
                status),
            result);
    }

    /** Compares two char strings through Collator::compareUTF8(). */
    bool operator()(std::string_view lhs, std::string_view rhs) const {
        UErrorCode status = U_ZERO_ERROR;
        return compare(collator->compareUTF8(lhs, rhs, status), result);
    }

#if defined(__cpp_char8_t)
    /** Compares two char8_t strings through Collator::compareUTF8(). */
    bool operator()(std::u8string_view lhs, std::u8string_view rhs) const {
        UErrorCode status = U_ZERO_ERROR;
        return compare(collator->compareUTF8(lhs, rhs, status), result);
    }
#endif

private:
    // Never null: always initialized from a reference in the constructor.
    const Collator* collator;
    static constexpr Compare<UCollationResult> compare{};
};
#endif // U_HIDE_DRAFT_API
};
#if !UCONFIG_NO_SERVICE

View file

@ -1519,6 +1519,130 @@ ucol_openBinary(const uint8_t *bin, int32_t length,
const UCollator *base,
UErrorCode *status);
#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
#include <functional>
#include <string_view>
#include <type_traits>
#include "unicode/char16ptr.h"
#include "unicode/stringpiece.h"
#include "unicode/unistr.h"
namespace U_HEADER_ONLY_NAMESPACE {
#ifndef U_HIDE_DRAFT_API
namespace collator {
namespace internal {
/**
* Function object for performing comparisons using a UCollator.
* @internal
*/
template <template <typename...> typename Compare, UCollationResult result>
class Predicate {
public:
/** @internal */
explicit Predicate(const UCollator* ucol) : collator(ucol) {}
/** @internal */
template <
typename T, typename U,
typename = std::enable_if_t<ConvertibleToU16StringView<T> && ConvertibleToU16StringView<U>>>
bool operator()(const T& lhs, const U& rhs) const {
return match(UnicodeString::readOnlyAlias(lhs), UnicodeString::readOnlyAlias(rhs));
}
/** @internal */
bool operator()(std::string_view lhs, std::string_view rhs) const {
return match(lhs, rhs);
}
#if defined(__cpp_char8_t)
/** @internal */
bool operator()(std::u8string_view lhs, std::u8string_view rhs) const {
return match(lhs, rhs);
}
#endif
private:
bool match(UnicodeString lhs, UnicodeString rhs) const {
return compare(
ucol_strcoll(
collator,
toUCharPtr(lhs.getBuffer()), lhs.length(),
toUCharPtr(rhs.getBuffer()), rhs.length()),
result);
}
bool match(StringPiece lhs, StringPiece rhs) const {
UErrorCode status = U_ZERO_ERROR;
return compare(
ucol_strcollUTF8(
collator,
lhs.data(), lhs.length(),
rhs.data(), rhs.length(),
&status),
result);
}
const UCollator* const collator;
static constexpr Compare<UCollationResult> compare{};
};
} // namespace internal
/**
 * Function object for performing comparisons using a UCollator.
 * Like <code>std::equal_to</code> but uses the collator instead of <code>operator==</code>.
 * Constructed from the <code>const UCollator*</code> to compare with;
 * true when the collator result is UCOL_EQUAL.
 * @draft ICU 76
 */
using equal_to = internal::Predicate<std::equal_to, UCOL_EQUAL>;
/**
 * Function object for performing comparisons using a UCollator.
 * Like <code>std::greater</code> but uses the collator instead of <code>operator&gt;</code>.
 * Constructed from the <code>const UCollator*</code> to compare with;
 * true when the collator result is UCOL_GREATER.
 * @draft ICU 76
 */
using greater = internal::Predicate<std::equal_to, UCOL_GREATER>;
/**
 * Function object for performing comparisons using a UCollator.
 * Like <code>std::less</code> but uses the collator instead of <code>operator&lt;</code>.
 * Constructed from the <code>const UCollator*</code> to compare with;
 * true when the collator result is UCOL_LESS.
 * @draft ICU 76
 */
using less = internal::Predicate<std::equal_to, UCOL_LESS>;
/**
 * Function object for performing comparisons using a UCollator.
 * Like <code>std::not_equal_to</code> but uses the collator instead of <code>operator!=</code>.
 * Constructed from the <code>const UCollator*</code> to compare with;
 * true when the collator result is anything but UCOL_EQUAL.
 * @draft ICU 76
 */
using not_equal_to = internal::Predicate<std::not_equal_to, UCOL_EQUAL>;
/**
 * Function object for performing comparisons using a UCollator.
 * Like <code>std::greater_equal</code> but uses the collator instead of <code>operator&gt;=</code>.
 * Constructed from the <code>const UCollator*</code> to compare with;
 * expressed as "not less": true when the collator result is anything but UCOL_LESS.
 * @draft ICU 76
 */
using greater_equal = internal::Predicate<std::not_equal_to, UCOL_LESS>;
/**
 * Function object for performing comparisons using a UCollator.
 * Like <code>std::less_equal</code> but uses the collator instead of <code>operator&lt;=</code>.
 * Constructed from the <code>const UCollator*</code> to compare with;
 * expressed as "not greater": true when the collator result is anything but UCOL_GREATER.
 * @draft ICU 76
 */
using less_equal = internal::Predicate<std::not_equal_to, UCOL_GREATER>;
} // namespace collator
#endif // U_HIDE_DRAFT_API
} // namespace U_HEADER_ONLY_NAMESPACE
#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
#endif /* #if !UCONFIG_NO_COLLATION */

View file

@ -11,6 +11,9 @@
* created by: Markus W. Scherer
*/
#include <string>
#include <vector>
#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
@ -81,6 +84,10 @@ public:
void TestLongLocale();
void TestBuilderContextsOverflow();
void TestHang22414();
void TestCollatorPredicates();
void TestUCollatorPredicates();
void TestCollatorPredicateTypes();
void TestUCollatorPredicateTypes();
private:
void checkFCD(const char *name, CollationIterator &ci, CodePointIterator &cpi);
@ -154,6 +161,10 @@ void CollationTest::runIndexedTest(int32_t index, UBool exec, const char *&name,
TESTCASE_AUTO(TestLongLocale);
TESTCASE_AUTO(TestBuilderContextsOverflow);
TESTCASE_AUTO(TestHang22414);
TESTCASE_AUTO(TestCollatorPredicates);
TESTCASE_AUTO(TestUCollatorPredicates);
TESTCASE_AUTO(TestCollatorPredicateTypes);
TESTCASE_AUTO(TestUCollatorPredicateTypes);
TESTCASE_AUTO_END;
}
@ -1925,4 +1936,148 @@ void CollationTest::TestBuilderContextsOverflow() {
}
}
// Verify that every Collator predicate performs the correct comparison.
// Cases [01]-[08] are pairs for which the predicate must hold,
// cases [09]-[16] are pairs for which it must not.
void CollationTest::TestCollatorPredicates() {
    IcuTestErrorCode status(*this, "TestCollatorPredicates");
    setRootCollator(status);
    status.assertSuccess();
    if (status.isFailure()) {
        // assertSuccess() only reports the failure (through handleFailure());
        // stop here so that coll is not dereferenced without a root collator.
        status.reset();
        return;
    }

    assertTrue("[01] equal_to", coll->equal_to()("aaa", "aaa"));
    assertTrue("[02] not_equal_to", coll->not_equal_to()("aaa", "bbb"));
    assertTrue("[03] greater", coll->greater()("bbb", "aaa"));
    assertTrue("[04] less", coll->less()("aaa", "bbb"));
    assertTrue("[05] greater_equal", coll->greater_equal()("aaa", "aaa"));
    assertTrue("[06] greater_equal", coll->greater_equal()("bbb", "aaa"));
    assertTrue("[07] less_equal", coll->less_equal()("aaa", "aaa"));
    assertTrue("[08] less_equal", coll->less_equal()("aaa", "bbb"));

    assertFalse("[09] equal_to", coll->equal_to()("aaa", "bbb"));
    assertFalse("[10] not_equal_to", coll->not_equal_to()("aaa", "aaa"));
    assertFalse("[11] greater", coll->greater()("aaa", "aaa"));
    assertFalse("[12] greater", coll->greater()("aaa", "bbb"));
    assertFalse("[13] less", coll->less()("aaa", "aaa"));
    assertFalse("[14] less", coll->less()("bbb", "aaa"));
    assertFalse("[15] greater_equal", coll->greater_equal()("aaa", "bbb"));
    assertFalse("[16] less_equal", coll->less_equal()("bbb", "aaa"));
}
// Verify that every UCollator predicate performs the correct comparison.
// Cases [01]-[08] are pairs for which the predicate must hold,
// cases [09]-[16] are pairs for which it must not.
void CollationTest::TestUCollatorPredicates() {
    using namespace U_HEADER_NESTED_NAMESPACE;
    IcuTestErrorCode status(*this, "TestUCollatorPredicates");
    setRootCollator(status);
    status.assertSuccess();
    if (status.isFailure()) {
        // assertSuccess() only reports the failure (through handleFailure());
        // stop here so that coll is not dereferenced without a root collator.
        status.reset();
        return;
    }
    const UCollator* const ucol = coll->toUCollator();

    assertTrue("[01] equal_to", collator::equal_to(ucol)("aaa", "aaa"));
    assertTrue("[02] not_equal_to", collator::not_equal_to(ucol)("aaa", "bbb"));
    assertTrue("[03] greater", collator::greater(ucol)("bbb", "aaa"));
    assertTrue("[04] less", collator::less(ucol)("aaa", "bbb"));
    assertTrue("[05] greater_equal", collator::greater_equal(ucol)("aaa", "aaa"));
    assertTrue("[06] greater_equal", collator::greater_equal(ucol)("bbb", "aaa"));
    assertTrue("[07] less_equal", collator::less_equal(ucol)("aaa", "aaa"));
    assertTrue("[08] less_equal", collator::less_equal(ucol)("aaa", "bbb"));

    assertFalse("[09] equal_to", collator::equal_to(ucol)("aaa", "bbb"));
    assertFalse("[10] not_equal_to", collator::not_equal_to(ucol)("aaa", "aaa"));
    assertFalse("[11] greater", collator::greater(ucol)("aaa", "aaa"));
    assertFalse("[12] greater", collator::greater(ucol)("aaa", "bbb"));
    assertFalse("[13] less", collator::less(ucol)("aaa", "aaa"));
    assertFalse("[14] less", collator::less(ucol)("bbb", "aaa"));
    assertFalse("[15] greater_equal", collator::greater_equal(ucol)("aaa", "bbb"));
    assertFalse("[16] less_equal", collator::less_equal(ucol)("bbb", "aaa"));
}
namespace {

// Sample strings for the predicate type tests, one per character type.
// Each one spells the name of its own character type.

constexpr char16_t TEXT_CHAR16[] = u"char16";

#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000)
// "uint16" written out as NUL-terminated UTF-16 code units.
constexpr uint16_t TEXT_UINT16[] = { u'u', u'i', u'n', u't', u'1', u'6', u'\0' };
#endif

#if U_SIZEOF_WCHAR_T==2
// Only where wchar_t is 16 bits wide.
constexpr wchar_t TEXT_WCHAR[] = L"wchar";
#endif

constexpr char TEXT_CHAR[] = "char";

#if defined(__cpp_char8_t)
// Only where the compiler provides char8_t.
constexpr char8_t TEXT_CHAR8[] = u8"char8";
#endif

}  // namespace
// Verify that the Collator predicates handle all string types.
// Every string is compared with itself (as an array or wrapped in a view or
// string class), so equal_to has to return true for each supported type.
void CollationTest::TestCollatorPredicateTypes() {
    IcuTestErrorCode status(*this, "TestCollatorPredicateTypes");
    setRootCollator(status);
    status.assertSuccess();
    if (status.isFailure()) {
        // assertSuccess() only reports the failure (through handleFailure());
        // stop here so that coll is not dereferenced without a root collator.
        status.reset();
        return;
    }
    const auto equal_to = coll->equal_to();

    assertTrue("char16_t", equal_to(TEXT_CHAR16, TEXT_CHAR16));
    assertTrue("u16string_view", equal_to(std::u16string_view{TEXT_CHAR16}, TEXT_CHAR16));

#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000)
    assertTrue("uint16_t", equal_to(TEXT_UINT16, TEXT_UINT16));
    assertTrue("basic_string_view<uint16_t>",
               equal_to(std::basic_string_view<uint16_t>{TEXT_UINT16}, TEXT_UINT16));
#endif

#if U_SIZEOF_WCHAR_T==2
    assertTrue("wchar_t", equal_to(TEXT_WCHAR, TEXT_WCHAR));
    assertTrue("wstring_view", equal_to(std::wstring_view{TEXT_WCHAR}, TEXT_WCHAR));
#endif

    assertTrue("char", equal_to(TEXT_CHAR, TEXT_CHAR));
    assertTrue("string_view", equal_to(std::string_view{TEXT_CHAR}, TEXT_CHAR));

#if defined(__cpp_char8_t)
    assertTrue("char8_t", equal_to(TEXT_CHAR8, TEXT_CHAR8));
    assertTrue("u8string_view", equal_to(std::u8string_view{TEXT_CHAR8}, TEXT_CHAR8));
#endif

    assertTrue("UnicodeString", equal_to(UnicodeString::readOnlyAlias(TEXT_CHAR16), TEXT_CHAR16));
    assertTrue("string", equal_to(std::string{TEXT_CHAR}, TEXT_CHAR));
}
// Verify that the UCollator predicates handle all string types.
// Every string is compared with itself (as an array or wrapped in a view or
// string class), so equal_to has to return true for each supported type.
void CollationTest::TestUCollatorPredicateTypes() {
    using namespace U_HEADER_NESTED_NAMESPACE;
    IcuTestErrorCode status(*this, "TestUCollatorPredicateTypes");
    setRootCollator(status);
    status.assertSuccess();
    if (status.isFailure()) {
        // assertSuccess() only reports the failure (through handleFailure());
        // stop here so that coll is not dereferenced without a root collator.
        status.reset();
        return;
    }
    const auto equal_to = collator::equal_to(coll->toUCollator());

    assertTrue("char16_t", equal_to(TEXT_CHAR16, TEXT_CHAR16));
    assertTrue("u16string_view", equal_to(std::u16string_view{TEXT_CHAR16}, TEXT_CHAR16));

#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000)
    assertTrue("uint16_t", equal_to(TEXT_UINT16, TEXT_UINT16));
    assertTrue("basic_string_view<uint16_t>",
               equal_to(std::basic_string_view<uint16_t>{TEXT_UINT16}, TEXT_UINT16));
#endif

#if U_SIZEOF_WCHAR_T==2
    assertTrue("wchar_t", equal_to(TEXT_WCHAR, TEXT_WCHAR));
    assertTrue("wstring_view", equal_to(std::wstring_view{TEXT_WCHAR}, TEXT_WCHAR));
#endif

    assertTrue("char", equal_to(TEXT_CHAR, TEXT_CHAR));
    assertTrue("string_view", equal_to(std::string_view{TEXT_CHAR}, TEXT_CHAR));

#if defined(__cpp_char8_t)
    assertTrue("char8_t", equal_to(TEXT_CHAR8, TEXT_CHAR8));
    assertTrue("u8string_view", equal_to(std::u8string_view{TEXT_CHAR8}, TEXT_CHAR8));
#endif

    assertTrue("UnicodeString", equal_to(UnicodeString::readOnlyAlias(TEXT_CHAR16), TEXT_CHAR16));
    assertTrue("string", equal_to(std::string{TEXT_CHAR}, TEXT_CHAR));
}
#endif // !UCONFIG_NO_COLLATION