diff --git a/icu4c/source/i18n/unicode/uspoof.h b/icu4c/source/i18n/unicode/uspoof.h
index 9fcfcd3ede8..002e704d4ac 100644
--- a/icu4c/source/i18n/unicode/uspoof.h
+++ b/icu4c/source/i18n/unicode/uspoof.h
@@ -477,7 +477,7 @@ typedef enum USpoofChecks {
*/
USPOOF_CHAR_LIMIT = 64,
- /**
+ /**
* Check that an identifier does not mix numbers from different numbering systems.
* For more information, see UTS 39 section 5.3.
*
@@ -485,6 +485,27 @@ typedef enum USpoofChecks {
*/
USPOOF_MIXED_NUMBERS = 128,
+ /**
+ * Check that an identifier does not have a combining character following a character in which that
+ * combining character would be hidden; for example 'i' followed by a U+0307 combining dot.
+ *
+ * More specifically, the following characters are forbidden from preceding a U+0307:
+ *
+ * - Those with the Soft_Dotted Unicode property (which includes 'i' and 'j')
+ * - Latin lowercase letter 'l'
+ * - Dotless 'i' and 'j' ('ı' and 'ȷ', U+0131 and U+0237)
+ * - Any character whose confusable prototype ends with such a character
+ * (Soft_Dotted, 'l', 'ı', or 'ȷ')
+ *
+ * In addition, combining characters are allowed between the above characters and U+0307 except those
+ * with combining class 0 or combining class "Above" (230, same class as U+0307).
+ *
+ * This list and the number of combining characters considered by this check may grow over time.
+ *
+ * @draft ICU 62
+ */
+ USPOOF_HIDDEN_OVERLAY = 256,
+
/**
* Enable all spoof checks.
*
diff --git a/icu4c/source/i18n/uspoof.cpp b/icu4c/source/i18n/uspoof.cpp
index 2e8b29e1bba..c66a6e1ffde 100644
--- a/icu4c/source/i18n/uspoof.cpp
+++ b/icu4c/source/i18n/uspoof.cpp
@@ -558,6 +558,13 @@ int32_t checkImpl(const SpoofImpl* This, const UnicodeString& id, CheckResult* c
checkResult->fNumerics = numerics; // UnicodeSet::operator=
}
+ if (0 != (This->fChecks & USPOOF_HIDDEN_OVERLAY)) {
+ int32_t index = This->findHiddenOverlay(id, *status);
+ if (index != -1) {
+ result |= USPOOF_HIDDEN_OVERLAY;
+ }
+ }
+
if (0 != (This->fChecks & USPOOF_CHAR_LIMIT)) {
int32_t i;
diff --git a/icu4c/source/i18n/uspoof_impl.cpp b/icu4c/source/i18n/uspoof_impl.cpp
index 7c68612b0aa..2c1f088b12d 100644
--- a/icu4c/source/i18n/uspoof_impl.cpp
+++ b/icu4c/source/i18n/uspoof_impl.cpp
@@ -377,6 +377,43 @@ URestrictionLevel SpoofImpl::getRestrictionLevel(const UnicodeString& input, UEr
return USPOOF_MINIMALLY_RESTRICTIVE;
}
+/**
+ * Scans an identifier for a U+0307 COMBINING DOT ABOVE that follows a character on which
+ * the dot would be hidden (see the USPOOF_HIDDEN_OVERLAY documentation in uspoof.h).
+ *
+ * NOTE(review): the loop body of this function was reconstructed from the documented
+ * contract of USPOOF_HIDDEN_OVERLAY and from testCombiningDot; please diff it against the
+ * upstream change before landing.
+ *
+ * @param input  The identifier to scan.
+ * @return The UTF-16 index of the first offending U+0307, or -1 if there is none.
+ *         The trailing UErrorCode parameter is unnamed and is neither read nor written.
+ */
+int32_t SpoofImpl::findHiddenOverlay(const UnicodeString& input, UErrorCode&) const {
+    bool sawLeadCharacter = false;
+    for (int32_t i = 0; i < input.length();) {
+        UChar32 cp = input.char32At(i);
+        if (sawLeadCharacter && cp == 0x0307) {
+            return i;
+        }
+        // Only characters with combining class 0 (non-combining) or 230 ("Above", the class
+        // of U+0307 itself) update the lead-character state. Marks of any other class are
+        // skipped, so they may sit between the lead character and the dot.
+        uint8_t combiningClass = u_getCombiningClass(cp);
+        if (combiningClass == 0 || combiningClass == 230) {
+            sawLeadCharacter = isIllegalCombiningDotLeadCharacter(cp);
+        }
+        // Advance by one code point (one or two UTF-16 code units).
+        i += U16_LENGTH(cp);
+    }
+    return -1;
+}
+
+/**
+ * Returns true if cp itself is one of the forbidden lead characters: 'i', 'j', 'l',
+ * dotless i (U+0131), dotless j (U+0237), or any character with the Soft_Dotted property.
+ * Does not consult the confusables data.
+ */
+static inline bool isIllegalCombiningDotLeadCharacterNoLookup(UChar32 cp) {
+    return cp == u'i' || cp == u'j' || cp == u'l' || cp == 0x0131 || cp == 0x0237 ||
+           u_hasBinaryProperty(cp, UCHAR_SOFT_DOTTED);
+}
+
+/**
+ * Returns true if a U+0307 must not follow cp: either cp is itself a forbidden lead
+ * character, or the last code point of its confusable prototype is one.
+ *
+ * @param cp  The code point to classify.
+ */
+bool SpoofImpl::isIllegalCombiningDotLeadCharacter(UChar32 cp) const {
+    if (isIllegalCombiningDotLeadCharacterNoLookup(cp)) {
+        return true;
+    }
+    UnicodeString skelStr;
+    fSpoofData->confusableLookup(cp, skelStr);
+    // Inspect only the final code point of the prototype string.
+    UChar32 finalCp = skelStr.char32At(skelStr.moveIndex32(skelStr.length(), -1));
+    // finalCp == cp was already rejected by the direct test above, so skip re-testing it.
+    if (finalCp != cp && isIllegalCombiningDotLeadCharacterNoLookup(finalCp)) {
+        return true;
+    }
+    return false;
+}
+
// Convert a text format hex number. Utility function used by builder code. Static.
@@ -532,24 +569,25 @@ uspoof_cleanupDefaultData(void) {
if (gDefaultSpoofData) {
// Will delete, assuming all user-level spoof checkers were closed.
gDefaultSpoofData->removeReference();
- gDefaultSpoofData = NULL;
+ gDefaultSpoofData = nullptr;
gSpoofInitDefaultOnce.reset();
}
return TRUE;
}
static void U_CALLCONV uspoof_loadDefaultData(UErrorCode& status) {
- UDataMemory *udm = udata_openChoice(NULL, "cfu", "confusables",
+ UDataMemory *udm = udata_openChoice(nullptr, "cfu", "confusables",
spoofDataIsAcceptable,
- NULL, // context, would receive dataVersion if supplied.
+ nullptr, // context, would receive dataVersion if supplied.
&status);
if (U_FAILURE(status)) { return; }
gDefaultSpoofData = new SpoofData(udm, status);
if (U_FAILURE(status)) {
delete gDefaultSpoofData;
+ gDefaultSpoofData = nullptr;
return;
}
- if (gDefaultSpoofData == NULL) {
+ if (gDefaultSpoofData == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
diff --git a/icu4c/source/i18n/uspoof_impl.h b/icu4c/source/i18n/uspoof_impl.h
index a6b1e73e22d..2985c7317f4 100644
--- a/icu4c/source/i18n/uspoof_impl.h
+++ b/icu4c/source/i18n/uspoof_impl.h
@@ -83,6 +83,9 @@ public:
void getNumerics(const UnicodeString& input, UnicodeSet& result, UErrorCode& status) const;
URestrictionLevel getRestrictionLevel(const UnicodeString& input, UErrorCode& status) const;
+ int32_t findHiddenOverlay(const UnicodeString& input, UErrorCode& status) const;
+ bool isIllegalCombiningDotLeadCharacter(UChar32 cp) const;
+
/** parse a hex number. Untility used by the builders. */
static UChar32 ScanHex(const UChar *s, int32_t start, int32_t limit, UErrorCode &status);
diff --git a/icu4c/source/test/intltest/itspoof.cpp b/icu4c/source/test/intltest/itspoof.cpp
index 1e2ec174c8b..13f993959d2 100644
--- a/icu4c/source/test/intltest/itspoof.cpp
+++ b/icu4c/source/test/intltest/itspoof.cpp
@@ -92,6 +92,7 @@ void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name
TESTCASE_AUTO(testBug12815);
TESTCASE_AUTO(testBug13314_MixedNumbers);
TESTCASE_AUTO(testBug13328_MixedCombiningMarks);
+ TESTCASE_AUTO(testCombiningDot);
TESTCASE_AUTO_END;
}
@@ -710,4 +711,45 @@ void IntlTestSpoof::testBug13328_MixedCombiningMarks() {
failedChecks);
}
+/**
+ * Table-driven test for USPOOF_HIDDEN_OVERLAY: each input is run through uspoof_check2()
+ * with only that check enabled, and the returned bit mask must be exactly
+ * USPOOF_HIDDEN_OVERLAY (expected failure) or 0 (expected pass).
+ */
+void IntlTestSpoof::testCombiningDot() {
+    UErrorCode status = U_ZERO_ERROR;
+    LocalUSpoofCheckerPointer checker(uspoof_open(&status));
+    TEST_ASSERT_SUCCESS(status);
+    // Restrict the checker to the hidden-overlay check.
+    uspoof_setChecks(checker.getAlias(), USPOOF_HIDDEN_OVERLAY, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    struct OverlayCase {
+        bool shouldFail;         // true if the input must be flagged
+        const char16_t* input;   // NUL-terminated identifier under test
+    };
+    static const OverlayCase kOverlayCases[] = {
+        // Lead characters with no combining dot.
+        {false, u"i"},
+        {false, u"j"},
+        {false, u"l"},
+        // Lead characters directly followed by U+0307.
+        {true, u"i\u0307"},
+        {true, u"j\u0307"},
+        {true, u"l\u0307"},
+        {true, u"ı\u0307"},
+        {true, u"ȷ\u0307"},
+        {true, u"𝚤\u0307"},
+        {true, u"𝑗\u0307"},
+        {false, u"m\u0307"},
+        {true, u"1\u0307"},
+        {true, u"ij\u0307"},
+        {true, u"i\u0307\u0307"},
+        {true, u"abci\u0307def"},
+        // Intervening combining marks.
+        {false, u"i\u0301\u0307"}, // U+0301 has combining class ABOVE (230)
+        {true, u"i\u0320\u0307"}, // U+0320 has combining class BELOW
+        {true, u"i\u0320\u0321\u0307"}, // U+0321 also has combining class BELOW
+        {false, u"i\u0320\u0301\u0307"},
+        {false, u"iz\u0307"},
+    };
+
+    for (const OverlayCase& testCase : kOverlayCases) {
+        const int32_t actual = uspoof_check2(checker.getAlias(), testCase.input, -1, nullptr, &status);
+        TEST_ASSERT_SUCCESS(status);
+        int32_t expected = 0;
+        if (testCase.shouldFail) {
+            expected = USPOOF_HIDDEN_OVERLAY;
+        }
+        assertEquals(testCase.input, expected, actual);
+    }
+}
+
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO */
diff --git a/icu4c/source/test/intltest/itspoof.h b/icu4c/source/test/intltest/itspoof.h
index f3212a19821..47cda32cf38 100644
--- a/icu4c/source/test/intltest/itspoof.h
+++ b/icu4c/source/test/intltest/itspoof.h
@@ -54,6 +54,8 @@ public:
void testBug13328_MixedCombiningMarks();
+ void testCombiningDot();
+
// Internal function to run a single skeleton test case.
void checkSkeleton(const USpoofChecker *sc, uint32_t flags,
const char *input, const char *expected, int32_t lineNum);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
index 28afb1adbd4..4059b213080 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
@@ -441,6 +441,28 @@ public class SpoofChecker {
*/
public static final int MIXED_NUMBERS = 128;
+ /**
+ * Check that an identifier does not have a combining character following a character in which that
+ * combining character would be hidden; for example 'i' followed by a U+0307 combining dot.
+ *
+ * More specifically, the following characters are forbidden from preceding a U+0307:
+ *
+ * - Those with the Soft_Dotted Unicode property (which includes 'i' and 'j')
+ * - Latin lowercase letter 'l'
+ * - Dotless 'i' and 'j' ('ı' and 'ȷ', U+0131 and U+0237)
+ * - Any character whose confusable prototype ends with such a character
+ * (Soft_Dotted, 'l', 'ı', or 'ȷ')
+ *
+ * In addition, combining characters are allowed between the above characters and U+0307 except those
+ * with combining class 0 or combining class "Above" (230, same class as U+0307).
+ *
+ * This list and the number of combining characters considered by this check may grow over time.
+ *
+ * @draft ICU 62
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final int HIDDEN_OVERLAY = 256;
+
// Update CheckResult.toString() when a new check is added.
/**
@@ -1300,6 +1322,13 @@ public class SpoofChecker {
}
}
+ if (0 != (this.fChecks & HIDDEN_OVERLAY)) {
+ int index = findHiddenOverlay(text);
+ if (index != -1) {
+ result |= HIDDEN_OVERLAY;
+ }
+ }
+
if (0 != (this.fChecks & CHAR_LIMIT)) {
int i;
int c;
@@ -1657,6 +1686,44 @@ public class SpoofChecker {
return RestrictionLevel.MINIMALLY_RESTRICTIVE;
}
+ int findHiddenOverlay(String input) {
+ boolean sawLeadCharacter = false;
+ StringBuilder sb = new StringBuilder();
+ for (int i=0; i