ICU-22332 bidiSkeleton and LTR- and RTL-confusabilities

2025-04-16 10:17:23 +00:00 · 2023-08-11 17:19:10 +02:00 · 2023-08-11 17:19:10 +02:00 · f79fe9347a
commit f79fe9347a
parent fb0f36203a
8 changed files with 708 additions and 28 deletions
--- a/icu4c/source/i18n/unicode/uspoof.h
+++ b/icu4c/source/i18n/unicode/uspoof.h
@ -19,6 +19,7 @@
 #ifndef USPOOF_H
 #define USPOOF_H

+#include "unicode/ubidi.h"
 #include "unicode/utypes.h"
 #include "unicode/uset.h"
 #include "unicode/parseerr.h"
@ -83,6 +84,25 @@
 * the instance should be created once (e.g., upon application startup), and the efficient
 * {@link uspoof_areConfusable} method can be used at runtime.
 *
+ * If the paragraph direction used to display the strings is known, the bidi function should be used instead:
+ *
+ * \code{.c}
+ * UErrorCode status = U_ZERO_ERROR;
+ * // These strings look identical when rendered in a left-to-right context.
+ * // They look distinct in a right-to-left context.
+ * UChar* str1 = (UChar*) u"A1\u05D0";  // A1א
+ * UChar* str2 = (UChar*) u"A\u05D01";  // Aא1
+ *
+ * USpoofChecker* sc = uspoof_open(&status);
+ * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
+ *
+ * int32_t bitmask = uspoof_areBidiConfusable(sc, UBIDI_LTR, str1, -1, str2, -1, &status);
+ * UBool result = bitmask != 0;
+ * // areBidiConfusable: 1 (status: U_ZERO_ERROR)
+ * printf("areBidiConfusable: %d (status: %s)\n", result, u_errorName(status));
+ * uspoof_close(sc);
+ * \endcode
+ *
 * <p>
 * The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers.  It will automatically call
 * {@link uspoof_close} when the object goes out of scope:
@ -519,7 +539,7 @@ typedef enum USpoofChecks {


    /**
-     * Constants from UAX #39 for use in {@link uspoof_setRestrictionLevel}, and
+     * Constants from UTS #39 for use in {@link uspoof_setRestrictionLevel}, and
     * for returned identifier restriction levels in check results.
     *
     * @stable ICU 51
@ -633,8 +653,8 @@ uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLeng
 /**
  * Open a Spoof Checker from the source form of the spoof data.
  * The input corresponds to the Unicode data file confusables.txt
-  * as described in Unicode UAX #39.  The syntax of the source data
-  * is as described in UAX #39 for this file, and the content of
+  * as described in Unicode Technical Standard #39.  The syntax of the source data
+  * is as described in UTS #39 for this file, and the content of
  * this file is acceptable input.
  *
  * The character encoding of the (char *) input text is UTF-8.
@ -1111,7 +1131,7 @@ uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *


 /**
- * Check the whether two specified strings are visually confusable.
+ * Check whether two specified strings are visually confusable.
 *
 * If the strings are confusable, the return value will be nonzero, as long as
 * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
@ -1159,7 +1179,58 @@ uspoof_areConfusable(const USpoofChecker *sc,
                     const UChar *id2, int32_t length2,
                     UErrorCode *status);

-
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Check whether two specified strings are visually confusable when
+ * displayed in a context with the given paragraph direction.
+ *
+ * If the strings are confusable, the return value will be nonzero, as long as
+ * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
+ *
+ * The bits in the return value correspond to flags for each of the classes of
+ * confusables applicable to the two input strings.  According to UTS 39
+ * section 4, the possible flags are:
+ *
+ * <ul>
+ *   <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
+ *   <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
+ *   <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
+ * </ul>
+ *
+ * If one or more of the above flags were not listed in uspoof_setChecks(), this
+ * function will never report that class of confusable.  The check
+ * {@link USPOOF_CONFUSABLE} enables all three flags.
+ *
+ *
+ * @param sc      The USpoofChecker
+ * @param direction The paragraph direction with which the identifiers are
+ *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param id1     The first of the two identifiers to be compared for
+ *                confusability.  The strings are in UTF-16 format.
+ * @param length1 the length of the first identifier, expressed in
+ *                16 bit UTF-16 code units, or -1 if the string is
+ *                nul terminated.
+ * @param id2     The second of the two identifiers to be compared for
+ *                confusability.  The identifiers are in UTF-16 format.
+ * @param length2 The length of the second identifier, expressed in
+ *                16 bit UTF-16 code units, or -1 if the string is
+ *                nul terminated.
+ * @param status  The error code, set if an error occurred while attempting to
+ *                perform the check.
+ *                Confusability of the identifiers is not reported here,
+ *                but through this function's return value.
+ * @return        An integer value with bit(s) set corresponding to
+ *                the type of confusability found, as defined by
+ *                enum USpoofChecks.  Zero is returned if the identifiers
+ *                are not confusable.
+ *
+ * @draft ICU 74
+ */
+U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
+                                                  const UChar *id1, int32_t length1,
+                                                  const UChar *id2, int32_t length2,
+                                                  UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */

 /**
 * A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.
@ -1192,14 +1263,45 @@ uspoof_areConfusableUTF8(const USpoofChecker *sc,
                         const char *id2, int32_t length2,
                         UErrorCode *status);

-
-
+#ifndef U_HIDE_DRAFT_API
+/**
+ * A version of {@link uspoof_areBidiConfusable} accepting strings in UTF-8 format.
+ *
+ * @param sc      The USpoofChecker
+ * @param direction The paragraph direction with which the identifiers are
+ *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param id1     The first of the two identifiers to be compared for
+ *                confusability.  The strings are in UTF-8 format.
+ * @param length1 The length of the first identifier, in bytes, or -1
+ *                if the string is nul terminated.
+ * @param id2     The second of the two identifiers to be compared for
+ *                confusability.  The strings are in UTF-8 format.
+ * @param length2 The length of the second string in bytes, or -1
+ *                if the string is nul terminated.
+ * @param status  The error code, set if an error occurred while attempting to
+ *                perform the check.
+ *                Confusability of the strings is not reported here,
+ *                but through this function's return value.
+ * @return        An integer value with bit(s) set corresponding to
+ *                the type of confusability found, as defined by
+ *                enum USpoofChecks.  Zero is returned if the strings
+ *                are not confusable.
+ *
+ * @draft ICU 74
+ *
+ * @see uspoof_areBidiConfusable
+ */
+U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
+                                                      const char *id1, int32_t length1,
+                                                      const char *id2, int32_t length2,
+                                                      UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */

 /**
 *  Get the "skeleton" for an identifier.
 *  Skeletons are a transformation of the input identifier;
 * Two identifiers are confusable if their skeletons are identical.
- *  See Unicode UAX #39 for additional information.
+ *  See Unicode Technical Standard #39 for additional information.
 *
 *  Using skeletons directly makes it possible to quickly check
 *  whether an identifier is confusable with any of some large
@ -1233,11 +1335,50 @@ uspoof_getSkeleton(const USpoofChecker *sc,
                   UChar *dest, int32_t destCapacity,
                   UErrorCode *status);

+#ifndef U_HIDE_DRAFT_API
+/**
+ *  Get the "bidiSkeleton" for an identifier and a direction.
+ *  Skeletons are a transformation of the input identifier;
+ *  Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
+ *  they are RTL-confusable if their RTL bidiSkeletons are identical.
+ *  See Unicode Technical Standard #39 for additional information:
+ *  https://www.unicode.org/reports/tr39/#Confusable_Detection.
+ *
+ *  Using skeletons directly makes it possible to quickly check
+ *  whether an identifier is confusable with any of some large
+ *  set of existing identifiers, by creating an efficiently
+ *  searchable collection of the skeletons.
+ *
+ * @param sc      The USpoofChecker.
+ * @param direction The context direction with which the identifier will be
+ *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param id      The input identifier whose skeleton will be computed.
+ * @param length  The length of the input identifier, expressed in 16 bit
+ *                UTF-16 code units, or -1 if the string is zero terminated.
+ * @param dest    The output buffer, to receive the skeleton string.
+ * @param destCapacity  The length of the output buffer, in 16 bit units.
+ *                The destCapacity may be zero, in which case the function will
+ *                return the actual length of the skeleton.
+ * @param status  The error code, set if an error occurred while attempting to
+ *                perform the check.
+ * @return        The length of the skeleton string.  The returned length
+ *                is always that of the complete skeleton, even when the
+ *                supplied buffer is too small (or of zero length)
+ *
+ * @draft ICU 74
+ * @see uspoof_areBidiConfusable
+ */
+U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc,
+                                                UBiDiDirection direction,
+                                                const UChar *id, int32_t length,
+                                                UChar *dest, int32_t destCapacity, UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */
+
 /**
 *  Get the "skeleton" for an identifier.
 *  Skeletons are a transformation of the input identifier;
 *  Two identifiers are confusable if their skeletons are identical.
- *  See Unicode UAX #39 for additional information.
+ *  See Unicode Technical Standard #39 for additional information.
 *
 *  Using skeletons directly makes it possible to quickly check
 *  whether an identifier is confusable with any of some large
@ -1273,6 +1414,46 @@ uspoof_getSkeletonUTF8(const USpoofChecker *sc,
                       char *dest, int32_t destCapacity,
                       UErrorCode *status);

+#ifndef U_HIDE_DRAFT_API
+/**
+ *  Get the "bidiSkeleton" for an identifier and a direction.
+ *  Skeletons are a transformation of the input identifier;
+ *  Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
+ *  they are RTL-confusable if their RTL bidiSkeletons are identical.
+ *  See Unicode Technical Standard #39 for additional information:
+ *  https://www.unicode.org/reports/tr39/#Confusable_Detection.
+ *
+ *  Using skeletons directly makes it possible to quickly check
+ *  whether an identifier is confusable with any of some large
+ *  set of existing identifiers, by creating an efficiently
+ *  searchable collection of the skeletons.
+ *
+ * @param sc      The USpoofChecker
+ * @param direction The context direction with which the identifier will be
+ *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param id      The UTF-8 format identifier whose skeleton will be computed.
+ * @param length  The length of the input string, in bytes,
+ *                or -1 if the string is zero terminated.
+ * @param dest    The output buffer, to receive the skeleton string.
+ * @param destCapacity  The length of the output buffer, in bytes.
+ *                The destCapacity may be zero, in which case the function will
+ *                return the actual length of the skeleton.
+ * @param status  The error code, set if an error occurred while attempting to
+ *                perform the check.  Possible Errors include U_INVALID_CHAR_FOUND
+ *                for invalid UTF-8 sequences, and
+ *                U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
+ *                to hold the complete skeleton.
+ * @return        The length of the skeleton string, in bytes.  The returned length
+ *                is always that of the complete skeleton, even when the
+ *                supplied buffer is too small (or of zero length)
+ *
+ * @draft ICU 74
+ */
+U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
+                                                    const char *id, int32_t length, char *dest,
+                                                    int32_t destCapacity, UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */
+
 /**
  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
  * in http://unicode.org/Public/security/latest/xidmodifications.txt
@ -1510,11 +1691,42 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
                                  const icu::UnicodeString &s2,
                                  UErrorCode *status);

+#ifndef U_HIDE_DRAFT_API
+/**
+ * A version of {@link uspoof_areBidiConfusable} accepting UnicodeStrings.
+ *
+ * @param sc      The USpoofChecker
+ * @param direction The paragraph direction with which the identifiers are
+ *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param s1     The first of the two identifiers to be compared for
+ *                confusability.  Passed as an icu::UnicodeString.
+ * @param s2     The second of the two identifiers to be compared for
+ *                confusability.  Passed as an icu::UnicodeString.
+ * @param status  The error code, set if an error occurred while attempting to
+ *                perform the check.
+ *                Confusability of the identifiers is not reported here,
+ *                but through this function's return value.
+ * @return        An integer value with bit(s) set corresponding to
+ *                the type of confusability found, as defined by
+ *                enum USpoofChecks.  Zero is returned if the identifiers
+ *                are not confusable.
+ *
+ * @draft ICU 74
+ *
+ * @see uspoof_areBidiConfusable
+ */
+U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
+                                                               UBiDiDirection direction,
+                                                               const icu::UnicodeString &s1,
+                                                               const icu::UnicodeString &s2,
+                                                               UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */
+
 /**
 *  Get the "skeleton" for an identifier.
 *  Skeletons are a transformation of the input identifier;
 *  Two identifiers are confusable if their skeletons are identical.
- *  See Unicode UAX #39 for additional information.
+ *  See Unicode Technical Standard #39 for additional information.
 *
 *  Using skeletons directly makes it possible to quickly check
 *  whether an identifier is confusable with any of some large
@ -1540,6 +1752,36 @@ uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
                                icu::UnicodeString &dest,
                                UErrorCode *status);

+#ifndef U_HIDE_DRAFT_API
+/**
+ *  Get the "bidiSkeleton" for an identifier and a direction.
+ *  Skeletons are a transformation of the input identifier;
+ *  Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
+ *  they are RTL-confusable if their RTL bidiSkeletons are identical.
+ *  See Unicode Technical Standard #39 for additional information:
+ *  https://www.unicode.org/reports/tr39/#Confusable_Detection.
+ *
+ *  Using skeletons directly makes it possible to quickly check
+ *  whether an identifier is confusable with any of some large
+ *  set of existing identifiers, by creating an efficiently
+ *  searchable collection of the skeletons.
+ *
+ * @param sc      The USpoofChecker.
+ * @param direction The context direction with which the identifier will be
+ *                displayed.  Must be either UBIDI_LTR or UBIDI_RTL.
+ * @param id      The input identifier whose bidiSkeleton will be computed.
+ * @param dest    The output identifier, to receive the skeleton string.
+ * @param status  The error code, set if an error occurred while attempting to
+ *                perform the check.
+ * @return        A reference to the destination (skeleton) string.
+ *
+ * @draft ICU 74
+ */
+U_I18N_API icu::UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(
+    const USpoofChecker *sc, UBiDiDirection direction, const icu::UnicodeString &id,
+    icu::UnicodeString &dest, UErrorCode *status);
+#endif /* U_HIDE_DRAFT_API */
+
 /**
  * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
  * in http://unicode.org/Public/security/latest/xidmodifications.txt
--- a/icu4c/source/i18n/uspoof.cpp
+++ b/icu4c/source/i18n/uspoof.cpp
@ -15,6 +15,7 @@
 *
 *   Unicode Spoof Detection
 */
+#include "unicode/ubidi.h"
 #include "unicode/utypes.h"
 #include "unicode/normalizer2.h"
 #include "unicode/uspoof.h"
@ -538,6 +539,90 @@ uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
    return result;
 }

+U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
+                                                  const char16_t *id1, int32_t length1,
+                                                  const char16_t *id2, int32_t length2,
+                                                   UErrorCode *status) {
+    UnicodeString id1Str((length1 == -1), id1, length1); // Aliasing constructor
+    UnicodeString id2Str((length2 == -1), id2, length2); // Aliasing constructor
+    if (id1Str.isBogus() || id2Str.isBogus()) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
+}
+
+U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
+                                                      const char *id1, int32_t length1, const char *id2,
+                                                      int32_t length2, UErrorCode *status) {
+    if (length1 < -1 || length2 < -1) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    UnicodeString id1Str = UnicodeString::fromUTF8(
+        StringPiece(id1, length1 >= 0 ? length1 : static_cast<int32_t>(uprv_strlen(id1))));
+    UnicodeString id2Str = UnicodeString::fromUTF8(
+        StringPiece(id2, length2 >= 0 ? length2 : static_cast<int32_t>(uprv_strlen(id2))));
+    return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status);
+}
+
+U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
+                                                               UBiDiDirection direction,
+                                                               const icu::UnicodeString &id1,
+                                                               const icu::UnicodeString &id2,
+                                                               UErrorCode *status) {
+    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    //
+    // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable,
+    //   and for definitions of the types (single, whole, mixed-script) of confusables.
+
+    // We only care about a few of the check flags.  Ignore the others.
+    // If no tests relevant to this function have been specified, return an error.
+    // TODO:  is this really the right thing to do?  It's probably an error on the caller's part,
+    //        but logically we would just return 0 (no error).
+    if ((This->fChecks & USPOOF_CONFUSABLE) == 0) {
+        *status = U_INVALID_STATE_ERROR;
+        return 0;
+    }
+
+    // Compute the skeletons and check for confusability.
+    UnicodeString id1Skeleton;
+    uspoof_getBidiSkeletonUnicodeString(sc, direction, id1, id1Skeleton, status);
+    UnicodeString id2Skeleton;
+    uspoof_getBidiSkeletonUnicodeString(sc, direction, id2, id2Skeleton, status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    if (id1Skeleton != id2Skeleton) {
+        return 0;
+    }
+
+    // If we get here, the strings are confusable.  Now we just need to set the flags for the appropriate
+    // classes of confusables according to UTS 39 section 4. Start by computing the resolved script sets
+    // of id1 and id2.
+    ScriptSet id1RSS;
+    This->getResolvedScriptSet(id1, id1RSS, *status);
+    ScriptSet id2RSS;
+    This->getResolvedScriptSet(id2, id2RSS, *status);
+
+    // Turn on all applicable flags
+    uint32_t result = 0;
+    if (id1RSS.intersects(id2RSS)) {
+        result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
+    } else {
+        result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
+        if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) {
+            result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
+        }
+    }
+
+    // Turn off flags that the user doesn't want
+    return result & This->fChecks;
+}
+

 U_CAPI int32_t U_EXPORT2
 uspoof_checkUnicodeString(const USpoofChecker *sc,
@ -697,6 +782,60 @@ uspoof_getSkeleton(const USpoofChecker *sc,
    return destStr.length();
 }

+U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, UBiDiDirection direction,
+                                                const UChar *id, int32_t length, UChar *dest,
+                                                int32_t destCapacity, UErrorCode *status) {
+    UnicodeString idStr((length == -1), id, length); // Aliasing constructor
+    if (idStr.isBogus()) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    UnicodeString destStr;
+    uspoof_getBidiSkeletonUnicodeString(sc, direction, idStr, destStr, status);
+    return destStr.extract(dest, destCapacity, *status);
+}
+
+
+
+U_I18N_API UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(const USpoofChecker *sc,
+                                                                        UBiDiDirection direction,
+                                                                        const UnicodeString &id,
+                                                                        UnicodeString &dest,
+                                                                        UErrorCode *status) {
+    dest.remove();
+    if (direction != UBIDI_LTR && direction != UBIDI_RTL) {
+      *status = U_ILLEGAL_ARGUMENT_ERROR;
+      return dest;
+    }
+    UBiDi *bidi = ubidi_open();
+    ubidi_setPara(bidi, id.getBuffer(), id.length(), direction,
+                  /*embeddingLevels*/ nullptr, status);
+    if (U_FAILURE(*status)) {
+        ubidi_close(bidi);
+        return dest;
+    }
+    UnicodeString reordered;
+    int32_t const size = ubidi_getProcessedLength(bidi);
+    UChar* const reorderedBuffer = reordered.getBuffer(size);
+    if (reorderedBuffer == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        ubidi_close(bidi);
+        return dest;
+    }
+    ubidi_writeReordered(bidi, reorderedBuffer, size,
+                         UBIDI_KEEP_BASE_COMBINING | UBIDI_DO_MIRRORING, status);
+    reordered.releaseBuffer(size);
+    ubidi_close(bidi);
+
+    if (U_FAILURE(*status)) {
+        return dest;
+    }
+
+    // The type parameter is deprecated since ICU 58; any number may be passed.
+    constexpr uint32_t deprecatedType = 58;
+    return uspoof_getSkeletonUnicodeString(sc, deprecatedType, reordered, dest, status);
+}
+


 U_I18N_API UnicodeString &  U_EXPORT2
@ -730,12 +869,8 @@ uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
    return dest;
 }

-
-U_CAPI int32_t U_EXPORT2
-uspoof_getSkeletonUTF8(const USpoofChecker *sc,
-                       uint32_t type,
-                       const char *id,  int32_t length,
-                       char *dest, int32_t destCapacity,
+U_CAPI int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id,
+                                                int32_t length, char *dest, int32_t destCapacity,
                       UErrorCode *status) {
    SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
@ -746,7 +881,8 @@ uspoof_getSkeletonUTF8(const USpoofChecker *sc,
        return 0;
    }

-    UnicodeString srcStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : static_cast<int32_t>(uprv_strlen(id))));
+    UnicodeString srcStr = UnicodeString::fromUTF8(
+        StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id))));
    UnicodeString destStr;
    uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status);
    if (U_FAILURE(*status)) {
@ -754,8 +890,28 @@ uspoof_getSkeletonUTF8(const USpoofChecker *sc,
    }

    int32_t lengthInUTF8 = 0;
-    u_strToUTF8(dest, destCapacity, &lengthInUTF8,
-                destStr.getBuffer(), destStr.length(), status);
+    u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
+    return lengthInUTF8;
+}
+
+U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
+                                                    const char *id, int32_t length, char *dest,
+                                                    int32_t destCapacity, UErrorCode *status) {
+    if (length < -1) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    UnicodeString srcStr = UnicodeString::fromUTF8(
+        StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id))));
+    UnicodeString destStr;
+    uspoof_getBidiSkeletonUnicodeString(sc, direction, srcStr, destStr, status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+
+    int32_t lengthInUTF8 = 0;
+    u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status);
    return lengthInUTF8;
 }

--- a/icu4c/source/test/cintltst/spooftest.c
+++ b/icu4c/source/test/cintltst/spooftest.c
@ -545,6 +545,26 @@ static void TestUSpoofCAPI(void) {

    TEST_TEARDOWN;

+    /*
+     * uspoof_areBidiConfusable()
+     */
+    TEST_SETUP
+        int32_t checkResults;
+
+        checkResults = uspoof_areBidiConfusable(sc, UBIDI_LTR, scLatin, -1, scMixed, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
+
+        checkResults = uspoof_areBidiConfusable(sc, UBIDI_LTR, goodGreek, -1, scLatin, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_EQ(0, checkResults);
+
+        checkResults = uspoof_areBidiConfusable(sc, UBIDI_LTR, lll_Latin_a, -1, lll_Latin_b, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);
+
+    TEST_TEARDOWN;
+
    /*
     * areConfusableUTF8
     */
@ -577,6 +597,38 @@ static void TestUSpoofCAPI(void) {

    TEST_TEARDOWN;

+    /*
+     * areBidiConfusableUTF8
+     */
+    TEST_SETUP
+        int32_t checkResults;
+        char s1[200];
+        char s2[200];
+
+
+        u_strToUTF8(s1, sizeof(s1), NULL, scLatin, -1, &status);
+        u_strToUTF8(s2, sizeof(s2), NULL, scMixed, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        checkResults = uspoof_areBidiConfusableUTF8(sc, UBIDI_LTR, s1, -1, s2, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_EQ(USPOOF_MIXED_SCRIPT_CONFUSABLE, checkResults);
+
+        u_strToUTF8(s1, sizeof(s1), NULL, goodGreek, -1, &status);
+        u_strToUTF8(s2, sizeof(s2), NULL, scLatin, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        checkResults = uspoof_areBidiConfusableUTF8(sc, UBIDI_LTR, s1, -1, s2, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_EQ(0, checkResults);
+
+        u_strToUTF8(s1, sizeof(s1), NULL, lll_Latin_a, -1, &status);
+        u_strToUTF8(s2, sizeof(s2), NULL, lll_Latin_b, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        checkResults = uspoof_areBidiConfusableUTF8(sc, UBIDI_LTR, s1, -1, s2, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, checkResults);
+
+    TEST_TEARDOWN;
+

  /*
   * getSkeleton
@ -602,6 +654,31 @@ static void TestUSpoofCAPI(void) {

    TEST_TEARDOWN;

+
+    /*
+     * getBidiSkeleton
+     */
+
+    TEST_SETUP
+        UChar dest[100];
+        int32_t   skelLength;
+
+        skelLength = uspoof_getBidiSkeleton(sc, UBIDI_LTR, lll_Latin_a, -1, dest, UPRV_LENGTHOF(dest), &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_EQ(0, u_strcmp(lll_Skel, dest));
+        TEST_ASSERT_EQ(u_strlen(lll_Skel), skelLength);
+
+        skelLength = uspoof_getBidiSkeletonUTF8(sc, UBIDI_LTR, goodLatinUTF8, -1, (char *)dest,
+                                                UPRV_LENGTHOF(dest), &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        skelLength = uspoof_getBidiSkeleton(sc, UBIDI_LTR, lll_Latin_a, -1, NULL, 0, &status);
+        TEST_ASSERT_EQ(U_BUFFER_OVERFLOW_ERROR, status);
+        TEST_ASSERT_EQ(3, skelLength);
+        status = U_ZERO_ERROR;
+
+    TEST_TEARDOWN;
+
    /*
     * get Inclusion and Recommended sets
     */
--- a/icu4c/source/test/depstest/dependencies.txt
+++ b/icu4c/source/test/depstest/dependencies.txt
@ -917,7 +917,7 @@ group: charset_detector
 group: spoof_detection
    uspoof.o uspoof_build.o uspoof_conf.o uspoof_impl.o scriptset.o
  deps
-    uniset_props regex unorm uscript
+    uniset_props regex unorm uscript ubidi

 group: alphabetic_index
    alphaindex.o
--- a/icu4c/source/test/intltest/itspoof.cpp
+++ b/icu4c/source/test/intltest/itspoof.cpp
@ -95,6 +95,7 @@ void IntlTestSpoof::runIndexedTest( int32_t index, UBool exec, const char* &name
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(testSpoofAPI);
    TESTCASE_AUTO(testSkeleton);
+    TESTCASE_AUTO(testBidiSkeleton);
    TESTCASE_AUTO(testAreConfusable);
    TESTCASE_AUTO(testInvisible);
    TESTCASE_AUTO(testConfData);
@ -154,10 +155,13 @@ void IntlTestSpoof::testSpoofAPI() {
    TEST_TEARDOWN;
 }

+#define CHECK_SKELETON(type, input, expected)                                                           \
+    UPRV_BLOCK_MACRO_BEGIN { checkSkeleton(sc, type, input, expected, __LINE__); }                      \
+    UPRV_BLOCK_MACRO_END

-#define CHECK_SKELETON(type, input, expected) UPRV_BLOCK_MACRO_BEGIN { \
-    checkSkeleton(sc, type, input, expected, __LINE__); \
-} UPRV_BLOCK_MACRO_END
+#define CHECK_BIDI_SKELETON(type, input, expected)                                                           \
+    UPRV_BLOCK_MACRO_BEGIN { checkBidiSkeleton(sc, type, input, expected, __LINE__); }                      \
+    UPRV_BLOCK_MACRO_END


 // testSkeleton.   Spot check a number of confusable skeleton substitutions from the 
@ -227,6 +231,15 @@ void IntlTestSpoof::testSkeleton() {
    TEST_TEARDOWN;
 }

+// testBidiSkeleton.  Spot check the bidiSkeleton of two strings under both paragraph directions.
+//   Per the expected values below, the two inputs share one LTR bidiSkeleton
+//   but have different RTL bidiSkeletons.
+void IntlTestSpoof::testBidiSkeleton() {
+    TEST_SETUP
+    CHECK_BIDI_SKELETON(u"A1<שׂ", UBIDI_LTR, u"Al<ש\u0307");
+    CHECK_BIDI_SKELETON(u"Αשֺ>1", UBIDI_LTR, u"Al<ש\u0307");
+    CHECK_BIDI_SKELETON(u"A1<שׂ", UBIDI_RTL, u"ש\u0307>Al");
+    CHECK_BIDI_SKELETON(u"Αשֺ>1", UBIDI_RTL, u"l<ש\u0307A");
+    TEST_TEARDOWN;
+}
+

 //
 //  Run a single confusable skeleton transformation test case.
@ -252,6 +265,31 @@ void IntlTestSpoof::checkSkeleton(const USpoofChecker *sc, uint32_t type,
    }
 }

+//
+//  Run a single confusable bidiSkeleton transformation test case.
+//
+//    sc         the spoof checker handed to uspoof_getBidiSkeletonUnicodeString
+//    input      the test string; unescape() is applied to it before use
+//    direction  the direction argument handed to uspoof_getBidiSkeletonUnicodeString
+//    expected   the expected bidiSkeleton; unescape() is applied to it before comparing
+//    lineNum    line number of the test case, echoed in every failure message
+//
+void IntlTestSpoof::checkBidiSkeleton(const USpoofChecker *sc, const UnicodeString &input,
+                                      UBiDiDirection direction, const UnicodeString &expected,
+                                      int32_t lineNum) {
+    UnicodeString uInput = input.unescape();
+    UnicodeString uExpected = expected.unescape();
+
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString actual;
+    uspoof_getBidiSkeletonUnicodeString(sc, direction, uInput, actual, &status);
+    // A failing status is reported on its own; the skeleton comparison is skipped in that case.
+    if (U_FAILURE(status)) {
+        errln("File %s, Line %d, Test case from line %d, status is %s", __FILE__, __LINE__, lineNum,
+              u_errorName(status));
+        return;
+    }
+    // On a mismatch, report the location first and then both skeletons.
+    if (uExpected != actual) {
+        errln("File %s, Line %d, Test case from line %d, Actual and Expected skeletons differ.",
+              __FILE__, __LINE__, lineNum);
+        errln(UnicodeString(" Actual   Skeleton: \"") + actual + UnicodeString("\"\n") +
+              UnicodeString(" Expected Skeleton: \"") + uExpected + UnicodeString("\""));
+    }
+}
+
 void IntlTestSpoof::testAreConfusable() {
    TEST_SETUP
        UnicodeString s1("A long string that will overflow stack buffers.  A long string that will overflow stack buffers. "
@ -265,6 +303,20 @@ void IntlTestSpoof::testAreConfusable() {
    TEST_TEARDOWN;
 }

+// testAreBidiConfusable.  Check that "J-2" and a string built from J, 2, EN DASH and two
+//   U+200F marks are reported as USPOOF_SINGLE_SCRIPT_CONFUSABLE for UBIDI_LTR.
+// NOTE(review): the runIndexedTest hunk in this patch adds TESTCASE_AUTO(testBidiSkeleton) but no
+//   TESTCASE_AUTO(testAreBidiConfusable) -- confirm this test is registered; otherwise it
+//   presumably never runs.
+void IntlTestSpoof::testAreBidiConfusable() {
+    TEST_SETUP
+        const UnicodeString jHyphen2(u"J-2");
+        // The following string has RLMs around the 2–, flipping it; it uses an
+        // EN DASH instead of the HYPHEN-MINUS above.
+        const UnicodeString j2Dash(u"J\u200F2\u2013\u200F");
+        // Same string spelled with literal characters instead of escapes.
+        TEST_ASSERT(j2Dash == u"J‏2–‏");
+        int32_t result = uspoof_areBidiConfusableUnicodeString(sc, UBIDI_LTR, jHyphen2, j2Dash, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_EQ(USPOOF_SINGLE_SCRIPT_CONFUSABLE, result);
+
+    TEST_TEARDOWN;
+}
+
 void IntlTestSpoof::testInvisible() {
    TEST_SETUP
        UnicodeString  s = UnicodeString("abcd\\u0301ef").unescape();
--- a/icu4c/source/test/intltest/itspoof.h
+++ b/icu4c/source/test/intltest/itspoof.h
@ -30,8 +30,12 @@ public:

    void  testSkeleton();

+    void testBidiSkeleton();
+
    void testAreConfusable();
    
+    void testAreBidiConfusable();
+
    void testInvisible();

    void testConfData();
@ -56,9 +60,11 @@ public:

    void testCombiningDot();

-    // Internal function to run a single skeleton test case.
-    void  checkSkeleton(const USpoofChecker *sc, uint32_t flags, 
-                        const char *input, const char *expected, int32_t lineNum);
+    // Internal functions to run a single skeleton test case.
+    void checkSkeleton(const USpoofChecker *sc, uint32_t flags, const char *input, const char *expected,
+                       int32_t lineNum);
+    void checkBidiSkeleton(const USpoofChecker *sc, const UnicodeString &input, UBiDiDirection direction,
+                           const UnicodeString  &expected, int32_t lineNum);
 };

 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_NORMALIZATION && !UCONFIG_NO_FILE_IO
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/SpoofChecker.java
@ -81,6 +81,22 @@ import com.ibm.icu.util.ULocale;
 * application startup), and the more efficient {@link SpoofChecker#areConfusable} method can be used at runtime.
 *
 * <p>
+ * If the paragraph direction used to display the strings is known, it should be passed to {@link SpoofChecker#areConfusable}:
+ *
+ * <pre>
+ * <code>
+ * // These strings look identical when rendered in a left-to-right context.
+ * // They look distinct in a right-to-left context.
+ * String s1 = "A1\u05D0";  // A1א
+ * String s2 = "A\u05D01";  // Aא1
+ *
+ * SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CONFUSABLE).build();
+ * int result = sc.areConfusable(Bidi.DIRECTION_LEFT_TO_RIGHT, s1, s2);
+ * System.out.println(result != 0);  // true
+ * </code>
+ * </pre>
+ *
+ * <p>
 * UTS 39 defines two strings to be <em>confusable</em> if they map to the same skeleton. A <em>skeleton</em> is a
 * sequence of families of confusable characters, where each family has a single exemplar character.
 * {@link SpoofChecker#getSkeleton} computes the skeleton for a particular string, so the following snippet is
@ -1422,7 +1438,7 @@ public class SpoofChecker {
    }

    /**
-     * Check the whether two specified strings are visually confusable. The types of confusability to be tested - single
+     * Check whether two specified strings are visually confusable. The types of confusability to be tested - single
     * script, mixed script, or whole script - are determined by the check options set for the SpoofChecker.
     *
     * The tests to be performed are controlled by the flags SINGLE_SCRIPT_CONFUSABLE MIXED_SCRIPT_CONFUSABLE
@ -1442,7 +1458,7 @@ public class SpoofChecker {
     */
    public int areConfusable(String s1, String s2) {
        //
-        // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable,
+        // See section 4 of UTS #39 for the algorithm for checking whether two strings are confusable,
        // and for definitions of the types (single, whole, mixed-script) of confusables.

        // We only care about a few of the check flags. Ignore the others.
@ -1479,12 +1495,104 @@ public class SpoofChecker {
            }
        }

+        // Turn off flags that the user doesn't want
+        return result & fChecks;
+    }
+
+    /**
+     * Check whether two specified strings are visually confusable when displayed in a paragraph with the given direction.
+     * The types of confusability to be tested—single script, mixed script, or whole script—are determined by the check options set for the SpoofChecker.
+     *
+     * The tests to be performed are controlled by the flags SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, and
+     * WHOLE_SCRIPT_CONFUSABLE. At least one of these tests must be selected.
+     *
+     * ANY_CASE is a modifier for the tests. Select it if the identifiers may be of mixed case. If identifiers are case
+     * folded for comparison and display to the user, do not select the ANY_CASE option.
+     *
+     *
+     * @param direction The paragraph direction with which the identifiers are displayed.
+     *                  Must be either {@link Bidi#DIRECTION_LEFT_TO_RIGHT} or {@link Bidi#DIRECTION_RIGHT_TO_LEFT}.
+     * @param s1
+     *            The first of the two strings to be compared for confusability.
+     * @param s2
+     *            The second of the two strings to be compared for confusability.
+     * @return Non-zero if s1 and s2 are confusable. If not 0, the value will indicate the type(s) of confusability
+     *         found, as defined by spoof check test constants.
+     * @throws IllegalArgumentException if none of the CONFUSABLE check bits is enabled on this SpoofChecker,
+     *         or if direction is rejected by {@link #getBidiSkeleton}.
+     * @draft ICU 74
+     */
+    public int areConfusable(int direction, CharSequence s1, CharSequence s2) {
+        //
+        // See section 4 of UTS #39 for the algorithm for checking whether two strings are confusable,
+        // and for definitions of the types (single, whole, mixed-script) of confusables.
+
+        // We only care about a few of the check flags. Ignore the others.
+        // If no tests relevant to this function have been specified, signal an error.
+        // TODO: is this really the right thing to do? It's probably an error on
+        // the caller's part, but logically we would just return 0 (no error).
+        if ((this.fChecks & CONFUSABLE) == 0) {
+            throw new IllegalArgumentException("No confusable checks are enabled.");
+        }
+
+        // Compute the bidiSkeletons for the given direction and check for confusability.
+        String s1Skeleton = getBidiSkeleton(direction, s1);
+        String s2Skeleton = getBidiSkeleton(direction, s2);
+        if (!s1Skeleton.equals(s2Skeleton)) {
+            return 0;
+        }
+
+        // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate classes
+        // of confusables according to UTS #39 section 4.
+        // Start by computing the resolved script sets of s1 and s2 (from the original strings, not the skeletons).
+        ScriptSet s1RSS = new ScriptSet();
+        getResolvedScriptSet(s1, s1RSS);
+        ScriptSet s2RSS = new ScriptSet();
+        getResolvedScriptSet(s2, s2RSS);
+
+        // Turn on all applicable flags
+        int result = 0;
+        if (s1RSS.intersects(s2RSS)) {
+            result |= SINGLE_SCRIPT_CONFUSABLE;
+        } else {
+            result |= MIXED_SCRIPT_CONFUSABLE;
+            if (!s1RSS.isEmpty() && !s2RSS.isEmpty()) {
+                result |= WHOLE_SCRIPT_CONFUSABLE;
+            }
+        }
+
         // Turn off flags that the user doesn't want
         result &= fChecks;

         return result;
     }

+    /**
+     * Get the "bidiSkeleton" for an identifier string and a direction.
+     * Skeletons are a transformation of the input string.
+     * Two identifiers are LTR-confusable if their LTR bidiSkeletons are identical;
+     * they are RTL-confusable if their RTL bidiSkeletons are identical.
+     * See Unicode Technical Standard #39 for additional information:
+     * https://www.unicode.org/reports/tr39/#Confusable_Detection.
+     *
+     * Using skeletons directly makes it possible to quickly check whether an identifier is confusable with any of some
+     * large set of existing identifiers, by creating an efficiently searchable collection of the skeletons.
+     *
+     * Skeletons are computed using the algorithm and data described in UTS #39.
+     *
+     * @param direction The paragraph direction with which the string is displayed.
+     *                  Must be either {@link Bidi#DIRECTION_LEFT_TO_RIGHT} or {@link Bidi#DIRECTION_RIGHT_TO_LEFT}.
+     * @param str The input string whose bidiSkeleton will be generated.
+     * @return The output skeleton string.
+     * @throws IllegalArgumentException if direction is neither {@link Bidi#DIRECTION_LEFT_TO_RIGHT}
+     *         nor {@link Bidi#DIRECTION_RIGHT_TO_LEFT}.
+     *
+     * @draft ICU 74
+     */
+    public String getBidiSkeleton(int direction, CharSequence str) {
+        if (direction != Bidi.DIRECTION_LEFT_TO_RIGHT && direction != Bidi.DIRECTION_RIGHT_TO_LEFT) {
+            throw new IllegalArgumentException("direction should be DIRECTION_LEFT_TO_RIGHT or DIRECTION_RIGHT_TO_LEFT");
+        }
+        // The ordinary skeleton is taken of the string returned by Bidi.writeReordered for the
+        // requested paragraph direction, not of the input as stored.
+        Bidi bidi = new Bidi(str.toString(), direction);
+        return getSkeleton(bidi.writeReordered(Bidi.KEEP_BASE_COMBINING | Bidi.DO_MIRRORING));
+    }
+
    /**
     * Get the "skeleton" for an identifier string. Skeletons are a transformation of the input string; Two strings are
     * confusable if their skeletons are identical. See Unicode UAX 39 for additional information.
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/text/SpoofCheckerTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/text/SpoofCheckerTest.java
@ -36,6 +36,7 @@ import com.ibm.icu.impl.Utility;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.Bidi;
 import com.ibm.icu.text.Normalizer2;
 import com.ibm.icu.text.SpoofChecker;
 import com.ibm.icu.text.SpoofChecker.CheckResult;
@ -455,6 +456,16 @@ public class SpoofCheckerTest extends TestFmwk {

    }

+    // Spot check the bidiSkeleton of two strings under both paragraph directions.
+    // Per the expected values below, the two inputs share one LTR bidiSkeleton
+    // but have different RTL bidiSkeletons.
+    @Test
+    public void TestBidiSkeleton() {
+        final SpoofChecker sc = new SpoofChecker.Builder().build();
+        final String testName = "TestBidiSkeleton";
+        checkBidiSkeleton(sc, Bidi.DIRECTION_LEFT_TO_RIGHT, "A1<שׂ", "Al<ש\u0307", testName);
+        checkBidiSkeleton(sc, Bidi.DIRECTION_LEFT_TO_RIGHT, "Αשֺ>1", "Al<ש\u0307", testName);
+        checkBidiSkeleton(sc, Bidi.DIRECTION_RIGHT_TO_LEFT, "A1<שׂ", "ש\u0307>Al", testName);
+        checkBidiSkeleton(sc, Bidi.DIRECTION_RIGHT_TO_LEFT, "Αשֺ>1", "l<ש\u0307A", testName);
+    }
+
    // Internal function to run a single skeleton test case.
    //
    // Run a single confusable skeleton transformation test case.
@ -470,6 +481,19 @@ public class SpoofCheckerTest extends TestFmwk {
        assertEquals(testName + " test at line " + lineNumberOfTest + " :  Expected (escaped): " + expected, uExpected, actual);
    }

+    // Internal function to run a single bidiSkeleton test case.
+    //
+    // Asserts that sc.getBidiSkeleton(direction, input) equals expected. The failure
+    // message names the calling test, the direction (LTR/RTL) and the input string.
+    //
+    void checkBidiSkeleton(SpoofChecker sc, int direction, String input, String expected, String testName) {
+        assertEquals(
+            testName + ": bidiSkeleton(" +
+            (direction == Bidi.DIRECTION_LEFT_TO_RIGHT ? "LTR" : "RTL") +
+            ", \"" + input + "\")",
+            expected,
+            sc.getBidiSkeleton(direction, input));
+    }
+
    @Test
    public void TestAreConfusable() {
        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CONFUSABLE).build();
@ -480,6 +504,21 @@ public class SpoofCheckerTest extends TestFmwk {
        assertEquals("", SpoofChecker.SINGLE_SCRIPT_CONFUSABLE, sc.areConfusable(s1, s2));
    }

+    // Check that "J-2" and a string built from J, 2, EN DASH and two U+200F marks are
+    // reported as SINGLE_SCRIPT_CONFUSABLE for a left-to-right paragraph direction.
+    @Test
+    public void TestAreBidiConfusable() {
+        SpoofChecker sc = new SpoofChecker.Builder().setChecks(SpoofChecker.CONFUSABLE).build();
+        final String jHyphen2 = "J-2";
+        // The following string has RLMs around the 2–, flipping it; it uses an
+        // EN DASH instead of the HYPHEN-MINUS above.
+        final String j2Dash = "J\u200F2\u2013\u200F";
+        // Same string spelled with literal characters instead of escapes.
+        assertEquals("Unescaped display of j2Dash", "J‏2–‏", j2Dash);
+
+        assertEquals(
+            "Expected single-script confusability",
+            SpoofChecker.SINGLE_SCRIPT_CONFUSABLE,
+            sc.areConfusable(Bidi.DIRECTION_LEFT_TO_RIGHT, jHyphen2, j2Dash));
+    }
+
    @Test
    public void TestConfusableFlagVariants() {
        // The spoof checker should only return those tests that the user requested.  This test makes sure that