ICU-20777 Merge the likelySubtags implementation

Change testdata/likelySubtags.txt to consider FAIL line ICU-20777 Fix Java Tests ICU-20777 Fix all issues ICU-20777 In case of timeout ICU-20777 ICU-20777 Skip Data Driven test
2025-04-11 08:01:32 +00:00 · 2023-08-15 20:23:28 -07:00 · 2023-08-15 20:23:28 -07:00 · ffc449de62
commit ffc449de62
parent 27181e36a6
24 changed files with 3958 additions and 1493 deletions
--- a/icu4c/source/common/BUILD.bazel
+++ b/icu4c/source/common/BUILD.bazel
@ -603,7 +603,9 @@ cc_library(
        "locbased.cpp",
        "locid.cpp",
        "loclikely.cpp",
+        "loclikelysubtags.cpp",
        "locmap.cpp",
+        "lsr.cpp",
        "resbund.cpp",
        "resource.cpp",
        "uloc.cpp",
--- a/icu4c/source/common/localematcher.cpp
+++ b/icu4c/source/common/localematcher.cpp
@ -307,7 +307,7 @@ LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale
    if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) {
        return UND_LSR;
    } else {
-        return likelySubtags.makeMaximizedLsrFrom(locale, errorCode);
+        return likelySubtags.makeMaximizedLsrFrom(locale, false, errorCode);
    }
 }

--- a/icu4c/source/common/locid.cpp
+++ b/icu4c/source/common/locid.cpp
@ -2080,6 +2080,10 @@ Locale::addLikelySubtags(UErrorCode& status) {

 void
 Locale::minimizeSubtags(UErrorCode& status) {
+    Locale::minimizeSubtags(false, status);  // delegate to the two-argument overload with favorScript = false
+}
+void
+Locale::minimizeSubtags(bool favorScript, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
@ -2087,7 +2091,7 @@ Locale::minimizeSubtags(UErrorCode& status) {
    CharString minimizedLocaleID;
    {
        CharStringByteSink sink(&minimizedLocaleID);
-        ulocimp_minimizeSubtags(fullName, sink, &status);
+        ulocimp_minimizeSubtags(fullName, sink, favorScript, &status);
    }

    if (U_FAILURE(status)) {
--- a/icu4c/source/common/loclikely.cpp
+++ b/icu4c/source/common/loclikely.cpp
@ -31,82 +31,10 @@
 #include "charstr.h"
 #include "cmemory.h"
 #include "cstring.h"
+#include "loclikelysubtags.h"
 #include "ulocimp.h"
 #include "ustr_imp.h"

-/**
- * These are the canonical strings for unknown languages, scripts and regions.
- **/
-static const char* const unknownLanguage = "und";
-static const char* const unknownScript = "Zzzz";
-static const char* const unknownRegion = "ZZ";
-
-/**
- * This function looks for the localeID in the likelySubtags resource.
- *
- * @param localeID The tag to find.
- * @param buffer A buffer to hold the matching entry
- * @param bufferLength The length of the output buffer
- * @return A pointer to "buffer" if found, or a null pointer if not.
- */
-static const char*  U_CALLCONV
-findLikelySubtags(const char* localeID,
-                  char* buffer,
-                  int32_t bufferLength,
-                  UErrorCode* err) {
-    const char* result = nullptr;
-
-    if (!U_FAILURE(*err)) {
-        int32_t resLen = 0;
-        const char16_t* s = nullptr;
-        UErrorCode tmpErr = U_ZERO_ERROR;
-        icu::LocalUResourceBundlePointer subtags(ures_openDirect(nullptr, "likelySubtags", &tmpErr));
-        if (U_SUCCESS(tmpErr)) {
-            icu::CharString und;
-            if (localeID != nullptr) {
-                if (*localeID == '\0') {
-                    localeID = unknownLanguage;
-                } else if (*localeID == '_') {
-                    und.append(unknownLanguage, *err);
-                    und.append(localeID, *err);
-                    if (U_FAILURE(*err)) {
-                        return nullptr;
-                    }
-                    localeID = und.data();
-                }
-            }
-            s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
-
-            if (U_FAILURE(tmpErr)) {
-                /*
-                 * If a resource is missing, it's not really an error, it's
-                 * just that we don't have any data for that particular locale ID.
-                 */
-                if (tmpErr != U_MISSING_RESOURCE_ERROR) {
-                    *err = tmpErr;
-                }
-            }
-            else if (resLen >= bufferLength) {
-                /* The buffer should never overflow. */
-                *err = U_INTERNAL_PROGRAM_ERROR;
-            }
-            else {
-                u_UCharsToChars(s, buffer, resLen + 1);
-                if (resLen >= 3 &&
-                    uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
-                    (resLen == 3 || buffer[3] == '_')) {
-                    uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
-                }
-                result = buffer;
-            }
-        } else {
-            *err = tmpErr;
-        }
-    }
-
-    return result;
-}
-
 /**
 * Append a tag to a buffer, adding the separator if necessary.  The buffer
 * must be large enough to contain the resulting tag plus any separator
@ -360,57 +288,6 @@ error:
    }
 }

-/**
- * Create a tag string from the supplied parameters.  The lang, script and region
- * parameters may be nullptr pointers. If they are, their corresponding length parameters
- * must be less than or equal to 0.  If the lang parameter is an empty string, the
- * default value for an unknown language is written to the output buffer.
- *
- * If the length of the new string exceeds the capacity of the output buffer, 
- * the function copies as many bytes to the output buffer as it can, and returns
- * the error U_BUFFER_OVERFLOW_ERROR.
- *
- * If an illegal argument is provided, the function returns the error
- * U_ILLEGAL_ARGUMENT_ERROR.
- *
- * @param lang The language tag to use.
- * @param langLength The length of the language tag.
- * @param script The script tag to use.
- * @param scriptLength The length of the script tag.
- * @param region The region tag to use.
- * @param regionLength The length of the region tag.
- * @param trailing Any trailing data to append to the new tag.
- * @param trailingLength The length of the trailing data.
- * @param sink The output sink receiving the tag string.
- * @param err A pointer to a UErrorCode for error reporting.
- **/
-static void U_CALLCONV
-createTagString(
-    const char* lang,
-    int32_t langLength,
-    const char* script,
-    int32_t scriptLength,
-    const char* region,
-    int32_t regionLength,
-    const char* trailing,
-    int32_t trailingLength,
-    icu::ByteSink& sink,
-    UErrorCode* err)
-{
-    createTagStringWithAlternates(
-                lang,
-                langLength,
-                script,
-                scriptLength,
-                region,
-                regionLength,
-                trailing,
-                trailingLength,
-                nullptr,
-                sink,
-                err);
-}
-
 /**
 * Parse the language, script, and region subtags from a tag string, and copy the
 * results into the corresponding output parameters. The buffers are null-terminated,
@ -494,13 +371,6 @@ parseTagString(
    *scriptLength = subtagLength;

    if (*scriptLength > 0) {
-        if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
-            /**
-             * If the script part is the "unknown" script, then don't return it.
-             **/
-            *scriptLength = 0;
-        }
-
        /*
         * Move past any separator.
         */
@ -517,14 +387,7 @@ parseTagString(

    *regionLength = subtagLength;

-    if (*regionLength > 0) {
-        if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
-            /**
-             * If the region part is the "unknown" region, then don't return it.
-             **/
-            *regionLength = 0;
-        }
-    } else if (*position != 0 && *position != '@') {
+    if (*regionLength <= 0 && *position != 0 && *position != '@') {
        /* back up over consumed trailing separator */
        --position;
    }
@ -546,264 +409,6 @@ error:
    goto exit;
 }

-static UBool U_CALLCONV
-createLikelySubtagsString(
-    const char* lang,
-    int32_t langLength,
-    const char* script,
-    int32_t scriptLength,
-    const char* region,
-    int32_t regionLength,
-    const char* variants,
-    int32_t variantsLength,
-    icu::ByteSink& sink,
-    UErrorCode* err) {
-    /**
-     * ULOC_FULLNAME_CAPACITY will provide enough capacity
-     * that we can build a string that contains the language,
-     * script and region code without worrying about overrunning
-     * the user-supplied buffer.
-     **/
-    char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
-
-    if(U_FAILURE(*err)) {
-        goto error;
-    }
-
-    /**
-     * Try the language with the script and region first.
-     **/
-    if (scriptLength > 0 && regionLength > 0) {
-
-        const char* likelySubtags = nullptr;
-
-        icu::CharString tagBuffer;
-        {
-            icu::CharStringByteSink sink(&tagBuffer);
-            createTagString(
-                lang,
-                langLength,
-                script,
-                scriptLength,
-                region,
-                regionLength,
-                nullptr,
-                0,
-                sink,
-                err);
-        }
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-
-        likelySubtags =
-            findLikelySubtags(
-                tagBuffer.data(),
-                likelySubtagsBuffer,
-                sizeof(likelySubtagsBuffer),
-                err);
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-
-        if (likelySubtags != nullptr) {
-            /* Always use the language tag from the
-               maximal string, since it may be more
-               specific than the one provided. */
-            createTagStringWithAlternates(
-                        nullptr,
-                        0,
-                        nullptr,
-                        0,
-                        nullptr,
-                        0,
-                        variants,
-                        variantsLength,
-                        likelySubtags,
-                        sink,
-                        err);
-            return true;
-        }
-    }
-
-    /**
-     * Try the language with just the script.
-     **/
-    if (scriptLength > 0) {
-
-        const char* likelySubtags = nullptr;
-
-        icu::CharString tagBuffer;
-        {
-            icu::CharStringByteSink sink(&tagBuffer);
-            createTagString(
-                lang,
-                langLength,
-                script,
-                scriptLength,
-                nullptr,
-                0,
-                nullptr,
-                0,
-                sink,
-                err);
-        }
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-
-        likelySubtags =
-            findLikelySubtags(
-                tagBuffer.data(),
-                likelySubtagsBuffer,
-                sizeof(likelySubtagsBuffer),
-                err);
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-
-        if (likelySubtags != nullptr) {
-            /* Always use the language tag from the
-               maximal string, since it may be more
-               specific than the one provided. */
-            createTagStringWithAlternates(
-                        nullptr,
-                        0,
-                        nullptr,
-                        0,
-                        region,
-                        regionLength,
-                        variants,
-                        variantsLength,
-                        likelySubtags,
-                        sink,
-                        err);
-            return true;
-        }
-    }
-
-    /**
-     * Try the language with just the region.
-     **/
-    if (regionLength > 0) {
-
-        const char* likelySubtags = nullptr;
-
-        icu::CharString tagBuffer;
-        {
-            icu::CharStringByteSink sink(&tagBuffer);
-            createTagString(
-                lang,
-                langLength,
-                nullptr,
-                0,
-                region,
-                regionLength,
-                nullptr,
-                0,
-                sink,
-                err);
-        }
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-
-        likelySubtags =
-            findLikelySubtags(
-                tagBuffer.data(),
-                likelySubtagsBuffer,
-                sizeof(likelySubtagsBuffer),
-                err);
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-
-        if (likelySubtags != nullptr) {
-            /* Always use the language tag from the
-               maximal string, since it may be more
-               specific than the one provided. */
-            createTagStringWithAlternates(
-                        nullptr,
-                        0,
-                        script,
-                        scriptLength,
-                        nullptr,
-                        0,
-                        variants,
-                        variantsLength,
-                        likelySubtags,
-                        sink,
-                        err);
-            return true;
-        }
-    }
-
-    /**
-     * Finally, try just the language.
-     **/
-    {
-        const char* likelySubtags = nullptr;
-
-        icu::CharString tagBuffer;
-        {
-            icu::CharStringByteSink sink(&tagBuffer);
-            createTagString(
-                lang,
-                langLength,
-                nullptr,
-                0,
-                nullptr,
-                0,
-                nullptr,
-                0,
-                sink,
-                err);
-        }
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-
-        likelySubtags =
-            findLikelySubtags(
-                tagBuffer.data(),
-                likelySubtagsBuffer,
-                sizeof(likelySubtagsBuffer),
-                err);
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-
-        if (likelySubtags != nullptr) {
-            /* Always use the language tag from the
-               maximal string, since it may be more
-               specific than the one provided. */
-            createTagStringWithAlternates(
-                        nullptr,
-                        0,
-                        script,
-                        scriptLength,
-                        region,
-                        regionLength,
-                        variants,
-                        variantsLength,
-                        likelySubtags,
-                        sink,
-                        err);
-            return true;
-        }
-    }
-
-    return false;
-
-error:
-
-    if (!U_FAILURE(*err)) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
-    }
-
-    return false;
-}
-
 #define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
@ -836,7 +441,6 @@ _uloc_addLikelySubtags(const char* localeID,
    const char* trailing = "";
    int32_t trailingLength = 0;
    int32_t trailingIndex = 0;
-    UBool success = false;

    if(U_FAILURE(*err)) {
        goto error;
@ -862,6 +466,9 @@ _uloc_addLikelySubtags(const char* localeID,

        goto error;
    }
+    if (langLength > 3) {
+        goto error;
+    }

    /* Find the length of the trailing portion. */
    while (_isIDSeparator(localeID[trailingIndex])) {
@ -871,30 +478,33 @@ _uloc_addLikelySubtags(const char* localeID,
    trailingLength = (int32_t)uprv_strlen(trailing);

    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
-
-    success =
-        createLikelySubtagsString(
-            lang,
-            langLength,
-            script,
-            scriptLength,
-            region,
-            regionLength,
+    {
+        const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err);
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
+        icu::LSR lsr = likelySubtags->makeMaximizedLsrFrom(icu::Locale::createFromName(localeID), true, *err);
+        const char* language = lsr.language;
+        if (uprv_strcmp(language, "und") == 0) {
+            language = "";
+        }
+        createTagStringWithAlternates(
+            language,
+            (int32_t)uprv_strlen(language),
+            lsr.script,
+            (int32_t)uprv_strlen(lsr.script),
+            lsr.region,
+            (int32_t)uprv_strlen(lsr.region),
            trailing,
            trailingLength,
+            nullptr,
            sink,
            err);
-
-    if (!success) {
-        const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
-
-        /*
-         * If we get here, we need to return localeID.
-         */
-        sink.Append(localeID, localIDLength);
+        if(U_FAILURE(*err)) {
+            goto error;
+        }
    }
-
-    return success;
+    return true;

 error:

@ -913,6 +523,7 @@ static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*)
 static void
 _uloc_minimizeSubtags(const char* localeID,
                      icu::ByteSink& sink,
+                      bool favorScript,
                      UErrorCode* err) {
    icu::CharString maximizedTagBuffer;

@ -925,7 +536,6 @@ _uloc_minimizeSubtags(const char* localeID,
    const char* trailing = "";
    int32_t trailingLength = 0;
    int32_t trailingIndex = 0;
-    UBool successGetMax = false;

    if(U_FAILURE(*err)) {
        goto error;
@ -964,213 +574,38 @@ _uloc_minimizeSubtags(const char* localeID,
    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);

    {
-        icu::CharString base;
-        {
-            icu::CharStringByteSink baseSink(&base);
-            createTagString(
-                lang,
-                langLength,
-                script,
-                scriptLength,
-                region,
-                regionLength,
-                nullptr,
-                0,
-                baseSink,
-                err);
-        }
-
-        /**
-         * First, we need to first get the maximization
-         * from AddLikelySubtags.
-         **/
-        {
-            icu::CharStringByteSink maxSink(&maximizedTagBuffer);
-            successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err);
-        }
-    }
-
-    if(U_FAILURE(*err)) {
-        goto error;
-    }
-
-    if (!successGetMax) {
-        /**
-         * If we got here, return the locale ID parameter unchanged.
-         **/
-        const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
-        sink.Append(localeID, localeIDLength);
-        return;
-    }
-
-    // In the following, the lang, script, region are referring to those in
-    // the maximizedTagBuffer, not the one in the localeID.
-    langLength = sizeof(lang);
-    scriptLength = sizeof(script);
-    regionLength = sizeof(region);
-    parseTagString(
-        maximizedTagBuffer.data(),
-        lang,
-        &langLength,
-        script,
-        &scriptLength,
-        region,
-        &regionLength,
-        err);
-    if(U_FAILURE(*err)) {
-        goto error;
-    }
-
-    /**
-     * Start first with just the language.
-     **/
-    {
-        icu::CharString tagBuffer;
-        {
-            icu::CharStringByteSink tagSink(&tagBuffer);
-            createLikelySubtagsString(
-                lang,
-                langLength,
-                nullptr,
-                0,
-                nullptr,
-                0,
-                nullptr,
-                0,
-                tagSink,
-                err);
-        }
-
+        const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err);
        if(U_FAILURE(*err)) {
            goto error;
        }
-        else if (!tagBuffer.isEmpty() &&
-                 uprv_strnicmp(
-                    maximizedTagBuffer.data(),
-                    tagBuffer.data(),
-                    tagBuffer.length()) == 0) {
-
-            createTagString(
-                        lang,
-                        langLength,
-                        nullptr,
-                        0,
-                        nullptr,
-                        0,
-                        trailing,
-                        trailingLength,
-                        sink,
-                        err);
-            return;
-        }
-    }
-
-    /**
-     * Next, try the language and region.
-     **/
-    if (regionLength > 0) {
-
-        icu::CharString tagBuffer;
-        {
-            icu::CharStringByteSink tagSink(&tagBuffer);
-            createLikelySubtagsString(
-                lang,
-                langLength,
-                nullptr,
-                0,
-                region,
-                regionLength,
-                nullptr,
-                0,
-                tagSink,
-                err);
-        }
-
+        icu::LSR lsr = likelySubtags->minimizeSubtags(
+            {lang, langLength},
+            {script, scriptLength},
+            {region, regionLength},
+            favorScript,
+            *err);
        if(U_FAILURE(*err)) {
            goto error;
        }
-        else if (!tagBuffer.isEmpty() &&
-                 uprv_strnicmp(
-                    maximizedTagBuffer.data(),
-                    tagBuffer.data(),
-                    tagBuffer.length()) == 0) {
-
-            createTagString(
-                        lang,
-                        langLength,
-                        nullptr,
-                        0,
-                        region,
-                        regionLength,
-                        trailing,
-                        trailingLength,
-                        sink,
-                        err);
-            return;
+        const char* language = lsr.language;
+        if (uprv_strcmp(language, "und") == 0) {
+            language = "";
        }
-    }
-
-    /**
-     * Finally, try the language and script.  This is our last chance,
-     * since trying with all three subtags would only yield the
-     * maximal version that we already have.
-     **/
-    if (scriptLength > 0) {
-        icu::CharString tagBuffer;
-        {
-            icu::CharStringByteSink tagSink(&tagBuffer);
-            createLikelySubtagsString(
-                lang,
-                langLength,
-                script,
-                scriptLength,
-                nullptr,
-                0,
-                nullptr,
-                0,
-                tagSink,
-                err);
-        }
-
+        createTagStringWithAlternates(
+            language,
+            (int32_t)uprv_strlen(language),
+            lsr.script,
+            (int32_t)uprv_strlen(lsr.script),
+            lsr.region,
+            (int32_t)uprv_strlen(lsr.region),
+            trailing,
+            trailingLength,
+            nullptr,
+            sink,
+            err);
        if(U_FAILURE(*err)) {
            goto error;
        }
-        else if (!tagBuffer.isEmpty() &&
-                 uprv_strnicmp(
-                    maximizedTagBuffer.data(),
-                    tagBuffer.data(),
-                    tagBuffer.length()) == 0) {
-
-            createTagString(
-                        lang,
-                        langLength,
-                        script,
-                        scriptLength,
-                        nullptr,
-                        0,
-                        trailing,
-                        trailingLength,
-                        sink,
-                        err);
-            return;
-        }
-    }
-
-    {
-        /**
-         * If we got here, return the max + trail.
-         **/
-        createTagString(
-                    lang,
-                    langLength,
-                    script,
-                    scriptLength,
-                    region,
-                    regionLength,
-                    trailing,
-                    trailingLength,
-                    sink,
-                    err);
        return;
    }

@ -1271,7 +706,7 @@ uloc_minimizeSubtags(const char* localeID,
    icu::CheckedArrayByteSink sink(
            minimizedLocaleID, minimizedLocaleIDCapacity);

-    ulocimp_minimizeSubtags(localeID, sink, status);
+    ulocimp_minimizeSubtags(localeID, sink, false, status);
    int32_t reslen = sink.NumberOfBytesAppended();

    if (U_FAILURE(*status)) {
@ -1291,6 +726,7 @@ uloc_minimizeSubtags(const char* localeID,
 U_CAPI void U_EXPORT2
 ulocimp_minimizeSubtags(const char* localeID,
                        icu::ByteSink& sink,
+                        bool favorScript,
                        UErrorCode* status) {
    PreflightingLocaleIDBuffer localeBuffer;
    do {
@ -1298,7 +734,7 @@ ulocimp_minimizeSubtags(const char* localeID,
            localeBuffer.getCapacity(), status);
    } while (localeBuffer.needToTryAgain(status));
    
-    _uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status);
+    _uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, favorScript, status);
 }

 // Pairs of (language subtag, + or -) for finding out fast if common languages
--- a/icu4c/source/common/loclikelysubtags.cpp
+++ b/icu4c/source/common/loclikelysubtags.cpp
@ -24,6 +24,7 @@
 #include "uniquecharstr.h"
 #include "uresdata.h"
 #include "uresimp.h"
+#include "uvector.h"

 U_NAMESPACE_BEGIN

@ -304,7 +305,7 @@ private:
        encoded %= 27 * 27;
        if (encoded < 27) {
            // Selected M49 code index, find the code from "m49" resource.
-            return  m49IndexToCode(m49Array, value, 2, errorCode);
+            return  m49IndexToCode(m49Array, value, encoded, errorCode);
        }
        char region[2];
        region[0] = 'A' + ((encoded % 27) - 1);
@ -339,15 +340,52 @@ private:
 namespace {

 XLikelySubtags *gLikelySubtags = nullptr;
+UVector *gMacroregions = nullptr;
 UInitOnce gInitOnce {};

 UBool U_CALLCONV cleanup() {  // cleanup callback: frees both file-level singletons and resets the init-once guard
    delete gLikelySubtags;
    gLikelySubtags = nullptr;
+    delete gMacroregions;
+    gMacroregions = nullptr;  // null the pointer after deleting so no stale pointer remains
    gInitOnce.reset();
    return true;
 }

+static const char16_t RANGE_MARKER = 0x7E; /* '~' */
+UVector* loadMacroregions(UErrorCode &status) {  // Builds a UVector of UnicodeString macroregion codes; returns nullptr if allocation or the resource lookup fails, otherwise a vector the caller must delete
+    LocalPointer<UVector> newMacroRegions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);  // elements are deleted via uprv_deleteUObject and compared via uhash_compareUnicodeString
+
+    LocalUResourceBundlePointer supplementalData(ures_openDirect(nullptr,"supplementalData",&status));  // walk supplementalData -> idValidity -> region -> macroregion
+    LocalUResourceBundlePointer idValidity(ures_getByKey(supplementalData.getAlias(),"idValidity",nullptr,&status));
+    LocalUResourceBundlePointer regionList(ures_getByKey(idValidity.getAlias(),"region",nullptr,&status));
+    LocalUResourceBundlePointer regionMacro(ures_getByKey(regionList.getAlias(),"macroregion",nullptr,&status));
+
+    if (U_FAILURE(status)) {  // any failure so far (allocation or resource lookup) aborts the load
+        return nullptr;
+    }
+
+    while (U_SUCCESS(status) && ures_hasNext(regionMacro.getAlias())) {  // one iteration per "macroregion" entry; stops early once status fails
+        UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),nullptr,&status);
+        int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);  // position of '~', or negative when the entry is not a range
+        char16_t buf[6];  // NOTE(review): assumes every entry fits in 6 UTF-16 units including the terminator - confirm against the data
+        regionName.extract(buf,6,status);
+        if ( rangeMarkerLocation > 0 ) {  // '~' preceded by at least one character: expand the range
+            char16_t endRange = regionName.charAt(rangeMarkerLocation+1);  // the character right after '~' is the last value of the final position
+            buf[rangeMarkerLocation] = 0;  // cut buf at '~' so it holds the first code of the range
+            while ( buf[rangeMarkerLocation-1] <= endRange && U_SUCCESS(status)) {  // add buf, then bump its last character, until it passes endRange
+                LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
+                newMacroRegions->adoptElement(newRegion.orphan(),status);
+                buf[rangeMarkerLocation-1]++;
+            }
+        } else {  // plain entry: store the name unchanged
+            LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
+            newMacroRegions->adoptElement(newRegion.orphan(),status);
+        }
+    }
+    return newMacroRegions.orphan();  // also reached when status failed inside the loop, so callers must check status as well as the pointer
+}
+
 }  // namespace

 void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
@ -357,10 +395,14 @@ void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
    data.load(errorCode);
    if (U_FAILURE(errorCode)) { return; }
    gLikelySubtags = new XLikelySubtags(data);
-    if (gLikelySubtags == nullptr) {
+    gMacroregions = loadMacroregions(errorCode);
+    if (U_FAILURE(errorCode) || gLikelySubtags == nullptr || gMacroregions == nullptr) {
+        delete gLikelySubtags;
+        delete gMacroregions;
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
+
    ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
 }

@ -411,15 +453,28 @@ XLikelySubtags::~XLikelySubtags() {
    delete[] lsrs;
 }

-LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const {
+LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale,
+                                         bool returnInputIfUnmatch,
+                                         UErrorCode &errorCode) const {
    const char *name = locale.getName();
    if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') {  // name.startsWith("@x=")
        // Private use language tag x-subtag-subtag... which CLDR changes to
        // und-x-subtag-subtag...
        return LSR(name, "", "", LSR::EXPLICIT_LSR);
    }
-    return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
-                            locale.getVariant(), errorCode);
+    LSR max = makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
+                            locale.getVariant(), returnInputIfUnmatch, errorCode);
+
+    if (uprv_strlen(max.language) == 0 &&  // all three subtags empty: nothing was matched
+        uprv_strlen(max.script) == 0 &&
+        uprv_strlen(max.region) == 0) {
+        // No match. The ICU API contract requires that if the provided
+        // locale is already in its maximal form, or there is no data
+        // available for maximization, the input is returned; so rebuild
+        // the LSR from the locale's own language, script and region.
+        return LSR(locale.getLanguage(), locale.getScript(), locale.getCountry(), LSR::EXPLICIT_LSR, errorCode);
+    }
+    return max;
 }

 namespace {
@ -432,7 +487,9 @@ const char *getCanonical(const CharStringMap &aliases, const char *alias) {
 }  // namespace

 LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
-                                     const char *variant, UErrorCode &errorCode) const {
+                                     const char *variant,
+                                     bool returnInputIfUnmatch,
+                                     UErrorCode &errorCode) const {
    // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
    // They should match only themselves,
    // not other locales with what looks like the same language and script subtags.
@ -472,64 +529,91 @@ LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, c
    language = getCanonical(languageAliases, language);
    // (We have no script mappings.)
    region = getCanonical(regionAliases, region);
-    return maximize(language, script, region);
+    return maximize(language, script, region, returnInputIfUnmatch, errorCode);
 }

-LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const {
-    if (uprv_strcmp(language, "und") == 0) {
+// Convenience overload for NUL-terminated subtags: wraps each C string in a
+// StringPiece of its uprv_strlen() length and forwards to the StringPiece
+// overload of maximize().
+LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region,
+                             bool returnInputIfUnmatch,
+                             UErrorCode &errorCode) const {
+    const StringPiece languagePiece(language, static_cast<int32_t>(uprv_strlen(language)));
+    const StringPiece scriptPiece(script, static_cast<int32_t>(uprv_strlen(script)));
+    const StringPiece regionPiece(region, static_cast<int32_t>(uprv_strlen(region)));
+    return maximize(languagePiece, scriptPiece, regionPiece, returnInputIfUnmatch, errorCode);
+}
+
+// Returns true if `region` is contained in gMacroregions.
+// Returns false if errorCode is already a failure on entry, or if the
+// one-time initialization (initLikelySubtags) fails.
+bool XLikelySubtags::isMacroregion(StringPiece& region, UErrorCode& errorCode) const {
+    // ICU4J uses the Region class here. In C++, Region lives in i18n, so the
+    // same data that Region uses is read into gMacroregions instead, which
+    // avoids a dependency from common on i18n/region.cpp.
+    if (U_SUCCESS(errorCode)) {
+        umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
+    }
+    if (U_FAILURE(errorCode)) {
+        return false;
+    }
+    UnicodeString key = UnicodeString::fromUTF8(region);
+    return gMacroregions->contains(static_cast<void *>(&key));
+}
+
+LSR XLikelySubtags::maximize(StringPiece language, StringPiece script, StringPiece region,
+                             bool returnInputIfUnmatch,
+                             UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) {
+        return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode);
+    }
+    if (language.compare("und") == 0) {
        language = "";
    }
-    if (uprv_strcmp(script, "Zzzz") == 0) {
+    if (script.compare("Zzzz") == 0) {
        script = "";
    }
-    if (uprv_strcmp(region, "ZZ") == 0) {
+    if (region.compare("ZZ") == 0) {
        region = "";
    }
-    if (*script != 0 && *region != 0 && *language != 0) {
-        return LSR(language, script, region, LSR::EXPLICIT_LSR);  // already maximized
+    if (!script.empty() && !region.empty() && !language.empty()) {
+        return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode);  // already maximized
    }
+    bool retainLanguage = false;
+    bool retainScript = false;
+    bool retainRegion = false;

-    uint32_t retainOldMask = 0;
    BytesTrie iter(trie);
    uint64_t state;
    int32_t value;
    // Small optimization: Array lookup for first language letter.
    int32_t c0;
-    if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 &&
-            language[1] != 0 &&  // language.length() >= 2
+    if (0 <= (c0 = uprv_lowerOrdinal(language.data()[0])) && c0 <= 25 &&
+            language.length() >= 2 &&
            (state = trieFirstLetterStates[c0]) != 0) {
        value = trieNext(iter.resetToState64(state), language, 1);
    } else {
        value = trieNext(iter, language, 0);
    }
+    bool matchLanguage = (value >= 0);
+    bool matchScript = false;
    if (value >= 0) {
-        if (*language != 0) {
-            retainOldMask |= 4;
-        }
+        retainLanguage = !language.empty();
        state = iter.getState64();
    } else {
-        retainOldMask |= 4;
+        retainLanguage = true;
        iter.resetToState64(trieUndState);  // "und" ("*")
        state = 0;
    }

+    if (value >= 0 && !script.empty()) {
+        matchScript = true;
+    }
    if (value > 0) {
        // Intermediate or final value from just language.
        if (value == SKIP_SCRIPT) {
            value = 0;
        }
-        if (*script != 0) {
-            retainOldMask |= 2;
-        }
+        retainScript = !script.empty();
    } else {
        value = trieNext(iter, script, 0);
        if (value >= 0) {
-            if (*script != 0) {
-                retainOldMask |= 2;
-            }
+            retainScript = !script.empty();
            state = iter.getState64();
        } else {
-            retainOldMask |= 2;
+            retainScript = true;
            if (state == 0) {
                iter.resetToState64(trieUndZzzzState);  // "und-Zzzz" ("**")
            } else {
@ -541,19 +625,19 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha
        }
    }

+    bool matchRegion = false;
    if (value > 0) {
        // Final value from just language or language+script.
-        if (*region != 0) {
-            retainOldMask |= 1;
-        }
+        retainRegion = !region.empty();
    } else {
        value = trieNext(iter, region, 0);
        if (value >= 0) {
-            if (*region != 0) {
-                retainOldMask |= 1;
+            if (!region.empty() && !isMacroregion(region, errorCode)) {
+                retainRegion = true;
+                matchRegion = true;
            }
        } else {
-            retainOldMask |= 1;
+            retainRegion = true;
            if (state == 0) {
                value = defaultLsrIndex;
            } else {
@ -564,28 +648,33 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha
        }
    }
    U_ASSERT(value < lsrsLength);
-    const LSR &result = lsrs[value];
+    const LSR &matched = lsrs[value];

-    if (*language == 0) {
-        language = "und";
+    if (returnInputIfUnmatch &&
+        (!(matchLanguage || matchScript || (matchRegion && language.empty())))) {
+      return LSR("", "", "", LSR::EXPLICIT_LSR, errorCode);  // no matching.
+    }
+    if (language.empty()) {
+        language = StringPiece("und");
    }

-    if (retainOldMask == 0) {
+    if (!(retainLanguage || retainScript || retainRegion)) {
        // Quickly return a copy of the lookup-result LSR
        // without new allocation of the subtags.
-        return LSR(result.language, result.script, result.region, result.flags);
+        return LSR(matched.language, matched.script, matched.region, matched.flags);
    }
-    if ((retainOldMask & 4) == 0) {
-        language = result.language;
+    if (!retainLanguage) {
+        language = matched.language;
    }
-    if ((retainOldMask & 2) == 0) {
-        script = result.script;
+    if (!retainScript) {
+        script = matched.script;
    }
-    if ((retainOldMask & 1) == 0) {
-        region = result.region;
+    if (!retainRegion) {
+        region = matched.region;
    }
+    int32_t retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
    // retainOldMask flags = LSR explicit-subtag flags
-    return LSR(language, script, region, retainOldMask);
+    return LSR(language, script, region, retainMask, errorCode);
 }

 int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
@ -721,57 +810,97 @@ int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
    default: return -1;
    }
 }
-
-// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
-// in loclikely.cpp to this new code, including activating this
-// minimizeSubtags() function. The LocaleMatcher does not minimize.
-#if 0
-LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn,
-                                    const char *regionIn, ULocale.Minimize fieldToFavor,
-                                    UErrorCode &errorCode) const {
-    LSR result = maximize(languageIn, scriptIn, regionIn);
-
-    // We could try just a series of checks, like:
-    // LSR result2 = addLikelySubtags(languageIn, "", "");
-    // if result.equals(result2) return result2;
-    // However, we can optimize 2 of the cases:
-    //   (languageIn, "", "")
-    //   (languageIn, "", regionIn)
-
-    // value00 = lookup(result.language, "", "")
-    BytesTrie iter = new BytesTrie(trie);
-    int value = trieNext(iter, result.language, 0);
-    U_ASSERT(value >= 0);
-    if (value == 0) {
-        value = trieNext(iter, "", 0);
-        U_ASSERT(value >= 0);
-        if (value == 0) {
-            value = trieNext(iter, "", 0);
+int32_t XLikelySubtags::trieNext(BytesTrie &iter, StringPiece s, int32_t i) {  // Feeds s[i..] into iter as one subtag; returns -1 (no match), 0 (no value), SKIP_SCRIPT, or the final trie value.
+    UStringTrieResult result;
+    uint8_t c;
+    if (s.length() == i) {  // nothing left of s from index i: look up '*' instead
+        result = iter.next(u'*');
+    } else {
+        c = s.data()[i];
+        for (;;) {
+            c = uprv_invCharToAscii(c);
+            // EBCDIC: If s[i] is not an invariant character,
+            // then c is now 0 and will simply not match anything, which is harmless.
+            if (i+1 != s.length()) {  // not yet at the last character of s
+                if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) {
+                    return -1;  // the trie cannot continue past this character
+                }
+                c = s.data()[++i];
+            } else {
+                // last character of this subtag: it is looked up with bit 0x80 set
+                result = iter.next(c | 0x80);
+                break;
+            }
         }
     }
-    U_ASSERT(value > 0);
-    LSR value00 = lsrs[value];
-    boolean favorRegionOk = false;
-    if (result.script.equals(value00.script)) { //script is default
-        if (result.region.equals(value00.region)) {
-            return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
-        } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
-            return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
-        } else {
-            favorRegionOk = true;
-        }
+    switch (result) {  // translate the trie result into this function's int32_t convention
+    case USTRINGTRIE_NO_MATCH: return -1;
+    case USTRINGTRIE_NO_VALUE: return 0;
+    case USTRINGTRIE_INTERMEDIATE_VALUE:
+        U_ASSERT(iter.getValue() == SKIP_SCRIPT);  // the only intermediate value expected here
+        return SKIP_SCRIPT;
+    case USTRINGTRIE_FINAL_VALUE: return iter.getValue();
+    default: return -1;
     }
-
-    // The last case is not as easy to optimize.
-    // Maybe do later, but for now use the straightforward code.
-    LSR result2 = maximize(languageIn, scriptIn, "");
-    if (result2.equals(result)) {
-        return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
-    } else if (favorRegionOk) {
-        return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
-    }
-    return result;
 }
-#endif
+
+// Computes the minimized form of (language, script, region).
+//
+// The input is first maximized. Shorter candidates are then maximized in turn,
+// and the first one that is equivalent to the maximized input is returned:
+//   1. language only
+//   2. language + region (favorScript == false) or language + script (favorScript == true)
+//   3. the remaining two-subtag combination
+// If no candidate matches, the fully maximized subtags are returned.
+// If maximization finds no match at all, the input subtags are returned unchanged.
+// If errorCode is set at any point, the maximized LSR computed so far is returned.
+LSR XLikelySubtags::minimizeSubtags(StringPiece language, StringPiece script,
+                                    StringPiece region,
+                                    bool favorScript,
+                                    UErrorCode &errorCode) const {
+    LSR max = maximize(language, script, region, true, errorCode);
+    if (U_FAILURE(errorCode)) {
+        return max;
+    }
+    const bool unmatched = *max.language == 0 && *max.script == 0 && *max.region == 0;
+    if (unmatched) {
+        // No match. The ICU API contract says:
+        // "If this Locale is already in the minimal form, or not valid, or
+        // there is no data available for minimization, the Locale will be
+        // unchanged."
+        return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode);
+    }
+
+    // Candidate (script, region) pairs, in the order in which they are tried.
+    const char *const candidateScripts[] = {
+        "", favorScript ? max.script : "", favorScript ? "" : max.script};
+    const char *const candidateRegions[] = {
+        "", favorScript ? "" : max.region, favorScript ? max.region : ""};
+    for (int32_t k = 0; k < 3; ++k) {
+        LSR test = maximize(max.language, candidateScripts[k], candidateRegions[k],
+                            true, errorCode);
+        if (U_FAILURE(errorCode)) {
+            return max;
+        }
+        if (test.isEquivalentTo(max)) {
+            return LSR(max.language, candidateScripts[k], candidateRegions[k],
+                       LSR::DONT_CARE_FLAGS, errorCode);
+        }
+    }
+    // Nothing shorter maximizes to the same result; keep all three subtags.
+    return LSR(max.language, max.script, max.region, LSR::DONT_CARE_FLAGS, errorCode);
+}

 U_NAMESPACE_END
--- a/icu4c/source/common/loclikelysubtags.h
+++ b/icu4c/source/common/loclikelysubtags.h
@ -11,6 +11,7 @@
 #include "unicode/utypes.h"
 #include "unicode/bytestrie.h"
 #include "unicode/locid.h"
+#include "unicode/stringpiece.h"
 #include "unicode/uobject.h"
 #include "unicode/ures.h"
 #include "charstrmap.h"
@ -47,7 +48,9 @@ public:
    static const XLikelySubtags *getSingleton(UErrorCode &errorCode);

    // VisibleForTesting
-    LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const;
+    LSR makeMaximizedLsrFrom(const Locale &locale,
+                             bool returnInputIfUnmatch,
+                             UErrorCode &errorCode) const;

    /**
     * Tests whether lsr is "more likely" than other.
@ -61,13 +64,9 @@ public:
     */
    int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const;

-    // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code
-    // in loclikely.cpp to this new code, including activating this
-    // minimizeSubtags() function. The LocaleMatcher does not minimize.
-#if 0
-    LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn,
-                        ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const;
-#endif
+    LSR minimizeSubtags(StringPiece language, StringPiece script, StringPiece region,
+                        bool favorScript,
+                        UErrorCode &errorCode) const;

    // visible for LocaleDistance
    const LocaleDistanceData &getDistanceData() const { return distanceData; }
@ -80,16 +79,25 @@ private:
    static void initLikelySubtags(UErrorCode &errorCode);

    LSR makeMaximizedLsr(const char *language, const char *script, const char *region,
-                         const char *variant, UErrorCode &errorCode) const;
+                         const char *variant,
+                         bool returnInputIfUnmatch,
+                         UErrorCode &errorCode) const;

    /**
     * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
     */
-    LSR maximize(const char *language, const char *script, const char *region) const;
+    LSR maximize(const char *language, const char *script, const char *region,
+                 bool returnInputIfUnmatch,
+                 UErrorCode &errorCode) const;
+    LSR maximize(StringPiece language, StringPiece script, StringPiece region,
+                 bool returnInputIfUnmatch,
+                 UErrorCode &errorCode) const;

    int32_t getLikelyIndex(const char *language, const char *script) const;
+    bool isMacroregion(StringPiece& region, UErrorCode &errorCode) const;

    static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i);
+    static int32_t trieNext(BytesTrie &iter, StringPiece s, int32_t i);

    UResourceBundle *langInfoBundle;
    // We could store the strings by value, except that if there were few enough strings,
--- a/icu4c/source/common/lsr.cpp
+++ b/icu4c/source/common/lsr.cpp
@ -31,6 +31,26 @@ LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t
    }
 }

+// Builds an LSR from StringPiece subtags. The three subtags are copied into
+// one buffer ("lang\0scr\0r") whose clone is stored in `owned`, and
+// language/script/region point into that clone.
+// If errorCode is a failure on entry, or copying fails, language, script and
+// region stay nullptr.
+// NOTE(review): indexForRegion() receives r.data() without r's length, so it
+// presumably expects a NUL-terminated region -- confirm for all callers.
+LSR::LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f,
+         UErrorCode &errorCode) :
+        language(nullptr), script(nullptr), region(nullptr),
+        regionIndex(indexForRegion(r.data())), flags(f) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    CharString packed;
+    packed.append(lang, errorCode).append('\0', errorCode);
+    const int32_t scriptStart = packed.length();
+    packed.append(scr, errorCode).append('\0', errorCode);
+    const int32_t regionStart = packed.length();
+    packed.append(r, errorCode);
+    owned = packed.cloneData(errorCode);
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    language = owned;
+    script = owned + scriptStart;
+    region = owned + regionStart;
+}
+
 LSR::LSR(LSR &&other) noexcept :
        language(other.language), script(other.script), region(other.region), owned(other.owned),
        regionIndex(other.regionIndex), flags(other.flags),
--- a/icu4c/source/common/lsr.h
+++ b/icu4c/source/common/lsr.h
@ -7,6 +7,7 @@
 #ifndef __LSR_H__
 #define __LSR_H__

+#include "unicode/stringpiece.h"
 #include "unicode/utypes.h"
 #include "unicode/uobject.h"
 #include "cstring.h"
@ -45,6 +46,8 @@ struct LSR final : public UMemory {
     */
    LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f,
        UErrorCode &errorCode);
+    LSR(StringPiece lang, StringPiece scr, StringPiece r, int32_t f,
+        UErrorCode &errorCode);
    LSR(LSR &&other) noexcept;
    LSR(const LSR &other) = delete;
    inline ~LSR() {
--- a/icu4c/source/common/ulocimp.h
+++ b/icu4c/source/common/ulocimp.h
@ -237,6 +237,7 @@ ulocimp_addLikelySubtags(const char* localeID,
 *
 * @param localeID The locale to minimize
 * @param sink The output sink receiving the maximized locale
+ * @param favorScript if true, prefer keeping the script; if false, prefer keeping the region.
 * @param err Error information if minimizing the locale failed.  If the length
 * of the localeID and the null-terminator is greater than the maximum allowed size,
 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
@ -245,6 +246,7 @@ ulocimp_addLikelySubtags(const char* localeID,
 U_CAPI void U_EXPORT2
 ulocimp_minimizeSubtags(const char* localeID,
                        icu::ByteSink& sink,
+                        bool favorScript,
                        UErrorCode* err);

 U_CAPI const char * U_EXPORT2
--- a/icu4c/source/common/unicode/locid.h
+++ b/icu4c/source/common/unicode/locid.h
@ -1113,6 +1113,15 @@ protected: /* only protected for testing purposes. DO NOT USE. */
     * @internal
     */
    void setFromPOSIXID(const char *posixID);
+    /**
+     * Minimize the subtags for this Locale, per the Likely Subtags algorithm
+     * described in Unicode Technical Standard #35 (LDML).
+     * @param favorScript if true, prefer keeping the script; if false, prefer keeping the region.
+     * @param status  error information if minimizing this Locale failed.
+     *                If this Locale is not well-formed, the error code is
+     *                U_ILLEGAL_ARGUMENT_ERROR.
+     * @internal
+     */
+    void minimizeSubtags(bool favorScript, UErrorCode& status);
 #endif  /* U_HIDE_INTERNAL_API */

 private:
--- a/icu4c/source/test/cintltst/cloctst.c
+++ b/icu4c/source/test/cintltst/cloctst.c
@ -4605,8 +4605,8 @@ const char* const full_data[][3] = {
    "am"
  }, {
    "und_Ethi_ER",
-    "am_Ethi_ER",
-    "am_ER"
+    "ti_Ethi_ER",
+    "ti_ER"
  }, {
    "und_FI",
    "fi_Latn_FI",
@ -5381,8 +5381,8 @@ const char* const full_data[][3] = {
    "trv"
  }, {
    "und_Latn_HK",
-    "zh_Latn_HK",
-    "zh_Latn_HK"
+    "en_Latn_HK",
+    "en_HK"
  }, {
    "und_Latn_AQ",
    "_Latn_AQ",
--- a/icu4c/source/test/depstest/dependencies.txt
+++ b/icu4c/source/test/depstest/dependencies.txt
@ -655,17 +655,19 @@ group: resourcebundle
    localebuilder.o
    ulocale.o
    ulocbuilder.o
+    loclikelysubtags.o
  deps
    udata ucol_swp
    sort stringenumeration uhash uvector
    uscript_props propname
    bytesinkutil
    errorcode
+    lsr

 group: localematcher
    localematcher.o
  deps
-    resourcebundle localeprioritylist loclikelysubtags locdistance lsr
+    resourcebundle localeprioritylist locdistance

 group: localeprioritylist
    localeprioritylist.o
@ -675,12 +677,7 @@ group: localeprioritylist
 group: locdistance
    locdistance.o
  deps
-    loclikelysubtags
-
-group: loclikelysubtags
-    loclikelysubtags.o
-  deps
-    lsr resourcebundle
+    resourcebundle

 group: lsr
    lsr.o
--- a/icu4c/source/test/intltest/loctest.cpp
+++ b/icu4c/source/test/intltest/loctest.cpp
@ -11,6 +11,7 @@
 #include <iterator>
 #include <set>
 #include <utility>
+#include <cctype>

 #include "loctest.h"
 #include "unicode/localebuilder.h"
@ -234,6 +235,7 @@ void LocaleTest::runIndexedTest( int32_t index, UBool exec, const char* &name, c
    TESTCASE_AUTO(TestAddLikelySubtags);
    TESTCASE_AUTO(TestMinimizeSubtags);
    TESTCASE_AUTO(TestAddLikelyAndMinimizeSubtags);
+    TESTCASE_AUTO(TestDataDrivenLikelySubtags);
    TESTCASE_AUTO(TestKeywordVariants);
    TESTCASE_AUTO(TestCreateUnicodeKeywords);
    TESTCASE_AUTO(TestKeywordVariantParsing);
@ -1711,6 +1713,11 @@ LocaleTest::TestAddLikelyAndMinimizeSubtags() {
        const char* const add;
        const char* const remove;
    } full_data[] = {
+        {
+            "und",
+            "en_Latn_US",
+            "en"
+        },
        {
            "und_AQ",
            "_Latn_AQ",
@ -2517,8 +2524,8 @@ LocaleTest::TestAddLikelyAndMinimizeSubtags() {
            "am"
        }, {
            "und_Ethi_ER",
-            "am_Ethi_ER",
-            "am_ER"
+            "ti_Ethi_ER",
+            "ti_ER"
        }, {
            "und_FI",
            "fi_Latn_FI",
@ -3293,8 +3300,8 @@ LocaleTest::TestAddLikelyAndMinimizeSubtags() {
            "trv"
        }, {
            "und_Latn_HK",
-            "zh_Latn_HK",
-            "zh_Latn_HK"
+            "en_Latn_HK",
+            "en_HK"
        }, {
            "und_Latn_AQ",
            "_Latn_AQ",
@ -3865,7 +3872,6 @@ LocaleTest::TestAddLikelyAndMinimizeSubtags() {
    }
 }

-
 void
 LocaleTest::TestKeywordVariants() {
    static const struct {
@ -5546,6 +5552,184 @@ void LocaleTest::TestLocaleCanonicalizationFromFile()
    }
 }

+// Returns a copy of `s` without leading and trailing whitespace (as classified
+// by std::isspace). Returns an empty string when `s` is empty or consists only
+// of whitespace.
+std::string trim(const std::string &s) {
+    // std::isspace() has undefined behavior for negative char values, so
+    // every byte is classified as unsigned char.
+    auto isSpace = [](char ch) {
+        return std::isspace(static_cast<unsigned char>(ch)) != 0;
+    };
+    auto start = s.begin();
+    auto end = s.end();
+    while (start != end && isSpace(*start)) {
+        ++start;
+    }
+    // `end` is only moved while the range is non-empty, so the iterator is
+    // never decremented past begin() -- not even for an empty input.
+    while (start != end && isSpace(*(end - 1))) {
+        --end;
+    }
+    return std::string(start, end);
+}
+
+// Test-only Locale subclass whose minimizeSubtags(UErrorCode&) favors keeping
+// the script: it forwards to Locale::minimizeSubtags(bool, UErrorCode&) with
+// favorScript set to true.
+class FavorScriptLocale : public Locale {
+public:
+    FavorScriptLocale(const Locale& other) : Locale(other) {}
+
+    void minimizeSubtags(UErrorCode& status) {
+        this->Locale::minimizeSubtags(/* favorScript= */ true, status);
+    }
+};
+
+
+// Returns true for the data-file sources whose mismatches the data-driven
+// likely-subtags test reports via logKnownIssue("ICU-20777", ...) instead of
+// errln(): a fixed list of tags, plus every tag that starts with "und-Latn-".
+bool isKnownSourceFor20777(const std::string& s) {
+    static const char *const kExactSources[] = {
+        "und-001", "und-AQ", "und-CC", "und-SL", "und-SS", "und-ZM"
+    };
+    for (const char *known : kExactSources) {
+        if (s == known) {
+            return true;
+        }
+    }
+    // Prefix test: compares at most the first 9 characters of s.
+    return s.compare(0, 9, "und-Latn-") == 0;
+}
+
+// Verifies one likely-subtags result for testLikelySubtagsLineFn().
+//
+// test       the running LocaleTest, used for reporting.
+// source     the language tag from the data file (used in messages and for
+//            the ICU-20777 known-issue filter).
+// input      the locale parsed from `source`.
+// actual     `input` after the operation under test was applied.
+// expected   the expected language tag, or "FAIL" when the operation is
+//            expected to leave the locale unchanged.
+// operation  API name used in messages ("addLikelySubtags"/"minimizeSubtags").
+// variant    "" or " favor script"; inserted into the messages.
+// status     status left by the operation; always reset before returning from
+//            the "FAIL" branch or after a toLanguageTag() failure, so that one
+//            check cannot influence the next one or abort the file parser.
+static void checkLikelySubtagsResult(LocaleTest *test, const std::string &source,
+                                     const Locale &input, const Locale &actual,
+                                     const std::string &expected,
+                                     const char *operation, const char *variant,
+                                     UErrorCode &status) {
+    if (expected == "FAIL") {
+        if (uprv_strcmp(input.getName(), actual.getName()) != 0) {
+            test->errln("%s('%s') return should return the same but return '%s'",
+                        operation, input.getName(), actual.getName());
+        }
+        // Only the "unchanged" property is checked for FAIL lines. Clear any
+        // status left by the operation so it does not leak into later checks.
+        status = U_ZERO_ERROR;
+        return;
+    }
+    std::string tag = actual.toLanguageTag<std::string>(status);
+    if (U_FAILURE(status)) {
+        test->errln("toLanguageTag(%s)%s return error %x %s", actual.getName(), variant,
+                    status, u_errorName(status));
+        status = U_ZERO_ERROR;
+        return;
+    }
+    if (tag == expected) {
+        return;
+    }
+    if (isKnownSourceFor20777(source)) {
+        test->logKnownIssue(
+            "ICU-20777", "%s('%s')%s should return '%s' but got '%s'",
+            operation, source.c_str(), variant, expected.c_str(), tag.c_str());
+    } else {
+        test->errln("%s('%s')%s should return '%s' but got '%s'",
+                    operation, source.c_str(), variant, expected.c_str(), tag.c_str());
+    }
+}
+
+// Line callback for u_parseDelimitedFile() over likelySubtags.txt.
+//
+// Fields (each trimmed): 0 = source tag, 1 = expected addLikelySubtags result,
+// 2 = expected minimizeSubtags result when favoring script (defaults to
+// field 1 when empty), 3 = expected minimizeSubtags result when favoring
+// region (defaults to field 2 when empty). The value "FAIL" means the
+// operation is expected to leave the locale unchanged.
+// `context` is the LocaleTest instance. *pErrorCode is reset after a reported
+// failure so that parsing continues with the next line.
+void U_CALLCONV
+testLikelySubtagsLineFn(void *context,
+               char *fields[][2], int32_t fieldCount,
+               UErrorCode *pErrorCode) {
+    (void)fieldCount;
+    LocaleTest* THIS = static_cast<LocaleTest*>(context);
+    // fields[k][0] is the start and fields[k][1] the limit of column k.
+    auto field = [fields](int32_t k) {
+        return trim(std::string(fields[k][0], fields[k][1]-fields[k][0]));
+    };
+    const std::string source = field(0);
+    const std::string addLikely = field(1);
+    std::string removeFavorScript = field(2);
+    if (removeFavorScript.empty()) {
+        removeFavorScript = addLikely;
+    }
+    std::string removeFavorRegion = field(3);
+    if (removeFavorRegion.empty()) {
+        removeFavorRegion = removeFavorScript;
+    }
+
+    Locale l = Locale::forLanguageTag(source, *pErrorCode);
+    if (U_FAILURE(*pErrorCode)) {
+        THIS->errln("forLanguageTag(%s) return error %x %s", source.c_str(),
+                    *pErrorCode, u_errorName(*pErrorCode));
+        *pErrorCode = U_ZERO_ERROR;
+        return;
+    }
+
+    Locale actualMax(l);
+    actualMax.addLikelySubtags(*pErrorCode);
+    checkLikelySubtagsResult(THIS, source, l, actualMax, addLikely,
+                             "addLikelySubtags", "", *pErrorCode);
+
+    // Locale::minimizeSubtags(UErrorCode&) favors the region.
+    Locale actualMin(l);
+    actualMin.minimizeSubtags(*pErrorCode);
+    checkLikelySubtagsResult(THIS, source, l, actualMin, removeFavorRegion,
+                             "minimizeSubtags", "", *pErrorCode);
+
+    FavorScriptLocale actualMinFavorScript(l);
+    actualMinFavorScript.minimizeSubtags(*pErrorCode);
+    checkLikelySubtagsResult(THIS, source, l, actualMinFavorScript, removeFavorScript,
+                             "minimizeSubtags", " favor script", *pErrorCode);
+}
+
+// Data-driven test: feeds every line of likelySubtags.txt (located in the
+// source test data folder) to testLikelySubtagsLineFn(). Skipped in quick mode.
+void
+LocaleTest::TestDataDrivenLikelySubtags() {
+    // This test is too slow to run. Only run in -e mode.
+    if (quick) { return; }
+
+    IcuTestErrorCode errorCode(*this, "TestDataDrivenLikelySubtags()");
+    const char* name = "likelySubtags.txt";
+    const char *sourceTestDataPath = getSourceTestData(errorCode);
+    if (errorCode.errIfFailureAndReset("unable to find the source/test/testdata "
+                                       "folder (getSourceTestData())")) {
+        return;
+    }
+    CharString path(sourceTestDataPath, errorCode);
+    path.appendPathPart(name, errorCode);
+    // Make sure the data file can be opened before handing the path to the parser.
+    LocalStdioFilePointer testFile(fopen(path.data(), "r"));
+    if (testFile.isNull()) {
+        errln("unable to open %s", path.data());
+        return;
+    }
+
+    // Columns (c1, c2,...) are separated by semicolons.
+    // Leading and trailing spaces and tabs in each column are ignored.
+    // Comments are indicated with hash marks.
+    const int32_t kNumFields = 4;
+    char *fields[kNumFields][2];
+
+    u_parseDelimitedFile(path.data(), ';', fields, kNumFields, testLikelySubtagsLineFn,
+                         this, errorCode);
+    // Report (and clear) a parse failure; nothing else is left to do afterwards.
+    errorCode.errIfFailureAndReset("error parsing %s", name);
+}
+
+
+
 void LocaleTest::TestKnownCanonicalizedListCorrect()
 {
    IcuTestErrorCode status(*this, "TestKnownCanonicalizedListCorrect");
--- a/icu4c/source/test/intltest/loctest.h
+++ b/icu4c/source/test/intltest/loctest.h
@ -131,6 +131,7 @@ public:
    void TestAddLikelySubtags();
    void TestMinimizeSubtags();
    void TestAddLikelyAndMinimizeSubtags();
+    void TestDataDrivenLikelySubtags();

    void TestForLanguageTag();
    void TestForLanguageTagLegacyTagBug21676();
--- a/icu4c/source/test/testdata/likelySubtags.txt
+++ b/icu4c/source/test/testdata/likelySubtags.txt
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/LocaleDistance.java
@ -225,8 +225,8 @@ public class LocaleDistance {
    // VisibleForTesting
    public int testOnlyDistance(ULocale desired, ULocale supported,
            int threshold, FavorSubtag favorSubtag) {
-        LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported);
-        LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired);
+        LSR supportedLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(supported, false);
+        LSR desiredLSR = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(desired, false);
        int indexAndDistance = getBestIndexAndDistance(desiredLSR, new LSR[] { supportedLSR }, 1,
                shiftDistance(threshold), favorSubtag, LocaleMatcher.Direction.WITH_ONE_WAY);
        return getDistanceFloor(indexAndDistance);
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/locale/XLikelySubtags.java
@ -15,6 +15,7 @@ import com.ibm.icu.impl.ICUData;
 import com.ibm.icu.impl.ICUResourceBundle;
 import com.ibm.icu.impl.UResource;
 import com.ibm.icu.util.BytesTrie;
+import com.ibm.icu.util.Region;
 import com.ibm.icu.util.ULocale;

 public final class XLikelySubtags {
@ -180,7 +181,7 @@ public final class XLikelySubtags {
    }

    // VisibleForTesting
-    public LSR makeMaximizedLsrFrom(ULocale locale) {
+    public LSR makeMaximizedLsrFrom(ULocale locale, boolean returnInputIfUnmatch) {
        String name = locale.getName();  // Faster than .toLanguageTag().
        if (name.startsWith("@x=")) {
            String tag = locale.toLanguageTag();
@ -189,8 +190,12 @@ public final class XLikelySubtags {
            // und-x-subtag-subtag...
            return new LSR(tag, "", "", LSR.EXPLICIT_LSR);
        }
-        return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
-                locale.getVariant());
+        LSR max = makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
+                locale.getVariant(), returnInputIfUnmatch);
+        if (max.language.isEmpty() && max.script.isEmpty() && max.region.isEmpty()) {
+            return new LSR(locale.getLanguage(), locale.getScript(), locale.getCountry(), LSR.EXPLICIT_LSR);
+        }
+        return max;
    }

    public LSR makeMaximizedLsrFrom(Locale locale) {
@ -201,10 +206,10 @@ public final class XLikelySubtags {
            return new LSR(tag, "", "", LSR.EXPLICIT_LSR);
        }
        return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(),
-                locale.getVariant());
+                locale.getVariant(), false);
    }

-    private LSR makeMaximizedLsr(String language, String script, String region, String variant) {
+    private LSR makeMaximizedLsr(String language, String script, String region, String variant, boolean returnInputIfUnmatch) {
        // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
        // They should match only themselves,
        // not other locales with what looks like the same language and script subtags.
@ -248,13 +253,23 @@ public final class XLikelySubtags {
        language = getCanonical(languageAliases, language);
        // (We have no script mappings.)
        region = getCanonical(regionAliases, region);
-        return maximize(language, script, region);
+        return maximize(language, script, region, returnInputIfUnmatch);
+    }
+
+    /**
+     * Helper method that reports whether a region code denotes a macroregion.
+     *
+     * @param region the region subtag to classify; it is passed to
+     *     {@link Region#getInstance(String)} as is.
+     * @return true if the region's type is WORLD, CONTINENT or SUBCONTINENT;
+     *     false for every other region type.
+     */
+    private boolean isMacroregion(String region) {
+        Region.RegionType type = Region.getInstance(region).getType();
+        return type == Region.RegionType.WORLD ||
+            type == Region.RegionType.CONTINENT ||
+            type == Region.RegionType.SUBCONTINENT ;
    }

    /**
     * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN".
     */
-    private LSR maximize(String language, String script, String region) {
+    private LSR maximize(String language, String script, String region, boolean returnInputIfUnmatch) {
        if (language.equals("und")) {
            language = "";
        }
@ -268,7 +283,9 @@ public final class XLikelySubtags {
            return new LSR(language, script, region, LSR.EXPLICIT_LSR);  // already maximized
        }

-        int retainOldMask = 0;
+        boolean retainLanguage = false;
+        boolean retainScript = false;
+        boolean retainRegion = false;
        BytesTrie iter = new BytesTrie(trie);
        long state;
        int value;
@ -280,34 +297,33 @@ public final class XLikelySubtags {
        } else {
            value = trieNext(iter, language, 0);
        }
+        boolean matchLanguage = (value >= 0);
+        boolean matchScript = false;
        if (value >= 0) {
-            if (!language.isEmpty()) {
-                retainOldMask |= 4;
-            }
+            retainLanguage = ! language.isEmpty();
            state = iter.getState64();
        } else {
-            retainOldMask |= 4;
+            retainLanguage = true;
            iter.resetToState64(trieUndState);  // "und" ("*")
            state = 0;
        }

+        if (value >= 0 && !script.isEmpty()) {
+            matchScript = true;
+        }
        if (value > 0) {
            // Intermediate or final value from just language.
            if (value == SKIP_SCRIPT) {
                value = 0;
            }
-            if (!script.isEmpty()) {
-                retainOldMask |= 2;
-            }
+            retainScript = ! script.isEmpty();
        } else {
            value = trieNext(iter, script, 0);
            if (value >= 0) {
-                if (!script.isEmpty()) {
-                    retainOldMask |= 2;
-                }
+                retainScript = ! script.isEmpty();
                state = iter.getState64();
            } else {
-                retainOldMask |= 2;
+                retainScript = true;
                if (state == 0) {
                    iter.resetToState64(trieUndZzzzState);  // "und-Zzzz" ("**")
                } else {
@ -319,19 +335,19 @@ public final class XLikelySubtags {
            }
        }

+        boolean matchRegion = false;
        if (value > 0) {
            // Final value from just language or language+script.
-            if (!region.isEmpty()) {
-                retainOldMask |= 1;
-            }
+            retainRegion = ! region.isEmpty();
        } else {
            value = trieNext(iter, region, 0);
            if (value >= 0) {
-                if (!region.isEmpty()) {
-                    retainOldMask |= 1;
+                if (!region.isEmpty() && !isMacroregion(region)) {
+                    retainRegion = true;
+                    matchRegion = true;
                }
            } else {
-                retainOldMask |= 1;
+                retainRegion = true;
                if (state == 0) {
                    value = defaultLsrIndex;
                } else {
@ -343,25 +359,30 @@ public final class XLikelySubtags {
        }
        LSR result = lsrs[value];

+        if (returnInputIfUnmatch &&
+            (!(matchLanguage || matchScript || (matchRegion && language.isEmpty())))) {
+            return new LSR("", "", "", LSR.EXPLICIT_LSR);  // no matching.
+        }
        if (language.isEmpty()) {
            language = "und";
        }

-        if (retainOldMask == 0) {
+        if (! (retainLanguage || retainScript || retainRegion)) {
            assert result.flags == LSR.IMPLICIT_LSR;
            return result;
        }
-        if ((retainOldMask & 4) == 0) {
+        if (!retainLanguage) {
            language = result.language;
        }
-        if ((retainOldMask & 2) == 0) {
+        if (!retainScript) {
            script = result.script;
        }
-        if ((retainOldMask & 1) == 0) {
+        if (!retainRegion) {
            region = result.region;
        }
+        int retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
-        // retainOldMask flags = LSR explicit-subtag flags
+        // retainMask flags = LSR explicit-subtag flags
-        return new LSR(language, script, region, retainOldMask);
+        return new LSR(language, script, region, retainMask);
    }

    /**
@ -502,50 +523,37 @@ public final class XLikelySubtags {
        }
    }

-    LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn,
+    /**
+     * Looks for a shorter subtag combination whose maximization is equivalent to the
+     * maximization of the input.
+     *
+     * <p>The input is maximized first (with returnInputIfUnmatch set). If that yields an LSR
+     * whose language, script and region are all empty, the input subtags are returned
+     * unchanged with {@code LSR.EXPLICIT_LSR} flags. Otherwise the candidates are tried in
+     * this order and the first one whose maximization {@code isEquivalentTo} the maximized
+     * input wins: language alone; then language+region followed by language+script when
+     * {@code fieldToFavor} is {@code FAVOR_REGION}, or language+script followed by
+     * language+region otherwise. If no candidate qualifies, the fully maximized
+     * language/script/region is returned.
+     *
+     * @param languageIn language subtag of the input
+     * @param scriptIn script subtag of the input
+     * @param regionIn region subtag of the input
+     * @param fieldToFavor selects which two-subtag candidate is tried first
+     * @return the selected subtags; every result other than the unchanged input carries
+     *     {@code LSR.DONT_CARE_FLAGS}
+     */
+    public LSR minimizeSubtags(String languageIn, String scriptIn, String regionIn,
            ULocale.Minimize fieldToFavor) {
-        LSR result = maximize(languageIn, scriptIn, regionIn);
-
-        // We could try just a series of checks, like:
-        // LSR result2 = addLikelySubtags(languageIn, "", "");
-        // if result.equals(result2) return result2;
-        // However, we can optimize 2 of the cases:
-        //   (languageIn, "", "")
-        //   (languageIn, "", regionIn)
-
-        // value00 = lookup(result.language, "", "")
-        BytesTrie iter = new BytesTrie(trie);
-        int value = trieNext(iter, result.language, 0);
-        assert value >= 0;
-        if (value == 0) {
-            value = trieNext(iter, "", 0);
-            assert value >= 0;
-            if (value == 0) {
-                value = trieNext(iter, "", 0);
+        // An all-empty LSR is how maximize(..., true) signals that nothing matched.
+        LSR max = maximize(languageIn, scriptIn, regionIn, true);
+        if (max.language.isEmpty() && max.region.isEmpty() && max.script.isEmpty()) {
+            // Cannot match, return as is
+            return new LSR(languageIn, scriptIn, regionIn, LSR.EXPLICIT_LSR);
+        }
+        // Shortest candidate first: the language alone.
+        LSR test = maximize(max.language, "", "", true);
+        if (test.isEquivalentTo(max)) {
+            return new LSR(max.language, "", "", LSR.DONT_CARE_FLAGS);
+        }
+        // Two-subtag candidates; fieldToFavor only decides which one is tried first.
+        if (ULocale.Minimize.FAVOR_REGION == fieldToFavor) {
+            test = maximize(max.language, "", max.region, true);
+            if (test.isEquivalentTo(max)) {
+                return new LSR(max.language, "", max.region, LSR.DONT_CARE_FLAGS);
+            }
+            test = maximize(max.language, max.script, "", true);
+            if (test.isEquivalentTo(max)) {
+                return new LSR(max.language, max.script, "", LSR.DONT_CARE_FLAGS);
+            }
+        } else {
+            test = maximize(max.language, max.script, "", true);
+            if (test.isEquivalentTo(max)) {
+                return new LSR(max.language, max.script, "", LSR.DONT_CARE_FLAGS);
+            }
+            test = maximize(max.language, "", max.region, true);
+            if (test.isEquivalentTo(max)) {
+                return new LSR(max.language, "", max.region, LSR.DONT_CARE_FLAGS);
            }
        }
-        assert value > 0;
-        LSR value00 = lsrs[value];
-        boolean favorRegionOk = false;
-        if (result.script.equals(value00.script)) { //script is default
-            if (result.region.equals(value00.region)) {
-                return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS);
-            } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) {
-                return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
-            } else {
-                favorRegionOk = true;
-            }
-        }
-
-        // The last case is not as easy to optimize.
-        // Maybe do later, but for now use the straightforward code.
-        LSR result2 = maximize(languageIn, scriptIn, "");
-        if (result2.equals(result)) {
-            return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS);
-        } else if (favorRegionOk) {
-            return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS);
-        }
-        return result;
+        // Nothing shorter is equivalent: keep all three maximized subtags.
+        return new LSR(max.language, max.script, max.region, LSR.DONT_CARE_FLAGS);
    }

    private Map<String, LSR> getTable() {
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/LocaleMatcher.java
@ -796,7 +796,7 @@ public final class LocaleMatcher {
        if (locale.equals(UND_ULOCALE)) {
            return UND_LSR;
        } else {
-            return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
+            return XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale, false);
        }
    }

--- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
@ -42,15 +42,16 @@ import com.ibm.icu.impl.locale.BaseLocale;
 import com.ibm.icu.impl.locale.Extension;
 import com.ibm.icu.impl.locale.InternalLocaleBuilder;
 import com.ibm.icu.impl.locale.KeyTypeData;
+import com.ibm.icu.impl.locale.LSR;
 import com.ibm.icu.impl.locale.LanguageTag;
 import com.ibm.icu.impl.locale.LocaleExtensions;
 import com.ibm.icu.impl.locale.LocaleSyntaxException;
 import com.ibm.icu.impl.locale.ParseStatus;
 import com.ibm.icu.impl.locale.UnicodeLocaleExtension;
+import com.ibm.icu.impl.locale.XLikelySubtags;
 import com.ibm.icu.lang.UScript;
 import com.ibm.icu.text.LocaleDisplayNames;
 import com.ibm.icu.text.LocaleDisplayNames.DialectHandling;
-
 /**
 * {@icuenhanced java.util.Locale}.{@icu _usage_}
 *
@ -2722,12 +2723,10 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
            trailing = loc.localeID.substring(trailingIndex);
        }

-        String newLocaleID =
-                createLikelySubtagsString(
-                        tags[0],
-                        tags[1],
-                        tags[2],
-                        trailing);
+        LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(
+            new ULocale(loc.getLanguage(), loc.getScript(), loc.getCountry()), true);
+        String newLocaleID = createTagString(max.language, max.script, max.region,
+            trailing);

        return newLocaleID == null ? loc : new ULocale(newLocaleID);
    }
@ -2819,148 +2818,22 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
    @Deprecated
    public static ULocale minimizeSubtags(ULocale loc, Minimize fieldToFavor) {
        String[] tags = new String[3];
+        String trailing = null;

        int trailingIndex = parseTagString(
                loc.localeID,
                tags);

-        String originalLang = tags[0];
-        String originalScript = tags[1];
-        String originalRegion = tags[2];
-        String originalTrailing = null;
-
        if (trailingIndex < loc.localeID.length()) {
-            /*
-             * Create a String that contains everything
-             * after the language, script, and region.
-             */
-            originalTrailing = loc.localeID.substring(trailingIndex);
+            trailing = loc.localeID.substring(trailingIndex);
        }

-        /**
-         * First, we need to first get the maximization
-         * by adding any likely subtags.
-         **/
-        String maximizedLocaleID =
-                createLikelySubtagsString(
-                        originalLang,
-                        originalScript,
-                        originalRegion,
-                        null);
+        LSR lsr = XLikelySubtags.INSTANCE.minimizeSubtags(
+            loc.getLanguage(), loc.getScript(), loc.getCountry(), fieldToFavor);
+        String newLocaleID = createTagString(lsr.language, lsr.script, lsr.region,
+            trailing);

-        /**
-         * If maximization fails, there's nothing
-         * we can do.
-         **/
-        if (isEmptyString(maximizedLocaleID)) {
-            return loc;
-        }
-        else {
-            /**
-             * Start first with just the language.
-             **/
-            String tag =
-                    createLikelySubtagsString(
-                            originalLang,
-                            null,
-                            null,
-                            null);
-
-            if (tag.equals(maximizedLocaleID)) {
-                String newLocaleID =
-                        createTagString(
-                                originalLang,
-                                null,
-                                null,
-                                originalTrailing);
-
-                return new ULocale(newLocaleID);
-            }
-        }
-
-        /**
-         * Next, try the language and region.
-         **/
-        if (fieldToFavor == Minimize.FAVOR_REGION) {
-            if (originalRegion.length() != 0) {
-                String tag =
-                        createLikelySubtagsString(
-                                originalLang,
-                                null,
-                                originalRegion,
-                                null);
-
-                if (tag.equals(maximizedLocaleID)) {
-                    String newLocaleID =
-                            createTagString(
-                                    originalLang,
-                                    null,
-                                    originalRegion,
-                                    originalTrailing);
-
-                    return new ULocale(newLocaleID);
-                }
-            }
-            if (originalScript.length() != 0){
-                String tag =
-                        createLikelySubtagsString(
-                                originalLang,
-                                originalScript,
-                                null,
-                                null);
-
-                if (tag.equals(maximizedLocaleID)) {
-                    String newLocaleID =
-                            createTagString(
-                                    originalLang,
-                                    originalScript,
-                                    null,
-                                    originalTrailing);
-
-                    return new ULocale(newLocaleID);
-                }
-            }
-        } else { // FAVOR_SCRIPT, so
-            if (originalScript.length() != 0){
-                String tag =
-                        createLikelySubtagsString(
-                                originalLang,
-                                originalScript,
-                                null,
-                                null);
-
-                if (tag.equals(maximizedLocaleID)) {
-                    String newLocaleID =
-                            createTagString(
-                                    originalLang,
-                                    originalScript,
-                                    null,
-                                    originalTrailing);
-
-                    return new ULocale(newLocaleID);
-                }
-            }
-            if (originalRegion.length() != 0) {
-                String tag =
-                        createLikelySubtagsString(
-                                originalLang,
-                                null,
-                                originalRegion,
-                                null);
-
-                if (tag.equals(maximizedLocaleID)) {
-                    String newLocaleID =
-                            createTagString(
-                                    originalLang,
-                                    null,
-                                    originalRegion,
-                                    originalTrailing);
-
-                    return new ULocale(newLocaleID);
-                }
-            }
-        }
-        return loc;
+        return newLocaleID == null ? loc : new ULocale(newLocaleID);
    }

    /**
@ -3007,10 +2880,9 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
     * @return The new tag string.
     **/
    private static String createTagString(String lang, String script, String region,
-            String trailing, String alternateTags) {
+            String trailing) {

        LocaleIDParser parser = null;
-        boolean regionAppended = false;

        StringBuilder tag = new StringBuilder();

@ -3018,8 +2890,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
            appendTag(
                    lang,
                    tag);
-        }
-        else if (isEmptyString(alternateTags)) {
+        } else {
            /*
             * Append the value for an unknown language, if
             * we found no language.
@ -3028,66 +2899,17 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
                    UNDEFINED_LANGUAGE,
                    tag);
        }
-        else {
-            parser = new LocaleIDParser(alternateTags);
-
-            String alternateLang = parser.getLanguage();
-
-            /*
-             * Append the value for an unknown language, if
-             * we found no language.
-             */
-            appendTag(
-                    !isEmptyString(alternateLang) ? alternateLang : UNDEFINED_LANGUAGE,
-                            tag);
-        }

        if (!isEmptyString(script)) {
            appendTag(
                    script,
                    tag);
        }
-        else if (!isEmptyString(alternateTags)) {
-            /*
-             * Parse the alternateTags string for the script.
-             */
-            if (parser == null) {
-                parser = new LocaleIDParser(alternateTags);
-            }
-
-            String alternateScript = parser.getScript();
-
-            if (!isEmptyString(alternateScript)) {
-                appendTag(
-                        alternateScript,
-                        tag);
-            }
-        }

        if (!isEmptyString(region)) {
            appendTag(
                    region,
                    tag);
-
-            regionAppended = true;
-        }
-        else if (!isEmptyString(alternateTags)) {
-            /*
-             * Parse the alternateTags string for the region.
-             */
-            if (parser == null) {
-                parser = new LocaleIDParser(alternateTags);
-            }
-
-            String alternateRegion = parser.getCountry();
-
-            if (!isEmptyString(alternateRegion)) {
-                appendTag(
-                        alternateRegion,
-                        tag);
-
-                regionAppended = true;
-            }
        }

        if (trailing != null && trailing.length() > 1) {
@ -3107,7 +2929,7 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
                separators = 1;
            }

-            if (regionAppended) {
+            if (!isEmptyString(region)) {
                /*
                 * If we appended a region, we may need to strip
                 * the extra separator from the variant portion.
@ -3134,21 +2956,6 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
        return tag.toString();
    }

-    /**
-     * Create a tag string from the supplied parameters.  The lang, script and region
-     * parameters may be null references.If the lang parameter is an empty string, the
-     * default value for an unknown language is written to the output buffer.
-     *
-     * @param lang The language tag to use.
-     * @param script The script tag to use.
-     * @param region The region tag to use.
-     * @param trailing Any trailing data to append to the new tag.
-     * @return The new String.
-     **/
-    static String createTagString(String lang, String script, String region, String trailing) {
-        return createTagString(lang, script, region, trailing, null);
-    }
-
    /**
     * Parse the language, script, and region subtags from a tag string, and return the results.
     *
@ -3214,144 +3021,6 @@ public final class ULocale implements Serializable, Comparable<ULocale> {
        }
    }

-    private static String lookupLikelySubtags(String localeId) {
-        UResourceBundle bundle =
-                UResourceBundle.getBundleInstance(
-                        ICUData.ICU_BASE_NAME, "likelySubtags");
-        try {
-            return bundle.getString(localeId);
-        }
-        catch(MissingResourceException e) {
-            return null;
-        }
-    }
-
-    private static String createLikelySubtagsString(String lang, String script, String region,
-            String variants) {
-
-        /**
-         * Try the language with the script and region first.
-         */
-        if (!isEmptyString(script) && !isEmptyString(region)) {
-
-            String searchTag =
-                    createTagString(
-                            lang,
-                            script,
-                            region,
-                            null);
-
-            String likelySubtags = lookupLikelySubtags(searchTag);
-
-            /*
-            if (likelySubtags == null) {
-                if (likelySubtags2 != null) {
-                    System.err.println("Tag mismatch: \"(null)\" \"" + likelySubtags2 + "\"");
-                }
-            }
-            else if (likelySubtags2 == null) {
-                System.err.println("Tag mismatch: \"" + likelySubtags + "\" \"(null)\"");
-            }
-            else if (!likelySubtags.equals(likelySubtags2)) {
-                System.err.println("Tag mismatch: \"" + likelySubtags + "\" \"" + likelySubtags2
-                    + "\"");
-            }
-             */
-            if (likelySubtags != null) {
-                // Always use the language tag from the
-                // maximal string, since it may be more
-                // specific than the one provided.
-                return createTagString(
-                        null,
-                        null,
-                        null,
-                        variants,
-                        likelySubtags);
-            }
-        }
-
-        /**
-         * Try the language with just the script.
-         **/
-        if (!isEmptyString(script)) {
-
-            String searchTag =
-                    createTagString(
-                            lang,
-                            script,
-                            null,
-                            null);
-
-            String likelySubtags = lookupLikelySubtags(searchTag);
-            if (likelySubtags != null) {
-                // Always use the language tag from the
-                // maximal string, since it may be more
-                // specific than the one provided.
-                return createTagString(
-                        null,
-                        null,
-                        region,
-                        variants,
-                        likelySubtags);
-            }
-        }
-
-        /**
-         * Try the language with just the region.
-         **/
-        if (!isEmptyString(region)) {
-
-            String searchTag =
-                    createTagString(
-                            lang,
-                            null,
-                            region,
-                            null);
-
-            String likelySubtags = lookupLikelySubtags(searchTag);
-
-            if (likelySubtags != null) {
-                // Always use the language tag from the
-                // maximal string, since it may be more
-                // specific than the one provided.
-                return createTagString(
-                        null,
-                        script,
-                        null,
-                        variants,
-                        likelySubtags);
-            }
-        }
-
-        /**
-         * Finally, try just the language.
-         **/
-        {
-            String searchTag =
-                    createTagString(
-                            lang,
-                            null,
-                            null,
-                            null);
-
-            String likelySubtags = lookupLikelySubtags(searchTag);
-
-            if (likelySubtags != null) {
-                // Always use the language tag from the
-                // maximal string, since it may be more
-                // specific than the one provided.
-                return createTagString(
-                        null,
-                        script,
-                        region,
-                        variants,
-                        likelySubtags);
-            }
-        }
-
-        return null;
-    }
-
    // --------------------------------
    //      BCP47/OpenJDK APIs
    // --------------------------------
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/data/likelySubtags.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/data/likelySubtags.txt
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleMatcherTest.java
@ -24,7 +24,6 @@ import org.junit.Test;
 import org.junit.runner.RunWith;

 import com.ibm.icu.dev.test.TestFmwk;
-import com.ibm.icu.dev.tool.locale.LikelySubtagsBuilder;
 import com.ibm.icu.impl.locale.XCldrStub.FileUtilities;
 import com.ibm.icu.impl.locale.XLikelySubtags;
 import com.ibm.icu.util.LocaleMatcher;
@ -869,19 +868,12 @@ public class LocaleMatcherTest extends TestFmwk {
        long start = System.nanoTime();
        for (int i = iterations; i > 0; --i) {
            for (ULocale locale : list) {
-                XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale);
+                XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(locale, false);
            }
        }
        return System.nanoTime() - start;
    }

-    @Test
-    public void testLikelySubtagsLoadedDataSameAsBuiltFromScratch() {
-        XLikelySubtags.Data built = LikelySubtagsBuilder.build();
-        XLikelySubtags.Data loaded = XLikelySubtags.Data.load();
-        assertEquals("run LocaleDistanceBuilder and update ICU4C langInfo.txt", built, loaded);
-    }
-
    private static final class TestCase implements Cloneable {
        private static final String ENDL = System.getProperties().getProperty("line.separator");

--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java
@ -16,11 +16,13 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
@ -54,7 +56,10 @@ import com.ibm.icu.util.ULocale.Minimize;
 import com.ibm.icu.util.UResourceBundle;
 import com.ibm.icu.util.VersionInfo;

-@RunWith(JUnit4.class)
+import junitparams.JUnitParamsRunner;
+import junitparams.Parameters;
+
+@RunWith(JUnitParamsRunner.class)
 public class ULocaleTest extends TestFmwk {

    // Ticket #8078 and #11674
@ -1947,7 +1952,7 @@ public class ULocaleTest extends TestFmwk {
                    "de__POSIX_1901"
                }, {
                    "und",
-                    ""
+                    "en"
                }
        };

@ -2760,8 +2765,8 @@ public class ULocaleTest extends TestFmwk {
                    "am"
                }, {
                    "und_Ethi_ER",
-                    "am_Ethi_ER",
-                    "am_ER"
+                    "ti_Ethi_ER",
+                    "ti_ER"
                }, {
                    "und_FI",
                    "fi_Latn_FI",
@ -3536,8 +3541,8 @@ public class ULocaleTest extends TestFmwk {
                    "trv"
                }, {
                    "und_Latn_HK",
-                    "zh_Latn_HK",
-                    "zh_Latn_HK"
+                    "en_Latn_HK",
+                    "en_HK"
                }, {
                    "und_Latn_AQ",
                    "_Latn_AQ",
@ -5417,4 +5422,103 @@ public class ULocaleTest extends TestFmwk {
        }

    }
+
+    /**
+     * Reports whether a source tag is one of those singled out for ICU-20777:
+     * a fixed list of "und-" plus region tags, or any tag beginning with "und-Latn-".
+     *
+     * @param s the source language tag to check
+     * @return true if the tag is on the ICU-20777 list
+     */
+    boolean isKnownSourceFor20777(String s) {
+        switch (s) {
+            case "und-001":
+            case "und-AQ":
+            case "und-CC":
+            case "und-SL":
+            case "und-SS":
+            case "und-ZM":
+                return true;
+            default:
+                return s.startsWith("und-Latn-");
+        }
+    }
+
+    /**
+     * One record of the likelySubtags.txt test data: the source tag and the expected
+     * results for adding likely subtags and for the two minimization modes.
+     */
+    private static final class TestCase implements Cloneable {
+        private static final String ENDL = System.getProperties().getProperty("line.separator");
+
+        int lineNr = 0;
+
+        String source = "";
+        String addLikely = "";
+        String removeFavorScript = "";
+        String removeFavorRegion = "";
+
+        /** Returns a field-by-field copy of this record. */
+        @Override
+        public TestCase clone() throws CloneNotSupportedException {
+            return (TestCase) super.clone();
+        }
+
+        /** Renders the four data fields in file order, separated by semicolons. */
+        @Override
+        public String toString() {
+            StringBuilder text = new StringBuilder(source);
+            String[] expectations = { addLikely, removeFavorScript, removeFavorRegion };
+            for (String expectation : expectations) {
+                text.append(";").append(expectation);
+            }
+            return text.toString();
+        }
+    }
+    /**
+     * Loads the data-driven test cases from the "likelySubtags.txt" test data file.
+     *
+     * <p>Each data line holds up to four semicolon-separated fields: source, addLikely,
+     * removeFavorScript and removeFavorRegion. Blank lines, lines starting with '#' and
+     * lines with fewer than two fields are skipped. A missing or empty removeFavorScript
+     * defaults to addLikely, and a missing or empty removeFavorRegion defaults to
+     * removeFavorScript.
+     *
+     * @return one TestCase per data line, in file order
+     * @throws Exception if the data file cannot be opened or read
+     */
+    static List<TestCase> readLikelySubtagsTestCases() throws Exception {
+        List<TestCase> tests = new ArrayList<>();
+        // try-with-resources closes the reader on every exit path.
+        try (BufferedReader testFile = TestUtil.getDataReader("likelySubtags.txt")) {
+            int lineNr = 0;
+            String line;
+            while ((line = testFile.readLine()) != null) {
+                ++lineNr;
+                // Tolerate indentation and any amount of blank padding around ';' so that
+                // column-aligned data never leaks whitespace into a language tag.
+                line = line.trim();
+                if (line.isEmpty() || line.startsWith("#")) continue;
+                String [] fields = line.split("\\s*;\\s*");
+                if (fields.length < 2) continue;
+                // A fresh record per line; no shared mutable instance to clone.
+                TestCase test = new TestCase();
+                test.lineNr = lineNr;
+                test.source = fields[0];
+                test.addLikely = fields[1];
+                test.removeFavorScript = (fields.length < 3) || fields[2].isEmpty() ? test.addLikely : fields[2];
+                test.removeFavorRegion = (fields.length < 4) || fields[3].isEmpty() ? test.removeFavorScript : fields[3];
+                tests.add(test);
+            }
+        }
+        return tests;
+    }
+
+    /**
+     * Data-driven check of addLikelySubtags and both minimizeSubtags modes against one
+     * record of likelySubtags.txt. An expectation of "FAIL" means the operation must
+     * return the input locale unchanged.
+     *
+     * <p>For sources listed by isKnownSourceFor20777 a deviation is recorded as known
+     * issue ICU-20777 instead of being asserted.
+     */
+    @Test
+    @Parameters(method = "readLikelySubtagsTestCases")
+    public void likelySubtagsDataDriven(TestCase test) {
+        ULocale l = ULocale.forLanguageTag(test.source);
+        ULocale maximized = ULocale.addLikelySubtags(l);
+        ULocale minimizedFavorRegion = ULocale.minimizeSubtags(l);
+        ULocale minimizedFavorScript = ULocale.minimizeSubtags(l, ULocale.Minimize.FAVOR_SCRIPT);
+        if (isKnownSourceFor20777(test.source)) {
+            // Record the known issue only when the result actually deviates from the data;
+            // a matching result is not an issue and needs no log entry.
+            if (!meetsLikelySubtagsExpectation(test.addLikely, l, maximized)) {
+                logKnownIssue("ICU-20777", "addLikelySubtags(" + test.source + ")");
+            }
+            if (!meetsLikelySubtagsExpectation(test.removeFavorRegion, l, minimizedFavorRegion)) {
+                logKnownIssue("ICU-20777", "minimizeSubtags(" + test.source + ")");
+            }
+            if (!meetsLikelySubtagsExpectation(test.removeFavorScript, l, minimizedFavorScript)) {
+                logKnownIssue("ICU-20777", "minimizeSubtags(" + test.source + ") - FAVOR_SCRIPT");
+            }
+        } else {
+            if (test.addLikely.equals("FAIL")) {
+                assertEquals("addLikelySubtags(" + test.source + ") should be unchanged",
+                    l, maximized);
+            } else {
+                assertEquals("addLikelySubtags(" + test.source + ")",
+                    test.addLikely, maximized.toLanguageTag());
+            }
+            if (test.removeFavorRegion.equals("FAIL")) {
+                assertEquals("minimizeSubtags(" + test.source + ") should be unchanged",
+                    l, minimizedFavorRegion);
+            } else {
+                assertEquals("minimizeSubtags(" + test.source + ")",
+                    test.removeFavorRegion, minimizedFavorRegion.toLanguageTag());
+            }
+            if (test.removeFavorScript.equals("FAIL")) {
+                assertEquals("minimizeSubtags(" + test.source + ") - FAVOR_SCRIPT should be unchanged",
+                    l, minimizedFavorScript);
+            } else {
+                assertEquals("minimizeSubtags(" + test.source + ") - FAVOR_SCRIPT",
+                    test.removeFavorScript, minimizedFavorScript.toLanguageTag());
+            }
+        }
+    }
+
+    /**
+     * Tells whether an actual result satisfies the expectation from the data file:
+     * "FAIL" expects the input locale back unchanged, anything else is compared with the
+     * result's language tag.
+     */
+    private static boolean meetsLikelySubtagsExpectation(
+            String expected, ULocale input, ULocale actual) {
+        return expected.equals("FAIL")
+            ? input.equals(actual)
+            : expected.equals(actual.toLanguageTag());
+    }
 }
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/tool/locale/LikelySubtagsBuilder.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/tool/locale/LikelySubtagsBuilder.java
@ -1,317 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-package com.ibm.icu.dev.tool.locale;
-
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-
-import com.ibm.icu.impl.ICUData;
-import com.ibm.icu.impl.ICUResourceBundle;
-import com.ibm.icu.impl.UResource;
-import com.ibm.icu.impl.locale.LSR;
-import com.ibm.icu.impl.locale.XCldrStub.HashMultimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimap;
-import com.ibm.icu.impl.locale.XCldrStub.Multimaps;
-import com.ibm.icu.impl.locale.XLikelySubtags;
-import com.ibm.icu.util.BytesTrieBuilder;
-import com.ibm.icu.util.ICUException;
-
-/**
- * Builds data for XLikelySubtags.
- * Reads source data from ICU resource bundles.
- */
-public class LikelySubtagsBuilder {
-    private static final boolean DEBUG_OUTPUT = LSR.DEBUG_OUTPUT;
-
-    private static ICUResourceBundle getSupplementalDataBundle(String name) {
-        return ICUResourceBundle.getBundleInstance(
-            ICUData.ICU_BASE_NAME, name,
-            ICUResourceBundle.ICU_DATA_CLASS_LOADER, ICUResourceBundle.OpenType.DIRECT);
-    }
-
-    private static final class AliasesBuilder {
-        final Map<String, String> toCanonical = new HashMap<>();
-        final Multimap<String, String> toAliases;
-
-        public Set<String> getAliases(String canonical) {
-            Set<String> aliases = toAliases.get(canonical);
-            return aliases == null ? Collections.singleton(canonical) : aliases;
-        }
-
-        public AliasesBuilder(String type) {
-            ICUResourceBundle metadata = getSupplementalDataBundle("metadata");
-            UResource.Value value = metadata.getValueWithFallback("alias/" + type);
-            UResource.Table aliases = value.getTable();
-            UResource.Key key = new UResource.Key();
-            for (int i = 0; aliases.getKeyAndValue(i, key, value); ++i) {
-                String aliasFrom = key.toString();
-                if (aliasFrom.contains("_") || aliasFrom.contains("-")) {
-                    continue; // only simple aliasing
-                }
-                UResource.Table table = value.getTable();
-                if (table.findValue("reason", value) && value.getString().equals("overlong")) {
-                    continue;
-                }
-                if (!table.findValue("replacement", value)) {
-                    continue;
-                }
-                String aliasTo = value.getString();
-                int spacePos = aliasTo.indexOf(' ');
-                String aliasFirst = spacePos < 0 ? aliasTo : aliasTo.substring(0, spacePos);
-                if (aliasFirst.contains("_")) {
-                    continue; // only simple aliasing
-                }
-                toCanonical.put(aliasFrom, aliasFirst);
-            }
-            if (type.equals("language")) {
-                toCanonical.put("mo", "ro"); // special case
-            }
-            toAliases = Multimaps.invertFrom(toCanonical, HashMultimap.<String, String>create());
-
-            if (DEBUG_OUTPUT) {
-                System.out.println("*** " + type + " aliases");
-                for (Map.Entry<String, String> mapping : new TreeMap<>(toCanonical).entrySet()) {
-                    System.out.println(mapping);
-                }
-            }
-        }
-    }
-
-    private static final class TrieBuilder {
-        byte[] bytes = new byte[24];
-        int length = 0;
-        BytesTrieBuilder tb = new BytesTrieBuilder();
-
-        void addValue(int value) {
-            assert value >= 0;
-            tb.add(bytes, length, value);
-        }
-
-        void addStar() {
-            bytes[length++] = '*';
-        }
-
-        void addSubtag(String s) {
-            assert !s.isEmpty();
-            assert !s.equals("*");
-            int end = s.length() - 1;
-            for (int i = 0;; ++i) {
-                char c = s.charAt(i);
-                assert c <= 0x7f;
-                if (i < end) {
-                    bytes[length++] = (byte) c;
-                } else {
-                    // Mark the last character as a terminator to avoid overlap matches.
-                    bytes[length++] = (byte) (c | 0x80);
-                    break;
-                }
-            }
-        }
-
-        byte[] build() {
-            ByteBuffer buffer = tb.buildByteBuffer(BytesTrieBuilder.Option.SMALL);
-            // Allocate an array with just the necessary capacity,
-            // so that we do not hold on to a larger array for a long time.
-            byte[] bytes = new byte[buffer.remaining()];
-            buffer.get(bytes);
-            if (DEBUG_OUTPUT) {
-                System.out.println("likely subtags trie size: " + bytes.length + " bytes");
-            }
-            return bytes;
-        }
-    }
-
-    // VisibleForTesting
-    public static XLikelySubtags.Data build() {
-        AliasesBuilder languageAliasesBuilder = new AliasesBuilder("language");
-        AliasesBuilder regionAliasesBuilder = new AliasesBuilder("territory");
-
-        Map<String, Map<String, Map<String, LSR>>> langTable =
-                makeTable(languageAliasesBuilder, regionAliasesBuilder);
-
-        TrieBuilder trieBuilder = new TrieBuilder();
-        Map<LSR, Integer> lsrIndexes = new LinkedHashMap<>();
-        // Reserve index 0 as "no value":
-        // The runtime lookup returns 0 for an intermediate match with no value.
-        lsrIndexes.put(new LSR("", "", "", LSR.DONT_CARE_FLAGS), 0);  // arbitrary LSR
-        // Reserve index 1 for SKIP_SCRIPT:
-        // The runtime lookup returns 1 for an intermediate match with a value.
-        // This LSR looks good when printing the data.
-        lsrIndexes.put(new LSR("skip", "script", "", LSR.DONT_CARE_FLAGS), 1);
-        // We could prefill the lsrList with common locales to give them small indexes,
-        // and see if that improves performance a little.
-        for (Map.Entry<String, Map<String, Map<String, LSR>>> ls :  langTable.entrySet()) {
-            trieBuilder.length = 0;
-            String lang = ls.getKey();
-            if (lang.equals("und")) {
-                trieBuilder.addStar();
-            } else {
-                trieBuilder.addSubtag(lang);
-            }
-            Map<String, Map<String, LSR>> scriptTable = ls.getValue();
-            boolean skipScript = false;
-            if (scriptTable.size() == 1) {
-                Map<String, LSR> regionTable = scriptTable.get("");
-                if (regionTable.size() == 1) {
-                    // Prune the script and region levels from language with
-                    // only * for scripts and regions.
-                    int i = uniqueIdForLsr(lsrIndexes, regionTable.get(""));
-                    trieBuilder.addValue(i);
-                    continue;
-                } else {
-                    // Prune the script level from language with only * for scripts
-                    // but with real regions.
-                    // Set an intermediate value as a signal to the lookup code.
-                    trieBuilder.addValue(XLikelySubtags.SKIP_SCRIPT);
-                    skipScript = true;
-                }
-            }
-            int scriptStartLength = trieBuilder.length;
-            for (Map.Entry<String, Map<String, LSR>> sr :  scriptTable.entrySet()) {
-                trieBuilder.length = scriptStartLength;
-                if (!skipScript) {
-                    String script = sr.getKey();
-                    if (script.isEmpty()) {
-                        trieBuilder.addStar();
-                    } else {
-                        trieBuilder.addSubtag(script);
-                    }
-                }
-                Map<String, LSR> regionTable = sr.getValue();
-                if (regionTable.size() == 1) {
-                    // Prune the region level from language+script with only * for regions.
-                    int i = uniqueIdForLsr(lsrIndexes, regionTable.get(""));
-                    trieBuilder.addValue(i);
-                    continue;
-                }
-                int regionStartLength = trieBuilder.length;
-                for (Map.Entry<String, LSR> r2lsr :  regionTable.entrySet()) {
-                    trieBuilder.length = regionStartLength;
-                    String region = r2lsr.getKey();
-                    // Map the whole lang+script+region to a unique, dense index of the LSR.
-                    if (region.isEmpty()) {
-                        trieBuilder.addStar();
-                    } else {
-                        trieBuilder.addSubtag(region);
-                    }
-                    int i = uniqueIdForLsr(lsrIndexes, r2lsr.getValue());
-                    trieBuilder.addValue(i);
-                }
-            }
-        }
-        byte[] trie = trieBuilder.build();
-        LSR[] lsrs = lsrIndexes.keySet().toArray(new LSR[lsrIndexes.size()]);
-        return new XLikelySubtags.Data(
-                languageAliasesBuilder.toCanonical, regionAliasesBuilder.toCanonical, trie, lsrs);
-    }
-
-    private static int uniqueIdForLsr(Map<LSR, Integer> lsrIndexes, LSR lsr) {
-        Integer index = lsrIndexes.get(lsr);
-        if (index != null) {
-            return index.intValue();
-        } else {
-            int i = lsrIndexes.size();
-            lsrIndexes.put(lsr, i);
-            return i;
-        }
-    }
-
-    private static Map<String, Map<String, Map<String, LSR>>> makeTable(
-            AliasesBuilder languageAliasesBuilder, AliasesBuilder regionAliasesBuilder) {
-        Map<String, Map<String, Map<String, LSR>>> result = new TreeMap<>();
-        // set the base data
-        ICUResourceBundle likelySubtags = getSupplementalDataBundle("likelySubtags");
-        UResource.Value value = likelySubtags.getValueWithFallback("");
-        UResource.Table table = value.getTable();
-        UResource.Key key = new UResource.Key();
-        for (int i = 0; table.getKeyAndValue(i, key, value); ++i) {
-            LSR ltp = lsrFromLocaleID(key.toString());  // source
-            final String language = ltp.language;
-            final String script = ltp.script;
-            final String region = ltp.region;
-
-            ltp = lsrFromLocaleID(value.getString());  // target
-            set(result, language, script, region, ltp);
-
-            // now add aliases
-            Collection<String> languageAliases = languageAliasesBuilder.getAliases(language);
-            Collection<String> regionAliases = regionAliasesBuilder.getAliases(region);
-            for (String languageAlias : languageAliases) {
-                for (String regionAlias : regionAliases) {
-                    if (languageAlias.equals(language) && regionAlias.equals(region)) {
-                        continue;
-                    }
-                    set(result, languageAlias, script, regionAlias, ltp);
-                }
-            }
-        }
-        // hack
-        set(result, "und", "Latn", "", new LSR("en", "Latn", "US", LSR.DONT_CARE_FLAGS));
-
-        // hack, ensure that if und-YY => und-Xxxx-YY, then we add Xxxx=>YY to the table
-        // <likelySubtag from="und_GH" to="ak_Latn_GH"/>
-
-        // so und-Latn-GH   =>  ak-Latn-GH
-        Map<String, Map<String, LSR>> undScriptMap = result.get("und");
-        Map<String, LSR> undEmptyRegionMap = undScriptMap.get("");
-        for (Map.Entry<String, LSR> regionEntry : undEmptyRegionMap.entrySet()) {
-            final LSR lsr = regionEntry.getValue();
-            set(result, "und", lsr.script, lsr.region, lsr);
-        }
-        //
-        // check that every level has "" (or "und")
-        if (!result.containsKey("und")) {
-            throw new IllegalArgumentException("failure: base");
-        }
-        for (Map.Entry<String, Map<String, Map<String, LSR>>> langEntry : result.entrySet()) {
-            String lang = langEntry.getKey();
-            final Map<String, Map<String, LSR>> scriptMap = langEntry.getValue();
-            if (!scriptMap.containsKey("")) {
-                throw new IllegalArgumentException("failure: " + lang);
-            }
-            for (Map.Entry<String, Map<String, LSR>> scriptEntry : scriptMap.entrySet()) {
-                String script = scriptEntry.getKey();
-                final Map<String, LSR> regionMap = scriptEntry.getValue();
-                if (!regionMap.containsKey("")) {
-                    throw new IllegalArgumentException("failure: " + lang + "-" + script);
-                }
-            }
-        }
-        return result;
-    }
-
-    // Parses locale IDs in the likelySubtags data, not arbitrary language tags.
-    private static LSR lsrFromLocaleID(String languageIdentifier) {
-        String[] parts = languageIdentifier.split("[-_]");
-        if (parts.length < 1 || parts.length > 3) {
-            throw new ICUException("too many subtags");
-        }
-        String lang = parts[0];
-        String p2 = parts.length < 2 ? "" : parts[1];
-        String p3 = parts.length < 3 ? "" : parts[2];
-        return p2.length() < 4 ?
-                new LSR(lang, "", p2, LSR.DONT_CARE_FLAGS) :
-                new LSR(lang, p2, p3, LSR.DONT_CARE_FLAGS);
-    }
-
-    private static void set(Map<String, Map<String, Map<String, LSR>>> langTable,
-            final String language, final String script, final String region, LSR newValue) {
-        Map<String, Map<String, LSR>> scriptTable = getSubtable(langTable, language);
-        Map<String, LSR> regionTable = getSubtable(scriptTable, script);
-        regionTable.put(region, newValue);
-    }
-
-    private static <K, V, T> Map<V, T> getSubtable(Map<K, Map<V, T>> table, final K subtag) {
-        Map<V, T> subTable = table.get(subtag);
-        if (subTable == null) {
-            table.put(subtag, subTable = new TreeMap<>());
-        }
-        return subTable;
-    }
-}
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/tool/locale/LocaleDistanceBuilder.java
@ -489,7 +489,7 @@ public final class LocaleDistanceBuilder {
        Set<LSR> paradigmLSRs = new LinkedHashSet<>();  // could be TreeSet if LSR were Comparable
        for (String paradigm : paradigms) {
            ULocale pl = new ULocale(paradigm);
-            LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl);
+            LSR max = XLikelySubtags.INSTANCE.makeMaximizedLsrFrom(pl, false);
            // Clear the LSR flags to make the data equality test in
            // LocaleDistanceTest happy.
            paradigmLSRs.add(new LSR(max.language, max.script, max.region, LSR.DONT_CARE_FLAGS));
@ -887,7 +887,7 @@ public final class LocaleDistanceBuilder {
    }

    public static final void main(String[] args) throws IOException {
-        XLikelySubtags.Data likelyData = LikelySubtagsBuilder.build();
+        XLikelySubtags.Data likelyData = XLikelySubtags.Data.load();
        LocaleDistance.Data distanceData = build();
        System.out.println("Writing LocaleDistance.Data to " + TXT_PATH + '/' + TXT_FILE_NAME);
        try (PrintWriter out = openWriter()) {