diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp index b997167b311..e43ee6597d8 100644 --- a/icu4c/source/i18n/measunit_extra.cpp +++ b/icu4c/source/i18n/measunit_extra.cpp @@ -321,6 +321,14 @@ private: class Parser { public: + /** + * Factory function for parsing the given identifier. + * + * @param source The identifier to parse. This function does not make a copy + * of source: the underlying string that source points at, must outlive the + * parser. + * @param status ICU error code. + */ static Parser from(StringPiece source, UErrorCode& status) { if (U_FAILURE(status)) { return Parser(); @@ -340,6 +348,10 @@ public: private: int32_t fIndex = 0; + + // Since we're not owning this memory, whatever is passed to the constructor + // should live longer than this Parser - and the parser shouldn't return any + // references to that string. StringPiece fSource; UCharsTrie fTrie; @@ -399,7 +411,6 @@ private: // 1 = power token seen (will not accept another power token) // 2 = SI prefix token seen (will not accept a power or SI prefix token) int32_t state = 0; - int32_t previ = fIndex; // Maybe read a compound part if (fIndex != 0) { @@ -432,7 +443,6 @@ private: fAfterPer = false; break; } - previ = fIndex; } // Read a unit @@ -449,7 +459,6 @@ private: return; } result.dimensionality *= token.getPower(); - previ = fIndex; state = 1; break; @@ -459,7 +468,6 @@ private: return; } result.siPrefix = token.getSIPrefix(); - previ = fIndex; state = 2; break; @@ -469,7 +477,6 @@ private: case Token::TYPE_SIMPLE_UNIT: result.index = token.getSimpleUnitIndex(); - result.identifier = fSource.substr(previ, fIndex - previ); return; default: @@ -576,7 +583,7 @@ void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& o return; } - output.append(singleUnit.identifier, status); + output.appendInvariantChars(gSimpleUnits[singleUnit.index], status); } /** diff --git a/icu4c/source/i18n/measunit_impl.h b/icu4c/source/i18n/measunit_impl.h index cf0ea63d2af..9657ff2c1d9 100644 --- a/icu4c/source/i18n/measunit_impl.h +++ b/icu4c/source/i18n/measunit_impl.h @@ -69,9 +69,6 @@ struct SingleUnitImpl : public UMemory { /** Simple unit index, unique for every simple unit. */ int32_t index = 0; - /** Simple unit identifier; memory not owned by the SimpleUnit. */ - StringPiece identifier; - /** SI prefix. **/ UMeasureSIPrefix siPrefix = UMEASURE_SI_PREFIX_ONE; diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp index 9306a5660bc..79eb9b461a0 100644 --- a/icu4c/source/test/intltest/measfmttest.cpp +++ b/icu4c/source/test/intltest/measfmttest.cpp @@ -82,6 +82,7 @@ private: void TestInvalidIdentifiers(); void TestCompoundUnitOperations(); void TestIdentifiers(); + void Test21060_AddressSanitizerProblem(); void verifyFormat( const char *description, @@ -204,6 +205,7 @@ void MeasureFormatTest::runIndexedTest( TESTCASE_AUTO(TestInvalidIdentifiers); TESTCASE_AUTO(TestCompoundUnitOperations); TESTCASE_AUTO(TestIdentifiers); + TESTCASE_AUTO(Test21060_AddressSanitizerProblem); TESTCASE_AUTO_END; } @@ -3445,6 +3447,36 @@ void MeasureFormatTest::TestIdentifiers() { } } +// ICU-21060 +void MeasureFormatTest::Test21060_AddressSanitizerProblem() { + UErrorCode status = U_ZERO_ERROR; + MeasureUnit first = MeasureUnit::forIdentifier("one", status); + + // Experimentally, a compound unit like "kilogram-meter" failed. A single + // unit like "kilogram" or "meter" did not fail, did not trigger the + // problem. + MeasureUnit crux = MeasureUnit::forIdentifier("one-per-meter", status); + + // Heap allocation of a new CharString for first.identifier happens here: + first = first.product(crux, status); + + // Constructing second from first's identifier resulted in a failure later, + // as second held a reference to a substring of first's identifier: + MeasureUnit second = MeasureUnit::forIdentifier(first.getIdentifier(), status); + + // Heap is freed here, as an old first.identifier CharString is deallocated + // and a new CharString is allocated: + first = first.product(crux, status); + + // Proving we've had no failure yet: + if (U_FAILURE(status)) return; + + // heap-use-after-free failure happened here, since a SingleUnitImpl had + // held onto a StringPiece pointing at a substring of an identifier that was + // freed above: + second = second.product(crux, status); +} + void MeasureFormatTest::verifyFieldPosition( const char *description,