diff --git a/icu4c/source/common/brkiter.cpp b/icu4c/source/common/brkiter.cpp index 3d1366a6688..b9b6ca65cd4 100644 --- a/icu4c/source/common/brkiter.cpp +++ b/icu4c/source/common/brkiter.cpp @@ -38,6 +38,7 @@ #include "uresimp.h" #include "uassert.h" #include "ubrkimpl.h" +#include "utracimp.h" #include "charstr.h" // ***************************************************************************** @@ -412,14 +413,23 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) BreakIterator *result = NULL; switch (kind) { case UBRK_CHARACTER: - result = BreakIterator::buildInstance(loc, "grapheme", status); + { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_CHARACTER); + result = BreakIterator::buildInstance(loc, "grapheme", status); + UTRACE_EXIT_STATUS(status); + } break; case UBRK_WORD: - result = BreakIterator::buildInstance(loc, "word", status); + { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_WORD); + result = BreakIterator::buildInstance(loc, "word", status); + UTRACE_EXIT_STATUS(status); + } break; case UBRK_LINE: - uprv_strcpy(lbType, "line"); { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE); + uprv_strcpy(lbType, "line"); char lbKeyValue[kKeyValueLenMax] = {0}; UErrorCode kvStatus = U_ZERO_ERROR; int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus); @@ -427,13 +437,17 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) uprv_strcat(lbType, "_"); uprv_strcat(lbType, lbKeyValue); } + result = BreakIterator::buildInstance(loc, lbType, status); + + UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue); + UTRACE_EXIT_STATUS(status); } - result = BreakIterator::buildInstance(loc, lbType, status); break; case UBRK_SENTENCE: - result = BreakIterator::buildInstance(loc, "sentence", status); -#if !UCONFIG_NO_FILTERED_BREAK_ITERATION { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_SENTENCE); + result = BreakIterator::buildInstance(loc, "sentence", status); +#if !UCONFIG_NO_FILTERED_BREAK_ITERATION char ssKeyValue[kKeyValueLenMax] = {0}; 
UErrorCode kvStatus = U_ZERO_ERROR; int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus); @@ -444,11 +458,16 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) delete fbiBuilder; } } - } #endif + UTRACE_EXIT_STATUS(status); + } break; case UBRK_TITLE: - result = BreakIterator::buildInstance(loc, "title", status); + { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_TITLE); + result = BreakIterator::buildInstance(loc, "title", status); + UTRACE_EXIT_STATUS(status); + } break; default: status = U_ILLEGAL_ARGUMENT_ERROR; diff --git a/icu4c/source/common/dictbe.cpp b/icu4c/source/common/dictbe.cpp index c769138ae4b..b42cdf03fae 100644 --- a/icu4c/source/common/dictbe.cpp +++ b/icu4c/source/common/dictbe.cpp @@ -18,6 +18,7 @@ #include "unicode/uniset.h" #include "unicode/chariter.h" #include "unicode/ubrk.h" +#include "utracimp.h" #include "uvectr32.h" #include "uvector.h" #include "uassert.h" @@ -194,6 +195,8 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode : DictionaryBreakEngine(), fDictionary(adoptDictionary) { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); + UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai"); fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status); if (U_SUCCESS(status)) { setCharacters(fThaiWordSet); @@ -213,6 +216,7 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode fEndWordSet.compact(); fBeginWordSet.compact(); fSuffixSet.compact(); + UTRACE_EXIT_STATUS(status); } ThaiBreakEngine::~ThaiBreakEngine() { @@ -436,6 +440,8 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s : DictionaryBreakEngine(), fDictionary(adoptDictionary) { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); + UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo"); fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status); if (U_SUCCESS(status)) { setCharacters(fLaoWordSet); @@ -452,6 
+458,7 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s fMarkSet.compact(); fEndWordSet.compact(); fBeginWordSet.compact(); + UTRACE_EXIT_STATUS(status); } LaoBreakEngine::~LaoBreakEngine() { @@ -632,6 +639,8 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro : DictionaryBreakEngine(), fDictionary(adoptDictionary) { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); + UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr"); fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status); if (U_SUCCESS(status)) { setCharacters(fBurmeseWordSet); @@ -645,6 +654,7 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro fMarkSet.compact(); fEndWordSet.compact(); fBeginWordSet.compact(); + UTRACE_EXIT_STATUS(status); } BurmeseBreakEngine::~BurmeseBreakEngine() { @@ -825,6 +835,8 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod : DictionaryBreakEngine(), fDictionary(adoptDictionary) { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); + UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr"); fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status); if (U_SUCCESS(status)) { setCharacters(fKhmerWordSet); @@ -850,6 +862,7 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod fEndWordSet.compact(); fBeginWordSet.compact(); // fSuffixSet.compact(); + UTRACE_EXIT_STATUS(status); } KhmerBreakEngine::~KhmerBreakEngine() { @@ -1045,6 +1058,8 @@ foundBest: static const uint32_t kuint32max = 0xFFFFFFFF; CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status) : DictionaryBreakEngine(), fDictionary(adoptDictionary) { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); + UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani"); // Korean dictionary only includes Hangul syllables fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), 
status); fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status); @@ -1066,6 +1081,7 @@ CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType setCharacters(cjSet); } } + UTRACE_EXIT_STATUS(status); } CjkBreakEngine::~CjkBreakEngine(){ diff --git a/icu4c/source/common/rbbi.cpp b/icu4c/source/common/rbbi.cpp index f80c3e0c624..43ba58ba9e6 100644 --- a/icu4c/source/common/rbbi.cpp +++ b/icu4c/source/common/rbbi.cpp @@ -1117,7 +1117,7 @@ static icu::UInitOnce gRBBIInitOnce = U_INITONCE_INITIALIZER; * Release all static memory held by breakiterator. */ U_CDECL_BEGIN -static UBool U_CALLCONV rbbi_cleanup(void) { +UBool U_CALLCONV rbbi_cleanup(void) { delete gLanguageBreakFactories; gLanguageBreakFactories = nullptr; delete gEmptyString; diff --git a/icu4c/source/common/rbbidata.h b/icu4c/source/common/rbbidata.h index b7de6ce073c..7b9b8d82526 100644 --- a/icu4c/source/common/rbbidata.h +++ b/icu4c/source/common/rbbidata.h @@ -192,6 +192,8 @@ private: U_NAMESPACE_END +U_CFUNC UBool rbbi_cleanup(void); + #endif /* C++ */ #endif diff --git a/icu4c/source/common/unicode/utrace.h b/icu4c/source/common/unicode/utrace.h index 5afcd9f4490..5b4a0497bf1 100644 --- a/icu4c/source/common/unicode/utrace.h +++ b/icu4c/source/common/unicode/utrace.h @@ -177,6 +177,71 @@ typedef enum UTraceFunctionNumber { UTRACE_RES_DATA_LIMIT, #endif // U_HIDE_INTERNAL_API +#ifndef U_HIDE_DRAFT_API + /** + * The lowest break iterator location. + * @draft ICU 67 + */ + UTRACE_UBRK_START=0x4000, + + /** + * Indicates that a character instance of break iterator was created. + * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START, + + /** + * Indicates that a word instance of break iterator was created. + * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_WORD, + + /** + * Indicates that a line instance of break iterator was created. + * + * Provides one C-style string to UTraceData: the lb value ("", + * "loose", "strict", or "normal"). 
+ * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_LINE, + + /** + * Indicates that a sentence instance of break iterator was created. + * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_SENTENCE, + + /** + * Indicates that a title instance of break iterator was created. + * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_TITLE, + + /** + * Indicates that an internal dictionary break engine was created. + * + * Provides one C-style string to UTraceData: the script code of what + * the break engine covers ("Hani", "Khmr", "Laoo", "Mymr", or "Thai"). + * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_BREAK_ENGINE, + +#endif // U_HIDE_DRAFT_API + +#ifndef U_HIDE_INTERNAL_API + /** + * One more than the highest normal break iterator trace location. + * @internal The numeric value may change over time, see ICU ticket #12420. + */ + UTRACE_UBRK_LIMIT, +#endif // U_HIDE_INTERNAL_API + } UTraceFunctionNumber; /** diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index a5b8f13e4e8..0ca89f6d64f 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -14,6 +14,7 @@ #include "unicode/utypes.h" #if !UCONFIG_NO_BREAK_ITERATION +#include #include #include #include @@ -35,6 +36,7 @@ #include "unicode/uscript.h" #include "unicode/ustring.h" #include "unicode/utext.h" +#include "unicode/utrace.h" #include "charstr.h" #include "cmemory.h" @@ -126,6 +128,19 @@ void RBBITest::runIndexedTest( int32_t index, UBool exec, const char* &name, cha TESTCASE_AUTO(TestReverse); TESTCASE_AUTO(TestBug13692); TESTCASE_AUTO(TestDebugRules); + +#if U_ENABLE_TRACING + TESTCASE_AUTO(TestTraceCreateCharacter); + TESTCASE_AUTO(TestTraceCreateWord); + TESTCASE_AUTO(TestTraceCreateSentence); + TESTCASE_AUTO(TestTraceCreateTitle); + TESTCASE_AUTO(TestTraceCreateLine); + TESTCASE_AUTO(TestTraceCreateLineNormal); + TESTCASE_AUTO(TestTraceCreateLineLoose); + TESTCASE_AUTO(TestTraceCreateLineStrict); + 
TESTCASE_AUTO(TestTraceCreateBreakEngine); +#endif + + TESTCASE_AUTO_END; } @@ -4865,6 +4880,182 @@ void RBBITest::TestDebugRules() { #endif } +#if U_ENABLE_TRACING +static std::vector<std::string> gData; +static std::vector<int32_t> gEntryFn; +static std::vector<int32_t> gExitFn; +static std::vector<int32_t> gDataFn; +static void U_CALLCONV traceData( + const void*, + int32_t fnNumber, + int32_t, + const char *, + va_list args) { + if (UTRACE_UBRK_START <= fnNumber && fnNumber <= UTRACE_UBRK_LIMIT) { + const char* data = va_arg(args, const char*); + gDataFn.push_back(fnNumber); + gData.push_back(data); + } +} + +static void traceEntry(const void *, int32_t fnNumber) { + if (UTRACE_UBRK_START <= fnNumber && fnNumber <= UTRACE_UBRK_LIMIT) { + gEntryFn.push_back(fnNumber); + } +} + +static void traceExit(const void *, int32_t fnNumber, const char *, va_list) { + if (UTRACE_UBRK_START <= fnNumber && fnNumber <= UTRACE_UBRK_LIMIT) { + gExitFn.push_back(fnNumber); + } +} + + +void RBBITest::assertTestTraceResult(int32_t fnNumber, const char* expectedData) { + assertEquals("utrace_entry should be called ", 1, gEntryFn.size()); + assertEquals("utrace_entry should be called with ", fnNumber, gEntryFn[0]); + assertEquals("utrace_exit should be called ", 1, gExitFn.size()); + assertEquals("utrace_exit should be called with ", fnNumber, gExitFn[0]); + + if (expectedData == nullptr) { + assertEquals("utrace_data should not be called ", 0, gDataFn.size()); + assertEquals("utrace_data should not be called ", 0, gData.size()); + } else { + assertEquals("utrace_data should be called ", 1, gDataFn.size()); + assertEquals("utrace_data should be called with ", fnNumber, gDataFn[0]); + assertEquals("utrace_data should be called ", 1, gData.size()); + assertEquals("utrace_data should pass in ", expectedData, gData[0].c_str()); + } +} + +void SetupTestTrace() { + gEntryFn.clear(); + gExitFn.clear(); + gDataFn.clear(); + gData.clear(); + + const void* context = nullptr; + utrace_setFunctions(context, traceEntry, traceExit, 
traceData); + utrace_setLevel(UTRACE_INFO); +} + +void RBBITest::TestTraceCreateCharacter(void) { + SetupTestTrace(); + IcuTestErrorCode status(*this, "TestTraceCreateCharacter"); + LocalPointer<BreakIterator> brkitr( + BreakIterator::createCharacterInstance("zh-CN", status)); + status.errIfFailureAndReset(); + assertTestTraceResult(UTRACE_UBRK_CREATE_CHARACTER, nullptr); +} + +void RBBITest::TestTraceCreateTitle(void) { + SetupTestTrace(); + IcuTestErrorCode status(*this, "TestTraceCreateTitle"); + LocalPointer<BreakIterator> brkitr( + BreakIterator::createTitleInstance("zh-CN", status)); + status.errIfFailureAndReset(); + assertTestTraceResult(UTRACE_UBRK_CREATE_TITLE, nullptr); +} + +void RBBITest::TestTraceCreateSentence(void) { + SetupTestTrace(); + IcuTestErrorCode status(*this, "TestTraceCreateSentence"); + LocalPointer<BreakIterator> brkitr( + BreakIterator::createSentenceInstance("zh-CN", status)); + status.errIfFailureAndReset(); + assertTestTraceResult(UTRACE_UBRK_CREATE_SENTENCE, nullptr); +} + +void RBBITest::TestTraceCreateWord(void) { + SetupTestTrace(); + IcuTestErrorCode status(*this, "TestTraceCreateWord"); + LocalPointer<BreakIterator> brkitr( + BreakIterator::createWordInstance("zh-CN", status)); + status.errIfFailureAndReset(); + assertTestTraceResult(UTRACE_UBRK_CREATE_WORD, nullptr); +} + +void RBBITest::TestTraceCreateLine(void) { + SetupTestTrace(); + IcuTestErrorCode status(*this, "TestTraceCreateLine"); + LocalPointer<BreakIterator> brkitr( + BreakIterator::createLineInstance("zh-CN", status)); + status.errIfFailureAndReset(); + assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, ""); +} + +void RBBITest::TestTraceCreateLineStrict(void) { + SetupTestTrace(); + IcuTestErrorCode status(*this, "TestTraceCreateLineStrict"); + LocalPointer<BreakIterator> brkitr( + BreakIterator::createLineInstance("zh-CN-u-lb-strict", status)); + status.errIfFailureAndReset(); + assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "strict"); +} + +void RBBITest::TestTraceCreateLineNormal(void) { + SetupTestTrace(); + IcuTestErrorCode status(*this, 
"TestTraceCreateLineNormal"); + LocalPointer<BreakIterator> brkitr( + BreakIterator::createLineInstance("zh-CN-u-lb-normal", status)); + status.errIfFailureAndReset(); + assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "normal"); +} + +void RBBITest::TestTraceCreateLineLoose(void) { + SetupTestTrace(); + IcuTestErrorCode status(*this, "TestTraceCreateLineLoose"); + LocalPointer<BreakIterator> brkitr( + BreakIterator::createLineInstance("zh-CN-u-lb-loose", status)); + status.errIfFailureAndReset(); + assertTestTraceResult(UTRACE_UBRK_CREATE_LINE, "loose"); +} + +void RBBITest::TestTraceCreateBreakEngine(void) { + rbbi_cleanup(); + SetupTestTrace(); + IcuTestErrorCode status(*this, "TestTraceCreateBreakEngine"); + LocalPointer<BreakIterator> brkitr( + BreakIterator::createWordInstance("zh-CN", status)); + status.errIfFailureAndReset(); + assertTestTraceResult(UTRACE_UBRK_CREATE_WORD, nullptr); + + // To word break the following text, BreakIterator will create 5 dictionary + // break engines internally. + brkitr->setText( + u"test " + u"測試 " // Hani + u"សាកល្បង " // Khmr + u"ທົດສອບ " // Laoo + u"စမ်းသပ်မှု " // Mymr + u"ทดสอบ " // Thai + u"test " + ); + + // Loop through all the text. 
+ while (brkitr->next() > 0) ; + + assertEquals("utrace_entry should be called ", 6, gEntryFn.size()); + assertEquals("utrace_exit should be called ", 6, gExitFn.size()); + assertEquals("utrace_data should be called ", 5, gDataFn.size()); + + for (std::vector<int32_t>::size_type i = 0; i < gDataFn.size(); i++) { + assertEquals("utrace_entry should be called ", + UTRACE_UBRK_CREATE_BREAK_ENGINE, gEntryFn[i+1]); + assertEquals("utrace_exit should be called ", + UTRACE_UBRK_CREATE_BREAK_ENGINE, gExitFn[i+1]); + assertEquals("utrace_data should be called ", + UTRACE_UBRK_CREATE_BREAK_ENGINE, gDataFn[i]); + } + + assertEquals("utrace_data should pass ", "Hani", gData[0].c_str()); + assertEquals("utrace_data should pass ", "Khmr", gData[1].c_str()); + assertEquals("utrace_data should pass ", "Laoo", gData[2].c_str()); + assertEquals("utrace_data should pass ", "Mymr", gData[3].c_str()); + assertEquals("utrace_data should pass ", "Thai", gData[4].c_str()); + +} +#endif #endif // #if !UCONFIG_NO_BREAK_ITERATION diff --git a/icu4c/source/test/intltest/rbbitst.h b/icu4c/source/test/intltest/rbbitst.h index 96c2882c5af..8f667e5e74d 100644 --- a/icu4c/source/test/intltest/rbbitst.h +++ b/icu4c/source/test/intltest/rbbitst.h @@ -87,6 +87,18 @@ public: void TestDebug(); void TestProperties(); +#if U_ENABLE_TRACING + void TestTraceCreateCharacter(); + void TestTraceCreateWord(); + void TestTraceCreateSentence(); + void TestTraceCreateTitle(); + void TestTraceCreateLine(); + void TestTraceCreateLineNormal(); + void TestTraceCreateLineStrict(); + void TestTraceCreateLineLoose(); + void TestTraceCreateBreakEngine(); +#endif + /***********************/ private: /** @@ -120,6 +132,11 @@ private: // Test parameters, from the test framework and test invocation. const char* fTestParams; + +#if U_ENABLE_TRACING + void assertTestTraceResult(int32_t fnNumber, const char* expectedData); +#endif + }; #endif /* #if !UCONFIG_NO_BREAK_ITERATION */