diff --git a/icu4c/source/common/rbbi.cpp b/icu4c/source/common/rbbi.cpp index 2615a4b32b5..2f93f6e85ab 100644 --- a/icu4c/source/common/rbbi.cpp +++ b/icu4c/source/common/rbbi.cpp @@ -1,6 +1,6 @@ /* *************************************************************************** -* Copyright (C) 1999-2010 International Business Machines Corporation +* Copyright (C) 1999-2011 International Business Machines Corporation * and others. All rights reserved. *************************************************************************** */ @@ -86,6 +86,40 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const RBBIDataHeader* data, enum } } + +// +// Construct from precompiled binary rules (tables). This constructor is public API, +// taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules(). +// The rule storage is not adopted (kDontAdopt); it stays owned by the caller. +// A NULL pointer, a buffer shorter than the RBBIDataHeader, or a ruleLength that +// differs from the length stored in the header sets U_ILLEGAL_ARGUMENT_ERROR. +// +RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules, + uint32_t ruleLength, + UErrorCode &status) { + init(); + if (U_FAILURE(status)) { + return; + } + // Validate the buffer before casting so no header field is read from NULL or past its end. + if (compiledRules == NULL || ruleLength < sizeof(RBBIDataHeader)) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + const RBBIDataHeader *data = (const RBBIDataHeader *)compiledRules; + if (data->fLength != ruleLength) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + fData = new RBBIDataWrapper(data, RBBIDataWrapper::kDontAdopt, status); + if (U_FAILURE(status)) {return;} + if(fData == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } +} + + //------------------------------------------------------------------------------- // // Constructor from a UDataMemory handle to precompiled break rules diff --git a/icu4c/source/common/unicode/rbbi.h b/icu4c/source/common/unicode/rbbi.h index 90ec6e6bd6a..f93b57766e9 100644 --- a/icu4c/source/common/unicode/rbbi.h +++ b/icu4c/source/common/unicode/rbbi.h @@ -1,6 +1,6 @@ /* *************************************************************************** -* Copyright (C) 1999-2008 International Business Machines Corporation * +* Copyright (C) 1999-2011 International Business Machines Corporation * * and others. All rights reserved. 
* *************************************************************************** @@ -240,6 +240,36 @@ public: UErrorCode &status); + + + /** + * Construct a RuleBasedBreakIterator from a set of precompiled binary rules. + * Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules(). + * Construction of a break iterator in this way is substantially faster than + * construction from source rules. + * + * Ownership of the storage containing the compiled rules remains with the + * caller of this function. The compiled rules must not be modified or + * deleted during the life of the break iterator. + * + * The compiled rules are not compatible across different major versions of ICU. + * The compiled rules are compatible only between machines with the same + * byte ordering (little or big endian) and the same base character set family + * (ASCII or EBCDIC). + * + * @see #getBinaryRules + * @param compiledRules A pointer to the compiled break rules to be used. + * @param ruleLength The length of the compiled break rules, in bytes. This + * corresponds to the length value produced by getBinaryRules(). + * @param status Information on any errors encountered, including invalid + * binary rules. + * @draft ICU 4.8 + */ + RuleBasedBreakIterator(const uint8_t *compiledRules, + uint32_t ruleLength, + UErrorCode &status); + + /** * This constructor uses the udata interface to create a BreakIterator * whose internal tables live in a memory-mapped file. "image" is an @@ -599,7 +629,7 @@ public: * @return A pointer to the binary (compiled) rule data. The storage * belongs to the RulesBasedBreakIterator object, not the * caller, and must not be modified or deleted. 
- * @internal + * @draft ICU 4.8 */ virtual const uint8_t *getBinaryRules(uint32_t &length); diff --git a/icu4c/source/test/intltest/rbbiapts.cpp b/icu4c/source/test/intltest/rbbiapts.cpp index aa52454bd2c..327b0798548 100644 --- a/icu4c/source/test/intltest/rbbiapts.cpp +++ b/icu4c/source/test/intltest/rbbiapts.cpp @@ -1,5 +1,5 @@ /******************************************************************** - * Copyright (c) 1999-2010, International Business Machines + * Copyright (c) 1999-2011, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************** * Date Name Description @@ -19,6 +19,7 @@ #include "rbbidata.h" #include "cstring.h" #include "ubrkimpl.h" +#include "unicode/locid.h" #include "unicode/ustring.h" #include "unicode/utext.h" #include "cmemory.h" @@ -31,8 +32,8 @@ #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\ errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}} -#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ -errln("Test Failure at file %s, line %d", __FILE__, __LINE__);}} +#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \ + errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};} void RBBIAPITest::TestCloneEquals() { @@ -1090,6 +1091,32 @@ void RBBIAPITest::TestCreateFromRBBIData() { errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) ); } } + + // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...) 
+ // + status = U_ZERO_ERROR; + RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); + TEST_ASSERT_SUCCESS(status); + uint32_t length; + const uint8_t *rules = rb->getBinaryRules(length); + RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(*rb == *rb2); + UnicodeString words = "one two three "; + rb2->setText(words); + int wordCounter = 0; + while (rb2->next() != UBRK_DONE) { + wordCounter++; + } + TEST_ASSERT(wordCounter == 6); + + status = U_ZERO_ERROR; + RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status); + TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); + + delete rb; + delete rb2; + delete rb3; } //---------------------------------------------