ICU-7083 Make RBBI create from compiled rules public.

X-SVN-Rev: 29926
This commit is contained in:
Andy Heninger 2011-04-29 17:49:01 +00:00
parent 8a69ab8bba
commit be5efb8d44
3 changed files with 89 additions and 6 deletions

View file

@ -1,6 +1,6 @@
/*
***************************************************************************
* Copyright (C) 1999-2010 International Business Machines Corporation
* Copyright (C) 1999-2011 International Business Machines Corporation
* and others. All rights reserved.
***************************************************************************
*/
@ -86,6 +86,32 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(const RBBIDataHeader* data, enum
}
}
//
// Construct from precompiled binary rules (tables). This constructor is public API,
// taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules().
//
// The rule image is handed to RBBIDataWrapper with kDontAdopt, so the caller
// keeps ownership of the storage.
//
//   compiledRules  Pointer to the compiled rule image; it is interpreted as
//                  starting with an RBBIDataHeader.
//   ruleLength     Length of the image in bytes. Must equal the length recorded
//                  in the header (fLength).
//   status         Left untouched if already a failure on entry. Set to
//                  U_ILLEGAL_ARGUMENT_ERROR for a null pointer, an image too
//                  short to contain a header, or a length mismatch; set to
//                  U_MEMORY_ALLOCATION_ERROR if the data wrapper cannot be
//                  allocated.
//
RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules,
                                               uint32_t       ruleLength,
                                               UErrorCode     &status) {
    init();
    if (U_FAILURE(status)) {
        return;
    }
    // Validate the buffer before touching it: the header field fLength may only
    // be read when the pointer is non-null and the caller-supplied length
    // covers a complete RBBIDataHeader.
    if (compiledRules == NULL || ruleLength < sizeof(RBBIDataHeader)) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    const RBBIDataHeader *data = (const RBBIDataHeader *)compiledRules;
    if (data->fLength != ruleLength) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    fData = new RBBIDataWrapper(data, RBBIDataWrapper::kDontAdopt, status);
    if (U_FAILURE(status)) {
        return;
    }
    if (fData == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
}
//-------------------------------------------------------------------------------
//
// Constructor from a UDataMemory handle to precompiled break rules

View file

@ -1,6 +1,6 @@
/*
***************************************************************************
* Copyright (C) 1999-2008 International Business Machines Corporation *
* Copyright (C) 1999-2011 International Business Machines Corporation *
* and others. All rights reserved. *
***************************************************************************
@ -240,6 +240,36 @@ public:
UErrorCode &status);
/**
* Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
* Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules().
* Construction of a break iterator in this way is substantially faster than
* construction from source rules.
*
* Ownership of the storage containing the compiled rules remains with the
* caller of this function. The compiled rules must not be modified or
* deleted during the life of the break iterator.
*
* The compiled rules are not compatible across different major versions of ICU.
* The compiled rules are compatible only between machines with the same
* byte ordering (little or big endian) and the same base character set family
* (ASCII or EBCDIC).
*
* @see #getBinaryRules
* @param compiledRules A pointer to the compiled break rules to be used.
* @param ruleLength The length of the compiled break rules, in bytes. This
* corresponds to the length value produced by getBinaryRules().
* @param status Information on any errors encountered, including invalid
* binary rules. U_ILLEGAL_ARGUMENT_ERROR is set when ruleLength
* does not match the length recorded in the compiled rules.
* @draft ICU 4.8
*/
RuleBasedBreakIterator(const uint8_t *compiledRules,
uint32_t ruleLength,
UErrorCode &status);
/**
* This constructor uses the udata interface to create a BreakIterator
* whose internal tables live in a memory-mapped file. "image" is an
@ -599,7 +629,7 @@ public:
* @return A pointer to the binary (compiled) rule data. The storage
* belongs to the RuleBasedBreakIterator object, not the
* caller, and must not be modified or deleted.
* @internal
* @draft ICU 4.8
*/
virtual const uint8_t *getBinaryRules(uint32_t &length);

View file

@ -1,5 +1,5 @@
/********************************************************************
* Copyright (c) 1999-2010, International Business Machines
* Copyright (c) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************
* Date Name Description
@ -19,6 +19,7 @@
#include "rbbidata.h"
#include "cstring.h"
#include "ubrkimpl.h"
#include "unicode/locid.h"
#include "unicode/ustring.h"
#include "unicode/utext.h"
#include "cmemory.h"
@ -31,8 +32,8 @@
// Logs a failure through errln(), with the source file, line number and ICU
// error name, when status holds a failure code. The macro does not return, so
// execution continues after the report.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}
// Logs a failure through errln(), with the source file and line number, when
// expr compares equal to FALSE. Execution continues after the report.
#define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
errln("Test Failure at file %s, line %d", __FILE__, __LINE__);}}
// Same check as above; the message additionally quotes the text of the failing
// expression, obtained by stringizing the macro argument (#expr).
#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
void RBBIAPITest::TestCloneEquals()
{
@ -1090,6 +1091,32 @@ void RBBIAPITest::TestCreateFromRBBIData() {
errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
}
}
// getBinaryRules() and RuleBasedBreakIterator(const uint8_t *compiledRules, ...)
//
status = U_ZERO_ERROR;
RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
TEST_ASSERT_SUCCESS(status);
uint32_t length;
const uint8_t *rules = rb->getBinaryRules(length);
RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(*rb == *rb2);
UnicodeString words = "one two three ";
rb2->setText(words);
int wordCounter = 0;
while (rb2->next() != UBRK_DONE) {
wordCounter++;
}
TEST_ASSERT(wordCounter == 6);
status = U_ZERO_ERROR;
RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status);
TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
delete rb;
delete rb2;
delete rb3;
}
//---------------------------------------------