ICU-3383 add utility to 'autoquote' unquoted apostrophes in message format

X-SVN-Rev: 17720
This commit is contained in:
Doug Felt 2005-05-27 22:07:16 +00:00
parent dc7aed1d04
commit 70537cf0ca
6 changed files with 207 additions and 1 deletions

View file

@ -31,6 +31,7 @@
#include "unicode/ustring.h"
#include "unicode/ucnv_err.h"
#include "unicode/uchar.h"
#include "unicode/umsg.h"
#include "unicode/rbnf.h"
#include "ustrfmt.h"
#include "cmemory.h"
@ -1169,6 +1170,29 @@ MessageFormat::parseObject( const UnicodeString& source,
result.adoptArray(tmpResult, cnt);
}
/**
 * Converts an 'apostrophe-friendly' pattern into a standard pattern by
 * delegating to umsg_autoQuoteApostrophe().
 *
 * @param pattern the 'apostrophe-friendly' pattern to convert
 * @param status  input/output error code; no conversion is attempted if it
 *                already indicates a failure on entry
 * @return the converted pattern, or a bogus string if status indicates a
 *         failure on exit
 */
UnicodeString
MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
    UnicodeString result;
    if (U_SUCCESS(status)) {
        int32_t plen = pattern.length();
        const UChar* pat = pattern.getBuffer();
        // The scanner inserts at most one apostrophe per apostrophe in the
        // input, so twice the input length is enough; +1 leaves room for
        // the terminating NUL.
        int32_t blen = plen * 2 + 1;
        UChar* buf = result.getBuffer(blen);
        if (buf == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
        } else {
            int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status);
            // Every successful getBuffer() must be paired with a
            // releaseBuffer(), including on failure; otherwise the string
            // is left with its buffer still "open". On failure len is not
            // a usable length, so release with an empty string instead.
            result.releaseBuffer(U_SUCCESS(status) ? len : 0);
        }
    }
    if (U_FAILURE(status)) {
        result.setToBogus();
    }
    return result;
}
// -------------------------------------
static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {

View file

@ -30,6 +30,7 @@
#include "unicode/unistr.h"
#include "cpputils.h"
#include "uassert.h"
#include "ustr_imp.h"
U_NAMESPACE_USE
@ -601,5 +602,106 @@ umsg_vparse(const UMessageFormat *fmt,
delete [] args;
}
/* Pattern characters that drive the apostrophe auto-quoting scanner. */
#define SINGLE_QUOTE      ((UChar)0x0027)
#define CURLY_BRACE_LEFT  ((UChar)0x007B)
#define CURLY_BRACE_RIGHT ((UChar)0x007D)

/* Scanner states. */
#define STATE_INITIAL      0  /* plain text, outside quotes and braces */
#define STATE_SINGLE_QUOTE 1  /* the previous character was an opening apostrophe */
#define STATE_IN_QUOTE     2  /* inside a quoted run that started with '{ or '} */
#define STATE_MSG_ELEMENT  3  /* inside a {...} element; nesting is counted */

/*
 * Appends c to the output. Expects 'buffer', 'blen' and 'len' to be in
 * scope at the point of use. When the buffer is full nothing is stored,
 * but len is still advanced so that it ends up holding the length the
 * complete output would need.
 */
#define MAppend(c)              \
    do {                        \
        if (len < blen) {       \
            buffer[len++] = c;  \
        } else {                \
            len++;              \
        }                       \
    } while (0)
int32_t umsg_autoQuoteApostrophe(const UChar* pattern,
int32_t plen,
UChar* buffer,
int32_t blen,
UErrorCode* ec)
{
int32_t state = STATE_INITIAL;
int32_t braceCount = 0;
int32_t len = 0;
if (ec == NULL || U_FAILURE(*ec)) {
return -1;
}
if (pattern == NULL || plen < -1 || (buffer == NULL && blen > 0)) {
*ec = U_ILLEGAL_ARGUMENT_ERROR;
return -1;
}
if (plen == -1) {
plen = u_strlen(pattern);
}
for (int i = 0; i < plen; ++i) {
UChar c = pattern[i];
switch (state) {
case STATE_INITIAL:
switch (c) {
case SINGLE_QUOTE:
state = STATE_SINGLE_QUOTE;
break;
case CURLY_BRACE_LEFT:
state = STATE_MSG_ELEMENT;
++braceCount;
break;
}
break;
case STATE_SINGLE_QUOTE:
switch (c) {
case SINGLE_QUOTE:
state = STATE_INITIAL;
break;
case CURLY_BRACE_LEFT:
case CURLY_BRACE_RIGHT:
state = STATE_IN_QUOTE;
break;
default:
MAppend(SINGLE_QUOTE);
state = STATE_INITIAL;
break;
}
break;
case STATE_IN_QUOTE:
switch (c) {
case SINGLE_QUOTE:
state = STATE_INITIAL;
break;
}
break;
case STATE_MSG_ELEMENT:
switch (c) {
case CURLY_BRACE_LEFT:
++braceCount;
break;
case CURLY_BRACE_RIGHT:
if (--braceCount == 0) {
state = STATE_INITIAL;
}
break;
}
break;
default: // Never happens.
break;
}
MAppend(c);
}
// End of scan
if (state == STATE_SINGLE_QUOTE || state == STATE_IN_QUOTE) {
MAppend(SINGLE_QUOTE);
}
return u_terminateUChars(buffer, blen, len, ec);
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -573,6 +573,26 @@ public:
Formattable& result,
ParsePosition& pos) const;
/**
* Convert an 'apostrophe-friendly' pattern into a standard
* pattern. Standard patterns treat all apostrophes as
* quotes, which is problematic in some languages, e.g.
* French, where apostrophe is commonly used. This utility
* assumes that only an unpaired apostrophe immediately before
* a brace is a true quote. Other unpaired apostrophes are paired,
* and the resulting standard pattern string is returned.
*
* <p><b>Note</b> it is not guaranteed that the returned pattern
* is indeed a valid pattern. The only effect is to convert
* between patterns having different quoting semantics.
*
* @param pattern the 'apostrophe-friendly' pattern to convert
* @param status Input/output error code. If it indicates a failure
* on entry, no conversion is attempted.
* @return the standard equivalent of the original pattern, or a
* bogus string if status indicates a failure
* @since ICU 3.4
*/
static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern,
UErrorCode& status);
/**
* Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
* This method is to implement a simple version of RTTI, since not all

View file

@ -597,6 +597,36 @@ umsg_vparse(const UMessageFormat *fmt,
UErrorCode *status);
/**
* Convert an 'apostrophe-friendly' pattern into a standard
* pattern. Standard patterns treat all apostrophes as
* quotes, which is problematic in some languages, e.g.
* French, where apostrophe is commonly used. This utility
* assumes that only an unpaired apostrophe immediately before
* a brace is a true quote. Other unpaired apostrophes are paired,
* and the resulting standard pattern string is returned.
*
* <p><b>Note</b> it is not guaranteed that the returned pattern
* is indeed a valid pattern. The only effect is to convert
* between patterns having different quoting semantics.
*
* @param pattern the 'apostrophe-friendly' pattern to convert
* @param plen the length of pattern, or -1 if unknown and pattern is null-terminated
* @param buffer the buffer for the result, or NULL if preflight only
* @param blen the length of the buffer, or 0 if preflighting
* @param ec the error code; set to U_ILLEGAL_ARGUMENT_ERROR if pattern is
* NULL, plen is less than -1, or buffer is NULL while blen is greater than 0
* @return the length of the resulting text, not including trailing null;
* if buffer has room for the trailing null, it is provided, otherwise
* not. Returns -1 if ec is NULL, if ec already indicates a failure on
* entry, or if the arguments are invalid.
* @draft ICU 3.4
*/
U_STABLE int32_t U_EXPORT2
umsg_autoQuoteApostrophe(const UChar* pattern,
int32_t plen,
UChar* buffer,
int32_t blen,
UErrorCode* ec);
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif

View file

@ -28,6 +28,7 @@
#include "unicode/numfmt.h"
#include "unicode/choicfmt.h"
#include "unicode/gregocal.h"
#include <stdio.h>
void
TestMessageFormat::runIndexedTest(int32_t index, UBool exec,
@ -53,6 +54,7 @@ TestMessageFormat::runIndexedTest(int32_t index, UBool exec,
TESTCASE(17,TestUnlimitedArgsAndSubformats);
TESTCASE(18,TestRBNF);
TESTCASE(19,TestTurkishCasing);
TESTCASE(20,testAutoQuoteApostrophe);
default: name = ""; break;
}
}
@ -193,7 +195,6 @@ void TestMessageFormat::testBug2()
#include "unicode/datefmt.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
IntlTest&
operator<<( IntlTest& stream,
@ -1211,4 +1212,32 @@ void TestMessageFormat::TestRBNF(void) {
delete numFmt;
}
void TestMessageFormat::testAutoQuoteApostrophe(void) {
const char* patterns[] = { // pattern, expected pattern
"'", "''",
"''", "''",
"'{", "'{'",
"' {", "'' {",
"'a", "''a",
"'{'a", "'{'a",
"'{a'", "'{a'",
"'{}", "'{}'",
"{'", "{'",
"{'a", "{'a",
"{'a{}'a}'a", "{'a{}'a}''a",
};
int32_t pattern_count = sizeof(patterns)/sizeof(patterns[0]);
for (int i = 0; i < pattern_count; i += 2) {
UErrorCode status = U_ZERO_ERROR;
UnicodeString result = MessageFormat::autoQuoteApostrophe(patterns[i], status);
UnicodeString target(patterns[i+1]);
if (target != result) {
char buf[128];
sprintf(buf, "[%2d] \"%s\" : \"%s\" != \"%s\"\n", i/2, patterns[i], patterns[i+1], result);
errln(buf);
}
}
}
#endif /* #if !UCONFIG_NO_FORMATTING */

View file

@ -86,6 +86,7 @@ public:
void testParse(void);
void testAdopt(void);
void TestTurkishCasing(void);
void testAutoQuoteApostrophe(void);
private:
};