mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-14 17:24:01 +00:00
ICU-3383 add utility to 'autoquote' unquoted apostrophes in message format
X-SVN-Rev: 17720
This commit is contained in:
parent
dc7aed1d04
commit
70537cf0ca
6 changed files with 207 additions and 1 deletions
|
@ -31,6 +31,7 @@
|
|||
#include "unicode/ustring.h"
|
||||
#include "unicode/ucnv_err.h"
|
||||
#include "unicode/uchar.h"
|
||||
#include "unicode/umsg.h"
|
||||
#include "unicode/rbnf.h"
|
||||
#include "ustrfmt.h"
|
||||
#include "cmemory.h"
|
||||
|
@ -1169,6 +1170,29 @@ MessageFormat::parseObject( const UnicodeString& source,
|
|||
result.adoptArray(tmpResult, cnt);
|
||||
}
|
||||
|
||||
/**
 * Converts an 'apostrophe-friendly' pattern into a standard pattern by
 * running umsg_autoQuoteApostrophe() over the pattern's UChar buffer.
 *
 * @param pattern the 'apostrophe-friendly' pattern to convert
 * @param status  in/out error code; no conversion is attempted if it already
 *                indicates a failure on entry
 * @return the converted pattern, or a bogus string if status indicates a
 *         failure when the function returns
 */
UnicodeString
MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
    UnicodeString result;
    if (U_SUCCESS(status)) {
        int32_t plen = pattern.length();
        const UChar* pat = pattern.getBuffer();
        int32_t blen = plen * 2 + 1; // space for null termination, convenience
        UChar* buf = result.getBuffer(blen);
        if (buf == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
        } else {
            int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status);
            // Every successful getBuffer(capacity) must be paired with a
            // releaseBuffer(); on failure close the buffer with length 0 so
            // the string is not left open before it is marked bogus below.
            result.releaseBuffer(U_SUCCESS(status) ? len : 0);
        }
    }
    if (U_FAILURE(status)) {
        result.setToBogus();
    }
    return result;
}
|
||||
|
||||
// -------------------------------------
|
||||
|
||||
static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "unicode/unistr.h"
|
||||
#include "cpputils.h"
|
||||
#include "uassert.h"
|
||||
#include "ustr_imp.h"
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
|
@ -601,5 +602,106 @@ umsg_vparse(const UMessageFormat *fmt,
|
|||
delete [] args;
|
||||
}
|
||||
|
||||
#define SINGLE_QUOTE ((UChar)0x0027)
|
||||
#define CURLY_BRACE_LEFT ((UChar)0x007B)
|
||||
#define CURLY_BRACE_RIGHT ((UChar)0x007D)
|
||||
|
||||
#define STATE_INITIAL 0
|
||||
#define STATE_SINGLE_QUOTE 1
|
||||
#define STATE_IN_QUOTE 2
|
||||
#define STATE_MSG_ELEMENT 3
|
||||
|
||||
#define MAppend(c) if (len < blen) buffer[len++] = c; else len++
|
||||
|
||||
|
||||
int32_t umsg_autoQuoteApostrophe(const UChar* pattern,
|
||||
int32_t plen,
|
||||
UChar* buffer,
|
||||
int32_t blen,
|
||||
UErrorCode* ec)
|
||||
{
|
||||
int32_t state = STATE_INITIAL;
|
||||
int32_t braceCount = 0;
|
||||
int32_t len = 0;
|
||||
|
||||
if (ec == NULL || U_FAILURE(*ec)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pattern == NULL || plen < -1 || (buffer == NULL && blen > 0)) {
|
||||
*ec = U_ILLEGAL_ARGUMENT_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (plen == -1) {
|
||||
plen = u_strlen(pattern);
|
||||
}
|
||||
|
||||
for (int i = 0; i < plen; ++i) {
|
||||
UChar c = pattern[i];
|
||||
switch (state) {
|
||||
case STATE_INITIAL:
|
||||
switch (c) {
|
||||
case SINGLE_QUOTE:
|
||||
state = STATE_SINGLE_QUOTE;
|
||||
break;
|
||||
case CURLY_BRACE_LEFT:
|
||||
state = STATE_MSG_ELEMENT;
|
||||
++braceCount;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case STATE_SINGLE_QUOTE:
|
||||
switch (c) {
|
||||
case SINGLE_QUOTE:
|
||||
state = STATE_INITIAL;
|
||||
break;
|
||||
case CURLY_BRACE_LEFT:
|
||||
case CURLY_BRACE_RIGHT:
|
||||
state = STATE_IN_QUOTE;
|
||||
break;
|
||||
default:
|
||||
MAppend(SINGLE_QUOTE);
|
||||
state = STATE_INITIAL;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case STATE_IN_QUOTE:
|
||||
switch (c) {
|
||||
case SINGLE_QUOTE:
|
||||
state = STATE_INITIAL;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case STATE_MSG_ELEMENT:
|
||||
switch (c) {
|
||||
case CURLY_BRACE_LEFT:
|
||||
++braceCount;
|
||||
break;
|
||||
case CURLY_BRACE_RIGHT:
|
||||
if (--braceCount == 0) {
|
||||
state = STATE_INITIAL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
default: // Never happens.
|
||||
break;
|
||||
}
|
||||
|
||||
MAppend(c);
|
||||
}
|
||||
|
||||
// End of scan
|
||||
if (state == STATE_SINGLE_QUOTE || state == STATE_IN_QUOTE) {
|
||||
MAppend(SINGLE_QUOTE);
|
||||
}
|
||||
|
||||
return u_terminateUChars(buffer, blen, len, ec);
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -573,6 +573,26 @@ public:
|
|||
Formattable& result,
|
||||
ParsePosition& pos) const;
|
||||
|
||||
/**
|
||||
* Convert an 'apostrophe-friendly' pattern into a standard
|
||||
* pattern. Standard patterns treat all apostrophes as
|
||||
* quotes, which is problematic in some languages, e.g.
|
||||
* French, where apostrophe is commonly used. This utility
|
||||
* assumes that only an unpaired apostrophe immediately before
|
||||
* a brace is a true quote. Other unpaired apostrophes are paired,
|
||||
* and the resulting standard pattern string is returned.
|
||||
*
|
||||
* <p><b>Note</b> it is not guaranteed that the returned pattern
|
||||
* is indeed a valid pattern. The only effect is to convert
|
||||
* between patterns having different quoting semantics.
|
||||
*
|
||||
* @param pattern the 'apostrophe-friendly' pattern to convert
* @param status in/out error code; if it indicates a failure, the returned string is bogus
|
||||
* @return the standard equivalent of the original pattern
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern,
|
||||
UErrorCode& status);
|
||||
|
||||
/**
|
||||
* Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
|
||||
* This method is to implement a simple version of RTTI, since not all
|
||||
|
|
|
@ -597,6 +597,36 @@ umsg_vparse(const UMessageFormat *fmt,
|
|||
UErrorCode *status);
|
||||
|
||||
|
||||
/**
|
||||
* Convert an 'apostrophe-friendly' pattern into a standard
|
||||
* pattern. Standard patterns treat all apostrophes as
|
||||
* quotes, which is problematic in some languages, e.g.
|
||||
* French, where apostrophe is commonly used. This utility
|
||||
* assumes that only an unpaired apostrophe immediately before
|
||||
* a brace is a true quote. Other unpaired apostrophes are paired,
|
||||
* and the resulting standard pattern string is returned.
|
||||
*
|
||||
* <p><b>Note</b> it is not guaranteed that the returned pattern
|
||||
* is indeed a valid pattern. The only effect is to convert
|
||||
* between patterns having different quoting semantics.
|
||||
*
|
||||
* @param pattern the 'apostrophe-friendly' patttern to convert
|
||||
* @param plen the length of pattern, or -1 if unknown and pattern is null-terminated
|
||||
* @param buffer the buffer for the result, or NULL if preflight only
|
||||
* @param blen the length of the buffer, or 0 if preflighting
|
||||
* @param ec the error code
|
||||
* @return the length of the resulting text, not including trailing null
|
||||
* if buffer has room for the trailing null, it is provided, otherwise
|
||||
* not
|
||||
* @draft ICU 3.4
|
||||
*/
|
||||
U_STABLE int32_t U_EXPORT2
|
||||
umsg_autoQuoteApostrophe(const UChar* pattern,
|
||||
int32_t plen,
|
||||
UChar* buffer,
|
||||
int32_t blen,
|
||||
UErrorCode* ec);
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
||||
#endif
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "unicode/numfmt.h"
|
||||
#include "unicode/choicfmt.h"
|
||||
#include "unicode/gregocal.h"
|
||||
#include <stdio.h>
|
||||
|
||||
void
|
||||
TestMessageFormat::runIndexedTest(int32_t index, UBool exec,
|
||||
|
@ -53,6 +54,7 @@ TestMessageFormat::runIndexedTest(int32_t index, UBool exec,
|
|||
TESTCASE(17,TestUnlimitedArgsAndSubformats);
|
||||
TESTCASE(18,TestRBNF);
|
||||
TESTCASE(19,TestTurkishCasing);
|
||||
TESTCASE(20,testAutoQuoteApostrophe);
|
||||
default: name = ""; break;
|
||||
}
|
||||
}
|
||||
|
@ -193,7 +195,6 @@ void TestMessageFormat::testBug2()
|
|||
#include "unicode/datefmt.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
IntlTest&
|
||||
operator<<( IntlTest& stream,
|
||||
|
@ -1211,4 +1212,32 @@ void TestMessageFormat::TestRBNF(void) {
|
|||
delete numFmt;
|
||||
}
|
||||
|
||||
void TestMessageFormat::testAutoQuoteApostrophe(void) {
|
||||
const char* patterns[] = { // pattern, expected pattern
|
||||
"'", "''",
|
||||
"''", "''",
|
||||
"'{", "'{'",
|
||||
"' {", "'' {",
|
||||
"'a", "''a",
|
||||
"'{'a", "'{'a",
|
||||
"'{a'", "'{a'",
|
||||
"'{}", "'{}'",
|
||||
"{'", "{'",
|
||||
"{'a", "{'a",
|
||||
"{'a{}'a}'a", "{'a{}'a}''a",
|
||||
};
|
||||
int32_t pattern_count = sizeof(patterns)/sizeof(patterns[0]);
|
||||
|
||||
for (int i = 0; i < pattern_count; i += 2) {
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UnicodeString result = MessageFormat::autoQuoteApostrophe(patterns[i], status);
|
||||
UnicodeString target(patterns[i+1]);
|
||||
if (target != result) {
|
||||
char buf[128];
|
||||
sprintf(buf, "[%2d] \"%s\" : \"%s\" != \"%s\"\n", i/2, patterns[i], patterns[i+1], result);
|
||||
errln(buf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* #if !UCONFIG_NO_FORMATTING */
|
||||
|
|
|
@ -86,6 +86,7 @@ public:
|
|||
void testParse(void);
|
||||
void testAdopt(void);
|
||||
void TestTurkishCasing(void);
|
||||
void testAutoQuoteApostrophe(void);
|
||||
|
||||
private:
|
||||
};
|
||||
|
|
Loading…
Add table
Reference in a new issue