diff --git a/icu4c/source/data/translit/t_FWidth_HWidth.txt b/icu4c/source/data/translit/t_FWidth_HWidth.txt index 0861678f8e0..d0ffe7638f6 100644 --- a/icu4c/source/data/translit/t_FWidth_HWidth.txt +++ b/icu4c/source/data/translit/t_FWidth_HWidth.txt @@ -274,9 +274,9 @@ t_FWidth_HWidth { "¥<>'¥';" // from FULLWIDTH YEN SIGN "₩<>₩;" // from FULLWIDTH WON SIGN "│<>│;" // to HALFWIDTH FORMS LIGHT VERTICAL -"←<>←;" // to HALFWIDTH LEFTWARDS ARROW +"'←'<>'←';" // to HALFWIDTH LEFTWARDS ARROW "↑<>↑;" // to HALFWIDTH UPWARDS ARROW -"→<>→;" // to HALFWIDTH RIGHTWARDS ARROW +"'→'<>'→';" // to HALFWIDTH RIGHTWARDS ARROW "↓<>↓;" // to HALFWIDTH DOWNWARDS ARROW "■<>■;" // to HALFWIDTH BLACK SQUARE "○<>○;" // to HALFWIDTH WHITE CIRCLE diff --git a/icu4c/source/i18n/rbt_pars.cpp b/icu4c/source/i18n/rbt_pars.cpp index 632ca4b0e9f..9cf9331728a 100644 --- a/icu4c/source/i18n/rbt_pars.cpp +++ b/icu4c/source/i18n/rbt_pars.cpp @@ -69,6 +69,14 @@ static const UChar DOT_SET[] = { // "[^[:Zp:][:Zl:]\r\n$]"; // A function is denoted &Source-Target/Variant(text) #define FUNCTION ((UChar)38) /*&*/ +// Aliases for some of the syntax characters. These are provided so +// transliteration rules can be expressed in XML without clashing with +// XML syntax characters '<', '>', and '&'. +#define ALT_REVERSE_RULE_OP ((UChar)0x2190) // Left Arrow +#define ALT_FORWARD_RULE_OP ((UChar)0x2192) // Right Arrow +#define ALT_FWDREV_RULE_OP ((UChar)0x2194) // Left Right Arrow +#define ALT_FUNCTION ((UChar)0x2206) // Increment (~Greek Capital Delta) + // Special characters disallowed at the top level static const UChar ILLEGAL_TOP[] = {41,0}; // ")" @@ -82,12 +90,17 @@ static const UChar ILLEGAL_FUNC[] = {94,40,46,42,43,63,123,125,124,64,0}; // "^( // trailing SymbolTable.SYMBOL_REF character. // private static final char ANCHOR_END = '$'; -static const UChar gOPERATORS[] = { - 0x3D, 0x3E, 0x3C, 0 // "=><" +static const UChar gOPERATORS[] = { // "=><" + VARIABLE_DEF_OP, FORWARD_RULE_OP, REVERSE_RULE_OP, + ALT_FORWARD_RULE_OP, ALT_REVERSE_RULE_OP, ALT_FWDREV_RULE_OP, + 0 }; -static const UChar HALF_ENDERS[] = { - 0x3D, 0x3E, 0x3C, 59, 0 // "=><;" +static const UChar HALF_ENDERS[] = { // "=><;" + VARIABLE_DEF_OP, FORWARD_RULE_OP, REVERSE_RULE_OP, + ALT_FORWARD_RULE_OP, ALT_REVERSE_RULE_OP, ALT_FWDREV_RULE_OP, + END_OF_RULE, + 0 }; // These are also used in Transliterator::toRules() @@ -511,6 +524,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l } break; case FUNCTION: + case ALT_FUNCTION: { int32_t iref = pos; TransliteratorIDParser::SingleID* single = @@ -1212,6 +1226,19 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos, op = FWDREV_RULE_OP; } + // Translate alternate op characters. + switch (op) { + case ALT_FORWARD_RULE_OP: + op = FORWARD_RULE_OP; + break; + case ALT_REVERSE_RULE_OP: + op = REVERSE_RULE_OP; + break; + case ALT_FWDREV_RULE_OP: + op = FWDREV_RULE_OP; + break; + } + pos = right->parse(rule, pos, limit); if (U_FAILURE(status)) { return start; diff --git a/icu4c/source/test/intltest/transtst.cpp b/icu4c/source/test/intltest/transtst.cpp index cd93b4f6381..488eb8a956a 100644 --- a/icu4c/source/test/intltest/transtst.cpp +++ b/icu4c/source/test/intltest/transtst.cpp @@ -183,6 +183,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, TESTCASE(74,TestRuleWhitespace); TESTCASE(75,TestAllCodepoints); TESTCASE(76,TestBoilerplate); + TESTCASE(77,TestAlternateSyntax); default: name = ""; break; } } @@ -3913,6 +3914,19 @@ void TransliteratorTest::TestBoilerplate() { TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator); } +void TransliteratorTest::TestAlternateSyntax() { + // U+2206 == & + // U+2190 == < + // U+2192 == > + // U+2194 == <> + expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"), + "abc", + "xbz"); + expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"), + CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"), + "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"); +} + //====================================================================== // Support methods //====================================================================== diff --git a/icu4c/source/test/intltest/transtst.h b/icu4c/source/test/intltest/transtst.h index 20c8113bedc..d607c58fe8f 100644 --- a/icu4c/source/test/intltest/transtst.h +++ b/icu4c/source/test/intltest/transtst.h @@ -342,6 +342,8 @@ private: void TestBoilerplate(void); + void TestAlternateSyntax(void); + //====================================================================== // Support methods //====================================================================== diff --git a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java index a5757f272c3..a52146861f6 100755 --- a/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java +++ b/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java @@ -5,8 +5,8 @@ ******************************************************************************* * * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $ - * $Date: 2003/02/19 00:18:46 $ - * $Revision: 1.121 $ + * $Date: 2003/04/23 00:20:16 $ + * $Revision: 1.122 $ * ***************************************************************************************** */ @@ -2825,6 +2825,19 @@ public class TransliteratorTest extends TestFmwk { } } + public void TestAlternateSyntax() { + // U+2206 == & + // U+2190 == < + // U+2192 == > + // U+2194 == <> + expect("a \u2192 x; b \u2190 y; c \u2194 z", + "abc", + "xbz"); + expect("([:^ASCII:]) \u2192 \u2206Name($1);", + "<=\u2190; >=\u2192; <>=\u2194; &=\u2206", + "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"); + } + //====================================================================== // These tests are not mirrored (yet) in icu4c at // source/test/intltest/transtst.cpp diff --git a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Fullwidth_Halfwidth.txt b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Fullwidth_Halfwidth.txt index 733bbcc53af..77ee5c85394 100755 --- a/icu4j/src/com/ibm/icu/impl/data/Transliterator_Fullwidth_Halfwidth.txt +++ b/icu4j/src/com/ibm/icu/impl/data/Transliterator_Fullwidth_Halfwidth.txt @@ -3,8 +3,8 @@ # Corporation and others. All Rights Reserved. #-------------------------------------------------------------------- # $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/impl/data/Attic/Transliterator_Fullwidth_Halfwidth.txt,v $ -# $Date: 2002/03/02 00:27:27 $ -# $Revision: 1.3 $ +# $Date: 2003/04/23 00:20:15 $ +# $Revision: 1.4 $ #-------------------------------------------------------------------- # Fullwidth-Halfwidth @@ -264,9 +264,9 @@ ¥<>'¥'; # from FULLWIDTH YEN SIGN ₩<>₩; # from FULLWIDTH WON SIGN │<>│; # to HALFWIDTH FORMS LIGHT VERTICAL -←<>←; # to HALFWIDTH LEFTWARDS ARROW +'←'<>'←'; # to HALFWIDTH LEFTWARDS ARROW ↑<>↑; # to HALFWIDTH UPWARDS ARROW -→<>→; # to HALFWIDTH RIGHTWARDS ARROW +'→'<>'→'; # to HALFWIDTH RIGHTWARDS ARROW ↓<>↓; # to HALFWIDTH DOWNWARDS ARROW ■<>■; # to HALFWIDTH BLACK SQUARE ○<>○; # to HALFWIDTH WHITE CIRCLE diff --git a/icu4j/src/com/ibm/icu/text/TransliteratorParser.java b/icu4j/src/com/ibm/icu/text/TransliteratorParser.java index 21206790357..e31fdce9ed9 100755 --- a/icu4j/src/com/ibm/icu/text/TransliteratorParser.java +++ b/icu4j/src/com/ibm/icu/text/TransliteratorParser.java @@ -4,8 +4,8 @@ * Corporation and others. All Rights Reserved. ********************************************************************** * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TransliteratorParser.java,v $ -* $Date: 2002/07/26 21:12:36 $ -* $Revision: 1.23 $ +* $Date: 2003/04/23 00:20:15 $ +* $Revision: 1.24 $ ********************************************************************** */ package com.ibm.icu.text; @@ -133,8 +133,8 @@ class TransliteratorParser { private static final char REVERSE_RULE_OP = '<'; private static final char FWDREV_RULE_OP = '~'; // internal rep of <> op - private static final String OPERATORS = "=><"; - private static final String HALF_ENDERS = "=><;"; + private static final String OPERATORS = "=><\u2190\u2192\u2194"; + private static final String HALF_ENDERS = "=><\u2190\u2192\u2194;"; // Other special characters private static final char QUOTE = '\''; @@ -167,6 +167,14 @@ class TransliteratorParser { // A function is denoted &Source-Target/Variant(text) private static final char FUNCTION = '&'; + // Aliases for some of the syntax characters. These are provided so + // transliteration rules can be expressed in XML without clashing with + // XML syntax characters '<', '>', and '&'. + private static final char ALT_REVERSE_RULE_OP = '\u2190'; // Left Arrow + private static final char ALT_FORWARD_RULE_OP = '\u2192'; // Right Arrow + private static final char ALT_FWDREV_RULE_OP = '\u2194'; // Left Right Arrow + private static final char ALT_FUNCTION = '\u2206'; // Increment (~Greek Capital Delta) + // Special characters disallowed at the top level private static UnicodeSet ILLEGAL_TOP = new UnicodeSet("[\\)]"); @@ -569,6 +577,7 @@ class TransliteratorParser { } break; case FUNCTION: + case ALT_FUNCTION: { iref[0] = pos; TransliteratorIDParser.SingleID single = TransliteratorIDParser.parseFilterID(rule, iref); @@ -1112,6 +1121,19 @@ class TransliteratorParser { operator = FWDREV_RULE_OP; } + // Translate alternate op characters. + switch (operator) { + case ALT_FORWARD_RULE_OP: + operator = FORWARD_RULE_OP; + break; + case ALT_REVERSE_RULE_OP: + operator = REVERSE_RULE_OP; + break; + case ALT_FWDREV_RULE_OP: + operator = FWDREV_RULE_OP; + break; + } + pos = right.parse(rule, pos, limit, this); if (pos < limit) {