ICU-3729 port [before] fix

X-SVN-Rev: 15889
2025-04-13 08:53:20 +00:00 · 2004-06-16 18:19:51 +00:00 · 2004-06-16 18:19:51 +00:00 · 152733ca80
commit 152733ca80
parent 3b290bedcb
4 changed files with 94 additions and 20 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/collator/CollationMiscTest.java
+++ b/icu4j/src/com/ibm/icu/dev/test/collator/CollationMiscTest.java
@ -2085,9 +2085,9 @@ public class CollationMiscTest extends TestFmwk {
 	        "xAx"
 	    };
 	    /* TODO: port builder fixes to before */
-	    /*genericRulesStarter(rules, test);*/
+	    genericRulesStarter(rules, test);
 	    genericLocaleStarter(new Locale("zh"), test);
-	    /*genericRulesStarter(rules, test2);*/
+	    genericRulesStarter(rules, test2);
 	    genericLocaleStarter(new Locale("zh"), test2);
 	}

--- a/icu4j/src/com/ibm/icu/text/CollationParsedRuleBuilder.java
+++ b/icu4j/src/com/ibm/icu/text/CollationParsedRuleBuilder.java
@ -110,12 +110,25 @@ final class CollationParsedRuleBuilder
 		   && result > 0) { 
 		                // this condition should prevent falling off the edge of the 
 		                // world 
-		// here, we end up in a singularity - zero
-		prevresult[0] = m_table_[3 * (-- result)];
-		prevresult[1] = m_table_[3 * result + 1];
+		        // here, we end up in a singularity - zero
+		        prevresult[0] = m_table_[3 * (-- result)];
+		        prevresult[1] = m_table_[3 * result + 1];
+		   }
+           return result;
+		}
+	    
+	    final int getCEStrengthDifference(int CE, int contCE, // compares (CE, contCE) against (prevCE, prevContCE) under STRENGTH_MASK_ entries
+	    		int prevCE, int prevContCE) {
+			int strength = Collator.TERTIARY; // begin the comparison with the TERTIARY mask
+			while(
+			((prevCE & STRENGTH_MASK_[strength]) != (CE & STRENGTH_MASK_[strength]) 
+			|| (prevContCE & STRENGTH_MASK_[strength]) != (contCE & STRENGTH_MASK_[strength]))
+			&& (strength != 0)) { // never step below index 0 of STRENGTH_MASK_
+				strength--; // masked CEs or continuations still differ: retry with the mask one index lower
+			}
+			return strength;                // level at which the masked values match, or 0 if the loop reached the floor
 	    }
-	    return result;
-	}
+
        
        /**
         * Finding the inverse CE of the argument CEs
@ -299,9 +312,12 @@ final class CollationParsedRuleBuilder
 								   Collator.SECONDARY);
 			listheader.m_gapsHi_[3 * st + 2] = (t1 & 0x3f) << 24 
 			    | (t2 & 0x3f) << 16;
-			pos --;
-			t1 = m_table_[3 * pos];
-			t2 = m_table_[3 * pos + 1];
+			//pos --;
+			//t1 = m_table_[3 * pos];
+			//t2 = m_table_[3 * pos + 1];
+            t1 = listheader.m_baseCE_;
+            t2 = listheader.m_baseContCE_;
+            
 			listheader.m_gapsLo_[3 * st] = mergeCE(t1, t2, 
 							       Collator.PRIMARY);
 			listheader.m_gapsLo_[3 * st + 1] = mergeCE(t1, t2, 
@ -1500,9 +1516,10 @@ final class CollationParsedRuleBuilder
 	    if (Utility.compareUnsigned(low, 
 					RuleBasedCollator.COMMON_BOTTOM_2_ << 24) < 0) {
 		g.m_rangesLength_ = allocateWeights(
-						    RuleBasedCollator.COMMON_BOTTOM_2_ << 24, 
+						    RuleBasedCollator.BYTE_UNSHIFTED_MIN_ << 24, 
 						    high, count, maxbyte, g.m_ranges_);
-		g.m_current_ = RuleBasedCollator.COMMON_BOTTOM_2_ << 24;
+        g.m_current_ = nextWeight(g);
+		//g.m_current_ = RuleBasedCollator.COMMON_BOTTOM_2_ << 24;
 		return g.m_current_;
 	    }
 	} 
--- a/icu4j/src/com/ibm/icu/text/CollationRuleParser.java
+++ b/icu4j/src/com/ibm/icu/text/CollationRuleParser.java
@ -153,6 +153,7 @@ final class CollationRuleParser
       Token m_previous_;
       Token m_next_;
       StringBuffer m_rules_;
+       char m_flags_;     
       
       // package private constructors ---------------------------------------
       
@ -883,6 +884,18 @@ final class CollationRuleParser
 		               m_extraCurrent_ += size + m_parsedToken_.m_extensionLen_;
 		           }
 		        }
+               // if the previous token was a reset before, the strength of this
+               // token must match the strength of before. Otherwise we have an
+               // undefined situation.
+               // In other words, we currently have a kludge which we use to 
+               // represent &a >> x. This is written as &[before 2]a << x.
+               if((lastToken.m_flags_ & TOKEN_BEFORE_) != 0) {
+                   int beforeStrength = (lastToken.m_flags_ & TOKEN_BEFORE_) - 1;
+                   if(beforeStrength != sourceToken.m_strength_) {
+                   	   throwParseException(m_source_.toString(), m_current_);
+                   }
+               }
+               
            } 
            else {
 	            if (lastToken != null && lastStrength == TOKEN_RESET_) {
@ -1585,6 +1598,36 @@ final class CollationRuleParser
 		    int invpos = CollationParsedRuleBuilder.INVERSE_UCA_.getInversePrevCE(
 	                                                     basece, basecontce, 
 	                                                     strength, m_utilCEBuffer_);
+		    // we got the previous CE. Now we need to see if the difference between
+		    // the two CEs is really of the requested strength.
+		    // if it's a bigger difference (we asked for secondary and got primary), we 
+		    // need to modify the CE.
+		    if(CollationParsedRuleBuilder.INVERSE_UCA_.getCEStrengthDifference(basece, basecontce, m_utilCEBuffer_[0], m_utilCEBuffer_[1]) < strength) {
+		    	// adjust the strength
+		    	// now we are in the situation where our baseCE should actually be modified in 
+		    	// order to get the CE in the right position.
+		    	if(strength == Collator.SECONDARY) {
+		    		m_utilCEBuffer_[0] = basece - 0x0200;
+		    	} else { // strength == UCOL_TERTIARY
+		    		m_utilCEBuffer_[0] = basece - 0x02;
+		    	}
+		    	if(RuleBasedCollator.isContinuation(basecontce)) {
+		    		if(strength == Collator.SECONDARY) {
+		    			m_utilCEBuffer_[1] = basecontce - 0x0200;
+		    		} else { // strength == UCOL_TERTIARY
+		    			m_utilCEBuffer_[1] = basecontce - 0x02;
+		    		}
+		    	}
+		    }
+            
+/*            
+            // the code below relies on getting a code point from the inverse table, in order to be
+            // able to merge the situations like &x < 9 &[before 1]a < d. This won't work:
+            // 1. There are many code points that have the same CE
+            // 2. The CE to codepoint table (things pointed to by CETable[3*invPos+2]) is broken.
+            // Also, in case when there is no equivalent strength before an element, we have to actually
+            // construct one. For example, &[before 2]a << x won't result in x << a, because the element 
+            // before a is a primary difference. 
 		    ch = CollationParsedRuleBuilder.INVERSE_UCA_.m_table_[3 * invpos 
 	                                                                  + 2];
 	        if ((ch &  INVERSE_SIZE_MASK_) != 0) {
@ -1606,16 +1649,27 @@ final class CollationRuleParser
 		                                         | m_parsedToken_.m_charsOffset_;
 		    m_utilToken_.m_rules_ = m_source_;
 		    sourcetoken = (Token)m_hashTable_.get(m_utilToken_);
-		  
+*/		  
+
+		    // here is how it should be. The situation such as &[before 1]a < x, should be 
+		    // resolved exactly as if we wrote &a > x. 
+		    // therefore, I don't really care if the UCA value before a has been changed.
+		    // However, I do care if the strength between my element and the previous element
+		    // is bigger than I wanted. So, if CE < baseCE and I wanted &[before 2], then I'll 
+		    // have to construct the base CE.
+            
 		    // if we found a tailored thing, we have to use the UCA value and 
 		    // construct a new reset token with constructed name
-		    if (sourcetoken != null && sourcetoken.m_strength_ != TOKEN_RESET_) {
+		    //if (sourcetoken != null && sourcetoken.m_strength_ != TOKEN_RESET_) {
 		        // character to which we want to anchor is already tailored. 
 		        // We need to construct a new token which will be the anchor point
-	            m_source_.setCharAt(m_extraCurrent_ - 1, '\uFFFE');
-		        m_source_.append(ch);
-		        m_extraCurrent_ ++;
-		        m_parsedToken_.m_charsLen_ ++;
+	            //m_source_.setCharAt(m_extraCurrent_ - 1, '\uFFFE');
+		        //m_source_.append(ch);
+		        //m_extraCurrent_ ++;
+		        //m_parsedToken_.m_charsLen_ ++;
+                // grab before
+                m_parsedToken_.m_charsOffset_ -= 10;
+                m_parsedToken_.m_charsLen_ += 10;
 	            m_listHeader_[m_resultLength_] = new TokenListHeader();
 		        m_listHeader_[m_resultLength_].m_baseCE_ 
 	                                             = m_utilCEBuffer_[0] & 0xFFFFFF3F;
@ -1633,7 +1687,7 @@ final class CollationRuleParser
 		        m_listHeader_[m_resultLength_].m_indirect_ = false;
 		        sourcetoken = new Token();
 		        initAReset(-1, sourcetoken);   
-		    }
+		    //}
 	    }
 	    return sourcetoken;
 	}
@ -1665,6 +1719,9 @@ final class CollationRuleParser
 	                            | m_parsedToken_.m_charsOffset_;
 	    targetToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24 
 	                               | m_parsedToken_.m_extensionOffset_;
+	    // keep the flags around so that we know about before
+	    targetToken.m_flags_ = m_parsedToken_.m_flags_;
+	    
 	    if (m_parsedToken_.m_prefixOffset_ != 0) {
 	        throwParseException(m_rules_, m_parsedToken_.m_charsOffset_ - 1);
 	    } 
--- a/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
+++ b/icu4j/src/com/ibm/icu/text/RuleBasedCollator.java
@ -1922,7 +1922,7 @@ public final class RuleBasedCollator extends Collator
    private static final byte BYTE_LEVEL_SEPARATOR_ = (byte)0x01;
    private static final byte BYTE_SORTKEY_GLUE_ = (byte)0x02;
    private static final byte BYTE_SHIFT_PREFIX_ = (byte)0x03;
-    private static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_;
+    /*private*/ static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_;
    private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_;
    static final byte CODAN_PLACEHOLDER = 0x24;
    private static final byte BYTE_LAST_LATIN_PRIMARY_ = (byte)0x4C;