ICU-1030

New implementation for Unicode Boyer Moore string search. X-SVN-Rev: 5587
2025-04-16 18:25:57 +00:00 · 2001-08-25 02:03:53 +00:00 · 2001-08-25 02:03:53 +00:00 · 75adf07067
commit 75adf07067
parent 8e4ca45744
13 changed files with 5595 additions and 9 deletions
--- a/icu4c/source/i18n/Makefile.in
+++ b/icu4c/source/i18n/Makefile.in
@ -72,7 +72,8 @@ unifltlg.o unirange.o uniset.o unitohex.o unum.o \
 dbbi.o dbbi_tbl.o rbbi.o rbbi_tbl.o brkdict.o nultrans.o jamohang.o hangjamo.o \
 remtrans.o utrans.o \
 titletrn.o tolowtrn.o toupptrn.o xformtrn.o name2uni.o uni2name.o nortrans.o \
-unifilt.o quant.o strmatch.o transreg.o
+unifilt.o quant.o strmatch.o transreg.o usearch.o search.o stsearch.o
+

 STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))

--- a/icu4c/source/i18n/i18n.dsp
+++ b/icu4c/source/i18n/i18n.dsp
@ -234,6 +234,10 @@ SOURCE=.\remtrans.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\search.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\simpletz.cpp
 # End Source File
 # Begin Source File
@ -250,6 +254,10 @@ SOURCE=.\strmatch.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\stsearch.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\tblcoll.cpp
 # End Source File
 # Begin Source File
@ -350,6 +358,10 @@ SOURCE=.\unum.cpp
 # End Source File
 # Begin Source File

+SOURCE=.\usearch.cpp
+# End Source File
+# Begin Source File
+
 SOURCE=.\utrans.cpp
 # End Source File
 # Begin Source File
@ -1119,6 +1131,25 @@ SOURCE=.\unicode\remtrans.h
 # End Source File
 # Begin Source File

+SOURCE=.\unicode\search.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\unicode\search.h
+
+"..\..\include\unicode\search.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               unicode\search.h                ..\..\include\unicode
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
 SOURCE=.\unicode\simpletz.h

 !IF  "$(CFG)" == "i18n - Win32 Release"
@ -1204,6 +1235,25 @@ SOURCE=.\strmatch.h
 # End Source File
 # Begin Source File

+SOURCE=.\unicode\stsearch.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\unicode\stsearch.h
+
+"..\..\include\unicode\stsearch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy               unicode\stsearch.h                ..\..\include\unicode
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
 SOURCE=.\unicode\tblcoll.h

 !IF  "$(CFG)" == "i18n - Win32 Release"
@ -1676,6 +1726,29 @@ InputPath=.\unicode\unum.h
 # End Source File
 # Begin Source File

+SOURCE=.\unicode\usearch.h
+
+!IF  "$(CFG)" == "i18n - Win32 Release"
+
+!ELSEIF  "$(CFG)" == "i18n - Win32 Debug"
+
+# Begin Custom Build
+InputPath=.\unicode\usearch.h
+
+"..\..\include\unicode\usearch.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+	copy                unicode\usearch.h                ..\..\include\unicode
+
+# End Custom Build
+
+!ENDIF 
+
+# End Source File
+# Begin Source File
+
+SOURCE=.\usrchimp.h
+# End Source File
+# Begin Source File
+
 SOURCE=.\unicode\utrans.h

 !IF  "$(CFG)" == "i18n - Win32 Release"
--- a/icu4c/source/i18n/search.cpp
+++ b/icu4c/source/i18n/search.cpp
@ -0,0 +1,357 @@
+/*
+**********************************************************************
+*   Copyright (C) 2001 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  03/22/2000   helena      Creation.
+**********************************************************************
+*/
+
+#include "unicode/brkiter.h"
+#include "unicode/schriter.h"
+#include "unicode/search.h"
+#include "usrchimp.h"
+#include "cmemory.h"
+
+// public constructors and destructors -----------------------------------
+
+/**
+ * Copy constructor.
+ * Takes a private copy of the text and of every field of the other 
+ * iterator's USearch structure. No comparison against *this is made: the 
+ * object under construction holds no valid state yet, so comparing it would 
+ * read an uninitialised m_search_ pointer.
+ * @param other iterator to copy; its m_search_ is dereferenced here
+ */
+SearchIterator::SearchIterator(const SearchIterator &other)
+{
+    m_breakiterator_              = other.m_breakiterator_;
+    m_text_                       = other.m_text_;
+    m_search_                     = (USearch *)uprv_malloc(sizeof(USearch));
+    m_search_->breakIter          = other.m_search_->breakIter;
+    m_search_->isCanonicalMatch   = other.m_search_->isCanonicalMatch;
+    m_search_->isOverlap          = other.m_search_->isOverlap;
+    // direction and reset state drive next()/previous(), copy them as well
+    m_search_->isForwardSearching = other.m_search_->isForwardSearching;
+    m_search_->reset              = other.m_search_->reset;
+    m_search_->matchedIndex       = other.m_search_->matchedIndex;
+    m_search_->matchedLength      = other.m_search_->matchedLength;
+    // point at our own copy of the text so that this object does not depend 
+    // on the lifetime of the other iterator's buffer
+    m_search_->text               = m_text_.fArray;
+    m_search_->textLength         = other.m_search_->textLength;
+}
+
+/**
+ * Destructor. Frees the USearch structure if this object still holds one.
+ */
+SearchIterator::~SearchIterator()
+{
+    if (m_search_ == NULL) {
+        // nothing left to release
+        return;
+    }
+    uprv_free(m_search_);
+}
+
+// public get and set methods ----------------------------------------
+
+/**
+ * Switches a search attribute on or off.
+ * Any value other than USEARCH_ON turns the attribute off.
+ * @param attribute USEARCH_OVERLAP or USEARCH_CANONICAL_MATCH
+ * @param value new setting for the attribute
+ * @param status set to U_ILLEGAL_ARGUMENT_ERROR for an unknown attribute, or 
+ *        whenever value is USEARCH_ATTRIBUTE_VALUE_COUNT
+ */
+void SearchIterator::setAttribute(USearchAttribute       attribute,
+                                  USearchAttributeValue  value,
+                                  UErrorCode            &status)
+{
+    if (U_SUCCESS(status)) {
+        const UBool enabled = (value == USEARCH_ON ? TRUE : FALSE);
+        if (attribute == USEARCH_OVERLAP) {
+            m_search_->isOverlap = enabled;
+        }
+        else if (attribute == USEARCH_CANONICAL_MATCH) {
+            m_search_->isCanonicalMatch = enabled;
+        }
+        else {
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+        }
+    }
+    // the count sentinel is never a legal value; this is reported even if 
+    // status was already a failure on entry
+    if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+/**
+ * Reports the current setting of a search attribute.
+ * @param attribute attribute to query
+ * @return USEARCH_ON or USEARCH_OFF for USEARCH_OVERLAP and 
+ *         USEARCH_CANONICAL_MATCH, USEARCH_DEFAULT for anything else
+ */
+USearchAttributeValue SearchIterator::getAttribute(
+                                          USearchAttribute  attribute) const
+{
+    if (attribute == USEARCH_OVERLAP) {
+        return (m_search_->isOverlap == TRUE ? USEARCH_ON : USEARCH_OFF);
+    }
+    if (attribute == USEARCH_CANONICAL_MATCH) {
+        return (m_search_->isCanonicalMatch == TRUE ? USEARCH_ON : 
+                                                                USEARCH_OFF);
+    }
+    // USEARCH_ATTRIBUTE_COUNT and every unrecognised attribute
+    return USEARCH_DEFAULT;
+}
+    
+/**
+ * @return the stored start index of the latest match; the constructors 
+ *         and setMatchNotFound() store USEARCH_DONE here
+ */
+UTextOffset SearchIterator::getMatchedStart() const
+{
+    const UTextOffset start = m_search_->matchedIndex;
+    return start;
+}
+
+/**
+ * @return the stored length of the latest match; the constructors and 
+ *         setMatchNotFound() store 0 here
+ */
+int32_t SearchIterator::getMatchedLength() const
+{
+    const int32_t length = m_search_->matchedLength;
+    return length;
+}
+    
+/**
+ * Copies the text of the latest match into result.
+ * @param result receives the matched text; it is emptied when there is no 
+ *        match or when the match has zero length
+ */
+void SearchIterator::getMatchedText(UnicodeString &result) const
+{
+    const UTextOffset start  = m_search_->matchedIndex;
+    const int32_t     length = m_search_->matchedLength;
+    if (start == USEARCH_DONE || length == 0) {
+        result.remove();
+        return;
+    }
+    result.setTo(m_search_->text + start, length);
+}
+    
+/**
+ * Sets the break iterator used to restrict where matches may occur.
+ * @param breakiter break iterator to use, stored as given (not copied)
+ * @param status nothing is changed if this is already a failure
+ */
+void SearchIterator::setBreakIterator(BreakIterator *breakiter, 
+                                      UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // the C++ break iterator may not be backed by a UBreakIterator, so it 
+    // is tracked in this object and the C level field is cleared
+    m_breakiterator_     = breakiter;
+    m_search_->breakIter = NULL;
+}
+    
+/**
+ * @return the break iterator currently held by this object, may be NULL
+ */
+const BreakIterator * SearchIterator::getBreakIterator(void) const
+{
+    const BreakIterator *current = m_breakiterator_;
+    return current;
+}
+
+/**
+ * Replaces the text to be searched.
+ * A private copy of the text is kept, and both the text pointer and the 
+ * text length of the search structure are updated to describe that copy.
+ * @param text new text, must not be empty
+ * @param status set to U_ILLEGAL_ARGUMENT_ERROR if text is empty; nothing is 
+ *        changed if this is already a failure on entry
+ */
+void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (text.length() == 0) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    m_text_               = text;
+    m_search_->text       = m_text_.fArray;
+    // keep the length in step with the text; first(), last(), next() and 
+    // setMatchNotFound() all rely on it
+    m_search_->textLength = m_text_.length();
+}
+
+/**
+ * Replaces the text to be searched with the text of a character iterator.
+ * @param text iterator whose text is extracted with getText()
+ * @param status nothing is done if this is already a failure; otherwise it 
+ *        carries whatever the UnicodeString overload of setText reports
+ */
+void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
+{
+    if (U_SUCCESS(status)) {
+        // the iterator's text is written straight into m_text_, which is 
+        // then handed to the UnicodeString overload for the remaining setup
+        text.getText(m_text_);
+        setText(m_text_, status);
+    }
+}
+    
+/**
+ * @return the text held by this iterator
+ */
+const UnicodeString & SearchIterator::getText(void) const
+{
+    const UnicodeString &text = m_text_;
+    return text;
+}
+
+// operator overloading ----------------------------------------------
+
+/**
+ * Equality operator.
+ * Two iterators are equal when they share the same break iterator pointer, 
+ * have the same attribute settings, match state, offset and text length, 
+ * and their text compares equal code unit by code unit.
+ * @param that iterator to compare with
+ * @return TRUE if both iterators are equal, FALSE otherwise
+ */
+UBool SearchIterator::operator==(const SearchIterator &that) const
+{
+    if (this == &that) {
+        return TRUE;
+    }
+    const USearch *mine   = m_search_;
+    const USearch *theirs = that.m_search_;
+    if (m_breakiterator_ != that.m_breakiterator_) {
+        return FALSE;
+    }
+    if (mine->isCanonicalMatch != theirs->isCanonicalMatch ||
+        mine->isOverlap        != theirs->isOverlap) {
+        return FALSE;
+    }
+    if (mine->matchedIndex  != theirs->matchedIndex ||
+        mine->matchedLength != theirs->matchedLength ||
+        mine->textLength    != theirs->textLength) {
+        return FALSE;
+    }
+    if (getOffset() != that.getOffset()) {
+        return FALSE;
+    }
+    // the lengths are known to be equal at this point
+    if (uprv_memcmp(mine->text, theirs->text, 
+                    mine->textLength * sizeof(UChar)) != 0) {
+        return FALSE;
+    }
+    return TRUE;
+}
+
+// public methods ----------------------------------------------------
+
+/**
+ * Moves to offset 0 and searches forwards from there.
+ * @param status error code passed on to setOffset and handleNext
+ * @return whatever handleNext returns for position 0
+ */
+UTextOffset SearchIterator::first(UErrorCode &status)
+{
+    const UTextOffset start = 0;
+    setOffset(start, status);
+    return handleNext(start, status);
+}
+
+/**
+ * Moves to the given offset and searches forwards from there.
+ * @param position offset to start searching from
+ * @param status error code passed on to setOffset and handleNext
+ * @return whatever handleNext returns for position
+ */
+UTextOffset SearchIterator::following(UTextOffset position, 
+                                      UErrorCode &status)
+{
+    setOffset(position, status);
+    const UTextOffset found = handleNext(position, status);
+    return found;
+}
+    
+/**
+ * Moves to the end of the text and searches backwards from there.
+ * @param status error code passed on to setOffset and handlePrev
+ * @return whatever handlePrev returns for the text length
+ */
+UTextOffset SearchIterator::last(UErrorCode &status)
+{
+    setOffset(m_search_->textLength, status);
+    // the length is read again after setOffset, exactly as before
+    const UTextOffset found = handlePrev(m_search_->textLength, status);
+    return found;
+}
+
+/**
+ * Moves to the given offset and searches backwards from there.
+ * @param position offset to start searching from
+ * @param status error code passed on to setOffset and handlePrev
+ * @return whatever handlePrev returns for position
+ */
+UTextOffset SearchIterator::preceding(UTextOffset position, 
+                                      UErrorCode &status)
+{
+    setOffset(position, status);
+    const UTextOffset found = handlePrev(position, status);
+    return found;
+}
+
+/**
+ * Searches forwards for the next match.
+ * If the iterator was already searching forwards, the search resumes after 
+ * the previous match, or at the current offset when there is no previous 
+ * match. If the iterator was searching backwards, the direction is flipped 
+ * and the existing match, if any, is returned again without searching.
+ * @param status error code; USEARCH_DONE is returned if it is already a 
+ *        failure
+ * @return index of the match as returned by handleNext, or USEARCH_DONE
+ */
+UTextOffset SearchIterator::next(UErrorCode &status)
+{
+    if (U_SUCCESS(status)) {
+        UTextOffset offset      = getOffset();
+        UTextOffset matchindex  = m_search_->matchedIndex;
+        int32_t     matchlength = m_search_->matchedLength;
+        // an explicit iteration has started, previous() must no longer 
+        // treat the iterator as freshly reset
+        m_search_->reset        = FALSE;
+        if (m_search_->isForwardSearching == TRUE) {
+            int32_t textlength = m_search_->textLength;
+            if (offset == textlength || matchindex == textlength || 
+                (matchindex != USEARCH_DONE && 
+                matchindex + matchlength >= textlength)) {
+                // not enough characters to match
+                setMatchNotFound();
+                return USEARCH_DONE; 
+            }
+        }
+        else {
+            // switching direction.
+            // if matchedIndex == USEARCH_DONE, it means that either a
+            // setOffset has been called or that previous ran off the text
+            // string. the iterator would have been set to offset 0 if a
+            // match is not found.
+            m_search_->isForwardSearching = TRUE;
+            if (m_search_->matchedIndex != USEARCH_DONE) {
+                // there's no need to set the collation element iterator
+                // the next call to next will set the offset.
+                return matchindex;
+            }
+        }
+
+        // resume after the previous match if there is one, otherwise start 
+        // at the current offset
+        if (matchindex != USEARCH_DONE) {
+            return handleNext(matchindex + matchlength, status); 
+        }
+        return handleNext(offset, status);
+    }
+    return USEARCH_DONE;
+}
+
+/**
+ * Searches backwards for the previous match.
+ * On a freshly reset iterator the search starts from the end of the text. 
+ * If the iterator was searching forwards, the direction is flipped and the 
+ * existing match, if any, is returned again without searching.
+ * @param status error code; USEARCH_DONE is returned if it is already a 
+ *        failure
+ * @return index of the match as returned by handlePrev, or USEARCH_DONE
+ */
+UTextOffset SearchIterator::previous(UErrorCode &status)
+{
+    if (U_SUCCESS(status)) {
+        UTextOffset offset;
+        if (m_search_->reset) {
+            // first call after a reset: start from the end of the text and 
+            // mark the iterator as already searching backwards
+            offset                       = m_search_->textLength;
+            m_search_->isForwardSearching = FALSE;
+            m_search_->reset              = FALSE;
+        }
+        else {
+            offset = getOffset();
+        }
+        
+        UTextOffset matchindex = m_search_->matchedIndex;
+        if (m_search_->isForwardSearching == TRUE) {
+            // switching direction.
+            // if matchedIndex == USEARCH_DONE, it means that either a
+            // setOffset has been called or that next ran off the text
+            // string. the iterator would have been set to offset textLength
+            // if a match is not found.
+            m_search_->isForwardSearching = FALSE;
+            if (matchindex != USEARCH_DONE) {
+                return matchindex;
+            }
+        }
+        else {
+            if (offset == 0 || matchindex == 0) {
+                // not enough characters to match
+                setMatchNotFound();
+                return USEARCH_DONE; 
+            }
+        }
+
+        // resume before the previous match if there is one, otherwise start 
+        // at the current offset
+        if (matchindex != USEARCH_DONE) {
+            return handlePrev(matchindex, status); 
+        }
+        return handlePrev(offset, status);
+    }
+    return USEARCH_DONE;
+}
+
+/**
+ * Puts the iterator back into its initial state: no match, overlap and 
+ * canonical matching switched off, searching forwards.
+ */
+void SearchIterator::reset()
+{
+    // must run first, it still reads the current search direction
+    setMatchNotFound();
+    m_search_->reset              = TRUE;
+    m_search_->isForwardSearching = TRUE;
+    m_search_->isCanonicalMatch   = FALSE;
+    m_search_->isOverlap          = FALSE;
+}
+
+// protected constructors and destructors -----------------------------
+
+/**
+ * Default constructor.
+ * Creates an iterator with no text, no break iterator and no match.
+ */
+SearchIterator::SearchIterator() : m_breakiterator_(NULL)
+{
+    USearch *search = (USearch *)uprv_malloc(sizeof(USearch));
+    m_search_       = search;
+    // text
+    search->text               = NULL;
+    search->textLength         = 0;
+    search->breakIter          = NULL;
+    // attributes
+    search->isOverlap          = FALSE;
+    search->isCanonicalMatch   = FALSE;
+    // iteration state
+    search->isForwardSearching = TRUE;
+    search->reset              = TRUE;
+    search->matchedIndex       = USEARCH_DONE;
+    search->matchedLength      = 0;
+}
+
+/**
+ * Constructor taking the text to be searched as a string.
+ * @param text text to be searched, a copy is kept in this object
+ * @param breakiter break iterator restricting the matches, stored as given
+ */
+SearchIterator::SearchIterator(const UnicodeString &text, 
+                                     BreakIterator *breakiter) :
+                               m_breakiterator_(breakiter),
+                               m_text_(text)
+{
+    USearch *search = (USearch *)uprv_malloc(sizeof(USearch));
+    m_search_       = search;
+    // text, described through the copy held by this object
+    search->text               = m_text_.fArray;
+    search->textLength         = text.length();
+    search->breakIter          = NULL;
+    // attributes
+    search->isOverlap          = FALSE;
+    search->isCanonicalMatch   = FALSE;
+    // iteration state
+    search->isForwardSearching = TRUE;
+    search->reset              = TRUE;
+    search->matchedIndex       = USEARCH_DONE;
+    search->matchedLength      = 0;
+}
+
+/**
+ * Constructor taking the text to be searched from a character iterator.
+ * @param text iterator whose text is extracted into this object
+ * @param breakiter break iterator restricting the matches, stored as given
+ */
+SearchIterator::SearchIterator(CharacterIterator &text, 
+                               BreakIterator     *breakiter) :
+                               m_breakiterator_(breakiter)
+{
+    // extract the text first so that the structure can describe it
+    text.getText(m_text_);
+
+    USearch *search = (USearch *)uprv_malloc(sizeof(USearch));
+    m_search_       = search;
+    // text
+    search->text               = m_text_.fArray;
+    search->textLength         = m_text_.length();
+    search->breakIter          = NULL;
+    // attributes
+    search->isOverlap          = FALSE;
+    search->isCanonicalMatch   = FALSE;
+    // iteration state
+    search->isForwardSearching = TRUE;
+    search->reset              = TRUE;
+    search->matchedIndex       = USEARCH_DONE;
+    search->matchedLength      = 0;
+}
+
+// protected methods ------------------------------------------------------
+
+/**
+ * Records the length of the latest match.
+ * @param length new match length
+ */
+void SearchIterator::setMatchLength(int32_t length)
+{
+    USearch *search       = m_search_;
+    search->matchedLength = length;
+}
+
+/**
+ * Records the start index of the latest match.
+ * @param position new match start index
+ */
+void SearchIterator::setMatchStart(UTextOffset position)
+{
+    USearch *search      = m_search_;
+    search->matchedIndex = position;
+}
+
+/**
+ * Marks the search as having no match and moves the offset to the far end 
+ * in the current search direction: the text length when searching 
+ * forwards, 0 when searching backwards.
+ */
+void SearchIterator::setMatchNotFound() 
+{
+    setMatchStart(USEARCH_DONE);
+    setMatchLength(0);
+    // both candidate offsets are within range, so no error is expected and 
+    // the status is not reported to the caller
+    UErrorCode ignored = U_ZERO_ERROR;
+    setOffset(m_search_->isForwardSearching ? m_search_->textLength : 0, 
+              ignored);
+}
+
+
--- a/icu4c/source/i18n/stsearch.cpp
+++ b/icu4c/source/i18n/stsearch.cpp
@ -0,0 +1,387 @@
+/*
+**********************************************************************
+*   Copyright (C) 2001 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  03/22/2000   helena      Creation.
+**********************************************************************
+*/
+
+#include "unicode/stsearch.h"
+#include "cmemory.h"
+#include "usrchimp.h"
+
+// public constructors and destructors -----------------------------------
+
+/**
+ * Constructor taking the text as a string and the collator of a locale.
+ * @param pattern pattern to search for, a copy is kept
+ * @param text text to be searched, a copy is kept by the base class
+ * @param locale locale whose name is passed to usearch_open
+ * @param breakiter break iterator restricting the matches, stored as given
+ * @param status error code filled in by usearch_open
+ */
+StringSearch::StringSearch(const UnicodeString &pattern, 
+                           const UnicodeString &text,
+                           const Locale        &locale,       
+                                 BreakIterator *breakiter,
+                                 UErrorCode    &status) :
+                           SearchIterator(text, breakiter), 
+                           m_collator_(),
+                           m_pattern_(pattern)
+{
+    m_strsrch_ = usearch_open(m_pattern_.fArray, m_pattern_.fLength, 
+                              m_text_.fArray, m_text_.fLength, 
+                              locale.getName(), NULL, &status);
+    // drop the USearch allocated by the base class, the one belonging to 
+    // m_strsrch_ replaces it below
+    uprv_free(m_search_);
+    m_search_ = NULL;
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    int32_t      ruleslength;
+    const UChar *rules = ucol_getRules(m_strsrch_->collator, &ruleslength);
+    m_collation_rules_.setTo(rules, ruleslength);
+    m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
+                             &m_collation_rules_);
+    m_search_ = m_strsrch_->search;
+}
+
+/**
+ * Constructor taking the text as a string and an explicit collator.
+ * @param pattern pattern to search for, a copy is kept
+ * @param text text to be searched, a copy is kept by the base class
+ * @param coll collator to search with, must not be NULL
+ * @param breakiter break iterator restricting the matches, stored as given
+ * @param status set to U_ILLEGAL_ARGUMENT_ERROR if coll is NULL, otherwise 
+ *        filled in by usearch_openFromCollator
+ */
+StringSearch::StringSearch(const UnicodeString     &pattern, 
+                           const UnicodeString     &text,
+                                 RuleBasedCollator *coll,       
+                                 BreakIterator     *breakiter,
+                                 UErrorCode        &status) :
+                           SearchIterator(text, breakiter), 
+                           m_collator_(),
+                           m_pattern_(pattern)
+{
+    if (coll == NULL) {
+        m_strsrch_ = NULL;
+        status     = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    m_strsrch_ = usearch_openFromCollator(m_pattern_.fArray, 
+                                          m_pattern_.fLength, m_text_.fArray, 
+                                          m_text_.fLength, coll->ucollator, 
+                                          NULL, &status);
+    // drop the USearch allocated by the base class, the one belonging to 
+    // m_strsrch_ replaces it below
+    uprv_free(m_search_);
+    m_search_ = NULL;
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    int32_t      ruleslength;
+    const UChar *rules = ucol_getRules(m_strsrch_->collator, &ruleslength);
+    m_collation_rules_.setTo(rules, ruleslength);
+    m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
+                             &m_collation_rules_);
+    m_search_ = m_strsrch_->search;
+}
+
+/**
+ * Constructor taking the text from a character iterator and the collator 
+ * of a locale.
+ * @param pattern pattern to search for, a copy is kept
+ * @param text iterator whose text is extracted by the base class
+ * @param locale locale whose name is passed to usearch_open
+ * @param breakiter break iterator restricting the matches, stored as given
+ * @param status error code filled in by usearch_open
+ */
+StringSearch::StringSearch(const UnicodeString     &pattern, 
+                                 CharacterIterator &text,
+                           const Locale            &locale, 
+                                 BreakIterator     *breakiter,
+                                 UErrorCode        &status) :
+                           SearchIterator(text, breakiter), 
+                           m_collator_(),
+                           m_pattern_(pattern)
+{
+    m_strsrch_ = usearch_open(m_pattern_.fArray, m_pattern_.fLength, 
+                              m_text_.fArray, m_text_.fLength, 
+                              locale.getName(), NULL, &status);
+    // drop the USearch allocated by the base class, the one belonging to 
+    // m_strsrch_ replaces it below
+    uprv_free(m_search_);
+    m_search_ = NULL;
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    int32_t      ruleslength;
+    const UChar *rules = ucol_getRules(m_strsrch_->collator, &ruleslength);
+    m_collation_rules_.setTo(rules, ruleslength);
+    m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
+                             &m_collation_rules_);
+    m_search_ = m_strsrch_->search;
+}
+
+/**
+ * Constructor taking the text from a character iterator and an explicit 
+ * collator.
+ * @param pattern pattern to search for, a copy is kept
+ * @param text iterator whose text is extracted by the base class
+ * @param coll collator to search with, must not be NULL
+ * @param breakiter break iterator restricting the matches, stored as given
+ * @param status set to U_ILLEGAL_ARGUMENT_ERROR if coll is NULL, otherwise 
+ *        filled in by usearch_openFromCollator
+ */
+StringSearch::StringSearch(const UnicodeString     &pattern,
+                                 CharacterIterator &text,
+                                 RuleBasedCollator *coll, 
+                                 BreakIterator     *breakiter,
+                                 UErrorCode        &status) :
+                           SearchIterator(text, breakiter), 
+                           m_collator_(),
+                           m_pattern_(pattern)
+{
+    if (coll == NULL) {
+        m_strsrch_ = NULL;
+        status     = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    m_strsrch_ = usearch_openFromCollator(m_pattern_.fArray, 
+                                          m_pattern_.fLength, m_text_.fArray, 
+                                          m_text_.fLength, coll->ucollator, 
+                                          NULL, &status);
+    // drop the USearch allocated by the base class, the one belonging to 
+    // m_strsrch_ replaces it below
+    uprv_free(m_search_);
+    m_search_ = NULL;
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    int32_t      ruleslength;
+    const UChar *rules = ucol_getRules(m_strsrch_->collator, &ruleslength);
+    m_collation_rules_.setTo(rules, ruleslength);
+    m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
+                             &m_collation_rules_);
+    m_search_ = m_strsrch_->search;
+}
+
+/**
+ * Copy constructor.
+ * Opens a new search over copies of the other object's pattern and text, 
+ * using the other object's collator. The match state and offset of the 
+ * other object are not carried over by this code.
+ * @param that object to copy
+ */
+StringSearch::StringSearch(const StringSearch &that) :
+                       SearchIterator(that.m_text_, that.m_breakiterator_),
+                       m_collator_(),
+                       m_pattern_(that.m_pattern_)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    if (that.m_strsrch_ != NULL) {
+        m_strsrch_ = usearch_openFromCollator(m_pattern_.fArray, 
+                                              m_pattern_.fLength, 
+                                              m_text_.fArray, m_text_.fLength, 
+                                              that.m_strsrch_->collator, 
+                                              NULL, &status);
+    }
+    else {
+        // the source never opened a search, there is nothing to clone
+        m_strsrch_ = NULL;
+        status     = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+    // drop the USearch allocated by the base class, the one belonging to 
+    // m_strsrch_ replaces it below
+    uprv_free(m_search_);
+    m_search_ = NULL;
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    int32_t      ruleslength;
+    const UChar *rules = ucol_getRules(m_strsrch_->collator, &ruleslength);
+    m_collation_rules_.setTo(rules, ruleslength);
+    m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
+                             &m_collation_rules_);
+    m_search_        = m_strsrch_->search;
+    m_breakiterator_ = that.m_breakiterator_;
+}
+
+/**
+ * Destructor. Closes the underlying C string search.
+ * m_search_ is cleared only when a C search was actually open. When 
+ * m_strsrch_ is NULL (for instance after construction with a NULL 
+ * collator) m_search_ still refers to the structure allocated by the base 
+ * class, and it is left in place so that the base class destructor frees it.
+ */
+StringSearch::~StringSearch()
+{
+    if (m_strsrch_ != NULL) {
+        usearch_close(m_strsrch_);
+        // m_search_ aliased m_strsrch_->search, which presumably has been 
+        // released by usearch_close; keep the base class from freeing it
+        m_search_ = NULL;
+    }
+}
+
+// operator overloading ---------------------------------------------
+/**
+ * Assignment operator.
+ * Copies text, break iterator and pattern from the other object and 
+ * reopens the underlying search with the other object's collator. If the 
+ * other object holds no search, or the search can not be reopened, this 
+ * object is left without a search (m_strsrch_ and m_search_ are NULL).
+ * @param that object to assign from
+ * @return this object
+ */
+StringSearch & StringSearch::operator=(const StringSearch &that)
+{
+    if ((*this) != that) {
+        UErrorCode status = U_ZERO_ERROR;
+        m_text_          = that.m_text_;
+        m_breakiterator_ = that.m_breakiterator_;
+        m_pattern_       = that.m_pattern_;
+        // all m_search_ in the parent class is linked up with m_strsrch_
+        if (m_strsrch_ != NULL) {
+            usearch_close(m_strsrch_);
+            m_strsrch_ = NULL;
+        }
+        else if (m_search_ != NULL) {
+            // no search was open, so m_search_ is still the structure 
+            // allocated by the base class
+            uprv_free(m_search_);
+        }
+        // never keep a pointer into a search that has been closed
+        m_search_ = NULL;
+
+        if (that.m_strsrch_ != NULL) {
+            m_strsrch_ = usearch_openFromCollator(m_pattern_.fArray, 
+                                                  m_pattern_.fLength, 
+                                                  m_text_.fArray, 
+                                                  m_text_.fLength, 
+                                                  that.m_strsrch_->collator, 
+                                                  NULL, &status);
+        }
+        if (m_strsrch_ != NULL && U_SUCCESS(status)) {
+            int32_t      length;
+            const UChar *rules = ucol_getRules(m_strsrch_->collator, 
+                                               &length);
+            m_collation_rules_.setTo(rules, length);
+            m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
+                                     &m_collation_rules_);
+            m_search_ = m_strsrch_->search;
+        }
+    }
+    return *this;
+}
+
+/**
+ * Equality operator.
+ * @param that iterator to compare with; once the base class comparison 
+ *        succeeds it is treated as a StringSearch without a type check
+ * @return TRUE if the base class state, the pattern and the collator 
+ *         pointer are all equal, FALSE otherwise
+ */
+UBool StringSearch::operator==(const SearchIterator &that) const
+{
+    if (this == &that) {
+        return TRUE;
+    }
+    if (!SearchIterator::operator ==(that)) {
+        return FALSE;
+    }
+    const StringSearch &other = (const StringSearch &)that;
+    return (m_pattern_ == other.m_pattern_ &&
+            m_strsrch_->collator == other.m_strsrch_->collator);
+}
+
+// public get and set methods ----------------------------------------
+
+/**
+ * Sets the offset at which the next search starts.
+ * @param position new offset, passed to usearch_setOffset
+ * @param status error code filled in by usearch_setOffset
+ */
+void StringSearch::setOffset(UTextOffset position, UErrorCode &status)
+{
+    UErrorCode *errorcode = &status;
+    usearch_setOffset(m_strsrch_, position, errorcode);
+}
+
+/**
+ * @return the current offset as reported by usearch_getOffset
+ */
+UTextOffset StringSearch::getOffset(void) const
+{
+    const UTextOffset offset = usearch_getOffset(m_strsrch_);
+    return offset;
+}
+
+/**
+ * Replaces the text to be searched.
+ * The underlying search is given the buffer of the copy held by this 
+ * object rather than the caller's buffer, so that it does not depend on 
+ * the lifetime of the caller's string. This matches what the constructors 
+ * and the CharacterIterator overload do.
+ * @param text new text, a copy is kept
+ * @param status error code filled in by usearch_setText; nothing is changed 
+ *        if this is already a failure on entry
+ */
+void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        // leave m_text_ alone, the search still points into its buffer
+        return;
+    }
+    m_text_ = text;
+    usearch_setText(m_strsrch_, m_text_.fArray, m_text_.fLength, &status);
+}
+    
+/**
+ * Replaces the text to be searched with the text of a character iterator.
+ * @param text iterator whose text is extracted into this object
+ * @param status error code filled in by usearch_setText; nothing is changed 
+ *        if this is already a failure on entry
+ */
+void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        // leave m_text_ alone, the search still points into its buffer
+        return;
+    }
+    text.getText(m_text_);
+    usearch_setText(m_strsrch_, m_text_.fArray, m_text_.fLength, &status);
+}
+
+/**
+ * @return pointer to the collator object embedded in this search; the 
+ *         const qualifier of this method is cast away
+ */
+RuleBasedCollator * StringSearch::getCollator() const
+{
+    RuleBasedCollator *collator = (RuleBasedCollator *)&m_collator_;
+    return collator;
+}
+    
+/**
+ * Sets the collator used for the search.
+ * The rules and the embedded collator wrapper of this object are only 
+ * updated once the underlying search has accepted the new collator.
+ * @param coll new collator, must not be NULL
+ * @param status set to U_ILLEGAL_ARGUMENT_ERROR if coll is NULL, otherwise 
+ *        filled in by usearch_setCollator; nothing is changed if this is 
+ *        already a failure on entry
+ */
+void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (coll == NULL) {
+        // same contract as the constructors taking a collator
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
+    if (U_SUCCESS(status)) {
+        m_collation_rules_.setTo(coll->getRules());
+        m_collator_.setUCollator((UCollator *)m_strsrch_->collator, 
+                                 &m_collation_rules_);
+    }
+}
+    
+/**
+ * Replaces the pattern to search for.
+ * @param pattern new pattern, a copy is kept
+ * @param status error code filled in by usearch_setPattern
+ */
+void StringSearch::setPattern(const UnicodeString &pattern, 
+                                    UErrorCode    &status)
+{
+    m_pattern_ = pattern;
+    // hand the buffer of our own copy to the underlying search
+    const UChar *patterntext   = m_pattern_.fArray;
+    int32_t      patternlength = m_pattern_.fLength;
+    usearch_setPattern(m_strsrch_, patterntext, patternlength, &status);
+}
+    
+/**
+ * @return the pattern held by this object
+ */
+const UnicodeString & StringSearch::getPattern() const
+{
+    const UnicodeString &pattern = m_pattern_;
+    return pattern;
+}
+
+// public methods ----------------------------------------------------
+
+/**
+ * Resets the search by delegating to usearch_reset on the underlying C 
+ * string search.
+ */
+void StringSearch::reset()
+{
+    UStringSearch *strsrch = m_strsrch_;
+    usearch_reset(strsrch);
+}
+
+/**
+ * Creates a copy of this search with the same pattern, text, collator, 
+ * break iterator, offset and match state.
+ * @return the new object, owned by the caller, or NULL if it could not be 
+ *         created. A partially built clone is deleted instead of leaked.
+ */
+SearchIterator * StringSearch::safeClone(void) const
+{
+    UErrorCode status = U_ZERO_ERROR;
+    StringSearch *result = new StringSearch(m_pattern_, m_text_, 
+                                            (RuleBasedCollator *)&m_collator_, 
+                                            m_breakiterator_,
+                                            status);
+    if (result == NULL) {
+        return NULL;
+    }
+    if (U_SUCCESS(status)) {
+        result->setOffset(getOffset(), status);
+    }
+    if (U_FAILURE(status)) {
+        // a clone whose construction failed has no search structure, so 
+        // its match state must not be touched; release it
+        delete result;
+        return NULL;
+    }
+    result->setMatchStart(m_strsrch_->search->matchedIndex);
+    result->setMatchLength(m_strsrch_->search->matchedLength);
+    return result;
+}
+    
+// protected method -------------------------------------------------
+
+/**
+ * Searches forwards for the next match.
+ * With an empty pattern (no collation elements) every position before the 
+ * end of the text is reported as a zero length match. Otherwise the 
+ * underlying exact or canonical search runs until it finds a match that 
+ * the break iterator, if any, accepts at both ends, or until no match is 
+ * left.
+ * @param position position to search from, already in pre-shift form
+ * @param status error code; USEARCH_DONE is returned on failure
+ * @return start index of the match, or USEARCH_DONE if there is none
+ */
+UTextOffset StringSearch::handleNext(int32_t position, UErrorCode &status)
+{
+    // values passed here are already in the pre-shift position
+    if (U_SUCCESS(status)) {
+        if (m_strsrch_->pattern.CELength == 0) {
+            m_search_->matchedIndex = 
+                                    m_search_->matchedIndex == USEARCH_DONE ? 
+                                    getOffset() : m_search_->matchedIndex + 1;
+            m_search_->matchedLength = 0;
+            ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 
+                           &status);
+            if (m_search_->matchedIndex == m_search_->textLength) {
+                m_search_->matchedIndex = USEARCH_DONE;
+            }
+            // report the position just computed, as handlePrev does for an 
+            // empty pattern, so that the return value agrees with 
+            // getMatchedStart()
+            return m_search_->matchedIndex;
+        }
+        else {
+            // looking at usearch.cpp, this part is shifted out to 
+            // StringSearch instead of SearchIterator because m_strsrch_ is
+            // not accessible in SearchIterator
+            if (!m_search_->isOverlap &&
+                position + m_strsrch_->pattern.defaultShiftSize > 
+                m_search_->textLength) {
+                // the remaining text is shorter than the minimum shift
+                setMatchNotFound();
+                return USEARCH_DONE;
+            }
+            while (TRUE) {
+                if (m_search_->isCanonicalMatch) {
+                    // can't use exact here since extra accents are allowed.
+                    usearch_handleNextCanonical(m_strsrch_, &status);
+                }
+                else {
+                    usearch_handleNextExact(m_strsrch_, &status);
+                }
+                if (U_FAILURE(status)) {
+                    return USEARCH_DONE;
+                }
+                // accept the result when there is nothing left to find, or 
+                // when both ends of the match sit on a boundary; otherwise 
+                // keep searching
+                if (m_breakiterator_ == NULL || 
+                    m_search_->matchedIndex == USEARCH_DONE ||
+                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
+                     m_breakiterator_->isBoundary(m_search_->matchedIndex + 
+                                                  m_search_->matchedLength))) {
+                    return m_search_->matchedIndex;
+                }
+            }
+        }
+    }
+    return USEARCH_DONE;
+}
+
+/**
+ * Searches backwards for the previous match.
+ * With an empty pattern (no collation elements) the position just before 
+ * the previous match, or before the current offset, is reported as a zero 
+ * length match until position 0 has been reached. Otherwise the underlying 
+ * exact or canonical search runs until it finds a match that the break 
+ * iterator, if any, accepts at both ends, or until no match is left.
+ * @param position position to search from, already in pre-shift form
+ * @param status error code; USEARCH_DONE is returned on failure
+ * @return start index of the match, or USEARCH_DONE if there is none
+ */
+UTextOffset StringSearch::handlePrev(int32_t position, UErrorCode &status)
+{
+    // values passed here are already in the pre-shift position
+    if (U_SUCCESS(status)) {
+        if (m_strsrch_->pattern.CELength == 0) {
+            m_search_->matchedIndex = 
+                  (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : 
+                   m_search_->matchedIndex);
+            if (m_search_->matchedIndex == 0) {
+                // already at the start of the text, nothing precedes it
+                setMatchNotFound();
+            }
+            else {
+                m_search_->matchedIndex --;
+                ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, 
+                               &status);
+                m_search_->matchedLength = 0;
+            }
+        }
+        else {
+            // looking at usearch.cpp, this part is shifted out to
+            // StringSearch instead of SearchIterator because m_strsrch_ is
+            // not accessible in SearchIterator
+            if (!m_search_->isOverlap && 
+                position - m_strsrch_->pattern.defaultShiftSize < 0) {
+                setMatchNotFound();
+                return USEARCH_DONE;
+            }
+            while (TRUE) {
+                if (m_search_->isCanonicalMatch) {
+                    // can't use exact here since extra accents are allowed.
+                    usearch_handlePreviousCanonical(m_strsrch_, &status);
+                }
+                else {
+                    usearch_handlePreviousExact(m_strsrch_, &status);
+                }
+                if (U_FAILURE(status)) {
+                    return USEARCH_DONE;
+                }
+                // accept the result when there is nothing left to find, or
+                // when both ends of the match sit on a boundary; otherwise
+                // keep searching
+                if (m_breakiterator_ == NULL || 
+                    m_search_->matchedIndex == USEARCH_DONE ||
+                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
+                     m_breakiterator_->isBoundary(m_search_->matchedIndex + 
+                                                  m_search_->matchedLength))) {
+                    return m_search_->matchedIndex;
+                }
+            }
+        }
+          
+        // only reached from the empty pattern branch, the loop above 
+        // always returns from inside
+        return m_search_->matchedIndex;
+    }
+    return USEARCH_DONE;
+}
+
+
+
--- a/icu4c/source/i18n/tblcoll.cpp
+++ b/icu4c/source/i18n/tblcoll.cpp
@ -326,18 +326,12 @@ Collator* RuleBasedCollator::clone() const
  return new RuleBasedCollator(*this);
 }

-/**
-* Create a CollationElementIterator object that will iterator over the
-* elements in a string, using the collation rules defined in this
-* RuleBasedCollator
-*/
 CollationElementIterator* RuleBasedCollator::createCollationElementIterator
                                           (const UnicodeString& source) const
 {
  UErrorCode status = U_ZERO_ERROR;
  CollationElementIterator *result = new CollationElementIterator(source, this,
                                                                  status);
-
  if (U_FAILURE(status))
    return NULL;

--- a/icu4c/source/i18n/ucol_imp.h
+++ b/icu4c/source/i18n/ucol_imp.h
@ -368,6 +368,13 @@ ucol_cloneRuleData(const UCollator *coll, int32_t *length, UErrorCode *status);
 #define getExpansionCount(CE) ((CE)&0xF)
 #define isCEIgnorable(CE) (((CE) & 0xFFFFFFBF) == 0)

+/* StringSearch internal use */
+/* non-zero when the UCOL_ITER_INNORMBUF flag of the iterator is set */
+#define inNormBuf(coleiter) ((coleiter)->iteratordata_.flags & UCOL_ITER_INNORMBUF)
+#define isFCDPointerNull(coleiter) ((coleiter)->iteratordata_.fcdPosition == NULL)
+/* number of CEs between the start of the CE buffer and the return position */
+#define getExpansionPrefix(coleiter) ((coleiter)->iteratordata_.toReturn - (coleiter)->iteratordata_.CEs)
+/* NOTE(review): despite its name this only computes CEs + offset and assigns nothing - confirm against the callers */
+#define setExpansionPrefix(coleiter, offset) ((coleiter)->iteratordata_.CEs + (offset))
+/* number of CEs between the return position and the end of the buffered CEs */
+#define getExpansionSuffix(coleiter) ((coleiter)->iteratordata_.CEpos - (coleiter)->iteratordata_.toReturn)
+/* uses the offset argument instead of relying on a variable named leftoverces at the call site */
+#define setExpansionSuffix(coleiter, offset) ((coleiter)->iteratordata_.toReturn = (coleiter)->iteratordata_.CEpos - (offset))

 #define UCA_DATA_TYPE "dat"
 #define UCA_DATA_NAME "ucadata"
--- a/icu4c/source/i18n/ucoleitr.cpp
+++ b/icu4c/source/i18n/ucoleitr.cpp
@ -223,6 +223,9 @@ ucol_setOffset(UCollationElements    *elems,
  collIterate *ci = &(elems->iteratordata_);
  ci->pos         = ci->string + offset;
  ci->CEpos       = ci->toReturn = ci->CEs;
+  if (ci->flags & UCOL_ITER_INNORMBUF) {
+    ci->flags       = ci->origFlags;
+  }
  if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
      ci->endp = ci->string + u_strlen(ci->string);
  }
--- a/icu4c/source/i18n/unicode/search.h
+++ b/icu4c/source/i18n/unicode/search.h
@ -0,0 +1,466 @@
+/*
+**********************************************************************
+*   Copyright (C) 2001 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  03/22/2000   helena      Creation.
+**********************************************************************
+*/
+
+#ifndef SEARCH_H
+#define SEARCH_H
+
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#include "unicode/brkiter.h"
+#include "unicode/usearch.h"
+
+/**
+ * <tt>SearchIterator</tt> is an abstract base class that provides 
+ * methods to search for a pattern within a text string. Instances of
+ * <tt>SearchIterator</tt> maintain a current position and scan over the 
+ * target text, returning the indices at which the pattern is matched and 
+ * the length of each match.
+ * <p>
+ * <tt>SearchIterator</tt> defines a protocol for text searching. 
+ * Subclasses provide concrete implementations of various search algorithms. 
+ * For example, {@link StringSearch} implements language-sensitive pattern 
+ * matching based on the comparison rules defined in a 
+ * {@link RuleBasedCollator} object. 
+ * <p> 
+ * Other options for searching include using a BreakIterator to restrict 
+ * the points at which matches are detected.
+ * <p>
+ * <tt>SearchIterator</tt> provides an API that is similar to that of
+ * other text iteration classes such as <tt>BreakIterator</tt>. Using 
+ * this class, it is easy to scan through text looking for all occurrences of 
+ * a given pattern. The following example uses a <tt>StringSearch</tt> 
+ * object to find all instances of "fox" in the target string. Any other 
+ * subclass of <tt>SearchIterator</tt> can be used in an identical 
+ * manner.
+ * <pre><code>
+ * UnicodeString target("The quick brown fox jumped over the lazy fox");
+ * UnicodeString pattern("fox");
+ * UErrorCode    status = U_ZERO_ERROR;
+ * SearchIterator *iter = new StringSearch(pattern, target,
+ *                             Locale::getDefault(), NULL, status);
+ * for (int pos = iter->first(status); pos != USEARCH_DONE; 
+ *                                     pos = iter->next(status)) {
+ *     printf("Found match at %d pos, length is %d\n", pos, 
+ *                                             iter->getMatchedLength());
+ * }
+ * </code></pre>
+ *
+ * @see StringSearch
+ */
+
+struct USearch;
+typedef struct USearch USearch;
+
+/**
+* Data structure for searching
+*/
+class U_I18N_API SearchIterator {
+
+public:
+
+    // public constructors and destructors -------------------------------
+
+    /** 
+    * Copy constructor that creates a SearchIterator instance with the same 
+    * behavior, and iterating over the same text. 
+    * @param other the SearchIterator instance to be copied.
+    */
+    SearchIterator(const SearchIterator &other);
+
+    /**
+     * Destructor. Cleans up the search iterator data struct.
+     */
+    virtual ~SearchIterator();
+
+    // public get and set methods ----------------------------------------
+
+    /**
+     * Sets the index to point to the given position, and clears any state 
+     * that's affected.
+     * <p>
+     * This method takes the argument index and sets the position in the text 
+     * string accordingly without checking if the index is pointing to a 
+     * valid starting point to begin searching. 
+     * @param position within the text to be set
+     * @param status for errors if it occurs
+     */
+    virtual void setOffset(UTextOffset position, UErrorCode &status) = 0;
+
+    /**
+     * Return the current index in the text being searched.
+     * If the iteration has gone past the end of the text
+     * (or past the beginning for a backwards search), {@link #USEARCH_DONE} 
+     * is returned.
+     * @return current index in the text being searched.
+     */
+    virtual UTextOffset getOffset(void) const = 0;
+
+    /**
+    * Sets the text searching attributes located in the enum 
+    * USearchAttribute with values from the enum USearchAttributeValue.
+    * USEARCH_DEFAULT can be used for all attributes for resetting.
+    * @param attribute text attribute (enum USearchAttribute) to be set
+    * @param value text attribute value
+    * @param status for errors if it occurs
+    */
+    void setAttribute(USearchAttribute       attribute,
+                      USearchAttributeValue  value,
+                      UErrorCode            &status);
+
+    /**    
+    * Gets the text searching attributes
+    * @param attribute text attribute (enum USearchAttribute) to be retrieved
+    * @return text attribute value
+    */
+    USearchAttributeValue getAttribute(USearchAttribute  attribute) const;
+    
+    /**
+    * Returns the index to the match in the text string that was searched.
+    * This call returns a valid result only after a successful call to 
+    * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
+    * Just after construction, or after a searching method returns 
+    * <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
+    * <p>
+    * Use getMatchedLength to get the matched string length.
+    * @return index of a substring within the text string that is being 
+    *         searched.
+    */
+    UTextOffset getMatchedStart(void) const;
+
+    /**
+     * Returns the length of text in the string which matches the search 
+     * pattern. This call returns a valid result only after a successful call 
+     * to {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
+     * Just after construction, or after a searching method returns 
+     * <tt>USEARCH_DONE</tt>, this method will return 0.
+     * @return The length of the match in the target text, or 0 if there
+     *         is no match currently.
+     */
+    int32_t getMatchedLength(void) const;
+    
+    /**
+     * Returns the text that was matched by the most recent call to 
+     * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
+     * If the iterator is not pointing at a valid match (e.g. just after 
+     * construction or after <tt>USEARCH_DONE</tt> has been returned), 
+     * returns an empty string. 
+     * @param result stores the matched string or an empty string if a match
+     *        is not found.
+     */
+    void getMatchedText(UnicodeString &result) const;
+    
+    /**
+     * Set the BreakIterator that will be used to restrict the points
+     * at which matches are detected. The user is responsible for deleting 
+     * the breakiterator.
+     * @param breakiter A BreakIterator that will be used to restrict the 
+     *                points at which matches are detected. If a match is 
+     *                found, but the match's start or end index is not a 
+     *                boundary as determined by the <tt>BreakIterator</tt>, 
+     *                the match will be rejected and another will be searched 
+     *                for. If this parameter is <tt>NULL</tt>, no break
+     *                detection is attempted.
+     * @param  status for errors if it occurs
+     */
+    void setBreakIterator(BreakIterator *breakiter, UErrorCode &status);
+    
+    /**
+     * Returns the BreakIterator that is used to restrict the points at 
+     * which matches are detected.  This will be the same object that was 
+     * passed to the constructor or to <tt>setBreakIterator</tt>.
+     * Note that <tt>NULL</tt> is a legal value; it means that break
+     * detection should not be attempted.
+     * @return BreakIterator used to restrict matchings.
+     */
+    const BreakIterator * getBreakIterator(void) const;
+
+    /**
+     * Set the string text to be searched. Text iteration will hence begin at 
+     * the start of the text string. This method is useful if you want to 
+     * re-use an iterator to search for the same pattern within a different 
+     * body of text. The user is responsible for deleting the text.
+     * @param text string to be searched.
+     * @param status for errors if it occurs
+     */
+    virtual void setText(const UnicodeString &text, UErrorCode &status);    
+
+    /**
+     * Set the string text to be searched. Text iteration will hence begin at 
+     * the start of the text string. This method is useful if you want to 
+     * re-use an iterator to search for the same pattern within a different 
+     * body of text.
+     * <p>
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
+     * will be done during searching for this version. The block of text 
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * The user is responsible for deleting the text.
+     * @param text string iterator to be searched.
+     * @param  status for errors if it occurs
+     */
+    virtual void setText(CharacterIterator &text, UErrorCode &status);
+    
+    /**
+     * Return the string text to be searched.
+     * @return text string to be searched.
+     */
+    const UnicodeString & getText(void) const;
+
+    // operator overloading ----------------------------------------------
+
+    /**
+     * Equality operator. 
+     * @param that SearchIterator instance to be compared.
+     * @return TRUE if both SearchIterators are of the same class, have the 
+     *         same behavior, iterate over the same text and have the same
+     *         attributes. FALSE otherwise.
+     */
+    virtual UBool operator==(const SearchIterator &that) const;
+
+    /**
+     * Not-equal operator. 
+     * @param that SearchIterator instance to be compared.
+     * @return FALSE if operator== returns TRUE, and vice versa.
+     */
+    UBool operator!=(const SearchIterator &that) const;
+
+    // public methods ----------------------------------------------------
+
+    /**
+     * Returns a copy of SearchIterator with the same behavior, and 
+     * iterating over the same text, as this one. Note that all data will be
+     * replicated, except for the text string to be searched.
+     * @return cloned object
+     */
+    virtual SearchIterator* safeClone(void) const = 0;
+
+    /**
+     * Returns the first index at which the string text matches the search 
+     * pattern. The iterator is adjusted so that its current index (as 
+     * returned by {@link #getOffset}) is the match position if one 
+     * was found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the iterator will be adjusted to the index USEARCH_DONE
+     * @param  status for errors if it occurs
+     * @return The character index of the first match, or 
+     *         <tt>USEARCH_DONE</tt> if there are no matches.
+     */
+    UTextOffset first(UErrorCode &status);
+
+    /**
+     * Returns the first index greater than <tt>position</tt> at which the 
+     * string text matches the search pattern. The iterator is adjusted so 
+     * that its current index (as returned by {@link #getOffset}) is the 
+     * match position if one was found. If a match is not found, 
+     * <tt>USEARCH_DONE</tt> will be returned and the iterator will be 
+     * adjusted to the index USEARCH_DONE
+     * @param  position where search is to start from
+     * @param  status for errors if it occurs
+     * @return The character index of the first match following 
+     *         <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are no 
+     *         matches.
+     */
+    UTextOffset following(UTextOffset position, UErrorCode &status);
+    
+    /**
+     * Returns the last index in the target text at which it matches the 
+     * search pattern. The iterator is adjusted so that its current index 
+     * (as returned by {@link #getOffset}) is the match position if one was 
+     * found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the iterator will be adjusted to the index USEARCH_DONE.
+     * @param  status for errors if it occurs
+     * @return The index of the last match, or <tt>USEARCH_DONE</tt> if 
+     *         there are no matches.
+     */
+    UTextOffset last(UErrorCode &status);
+
+    /**
+     * Returns the first index less than <tt>position</tt> at which the string 
+     * text matches the search pattern. The iterator is adjusted so that its 
+     * current index (as returned by {@link #getOffset}) is the match 
+     * position if one was found. If a match is not found, 
+     * <tt>USEARCH_DONE</tt> will be returned and the iterator will be 
+     * adjusted to the index USEARCH_DONE
+     * @param  position where search is to start from
+     * @param  status for errors if it occurs
+     * @return The character index of the first match preceding 
+     *         <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are 
+     *         no matches.
+     */
+    UTextOffset preceding(UTextOffset position, UErrorCode &status);
+
+    /**
+     * Returns the index of the next point at which the text matches the
+     * search pattern, starting from the current position.
+     * The iterator is adjusted so that its current index (as returned by 
+     * {@link #getOffset}) is the match position if one was found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the iterator will be adjusted to a position after the end of the text 
+     * string.
+     * @param  status for errors if it occurs
+     * @return The index of the next match after the current position,
+     *          or <tt>USEARCH_DONE</tt> if there are no more matches.
+     */
+     UTextOffset next(UErrorCode &status);
+
+    /**
+     * Returns the index of the previous point at which the string text 
+     * matches the search pattern, starting at the current position.
+     * The iterator is adjusted so that its current index (as returned by 
+     * {@link #getOffset}) is the match position if one was found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the iterator will be adjusted to the index USEARCH_DONE
+     * @param  status for errors if it occurs
+     * @return The index of the previous match before the current position,
+     *          or <tt>USEARCH_DONE</tt> if there are no more matches.
+     */
+    UTextOffset previous(UErrorCode &status);
+
+    /** 
+    * Resets the iteration.
+    * Search will begin at the start of the text string if a forward 
+    * iteration is initiated before a backwards iteration. Otherwise if a 
+    * backwards iteration is initiated before a forwards iteration, the 
+    * search will begin at the end of the text string.    
+    */
+    virtual void reset();
+
+protected:
+    // protected data members ---------------------------------------------
+
+    /**
+    * C search data struct
+    */
+    USearch *m_search_;
+
+    /**
+    * Break iterator.
+    * Currently the C++ breakiterator does not have getRules etc to reproduce
+    * another in C. Hence we keep the original around and do the verification
+    * at the end of the match. The user is responsible for deleting this
+    * break iterator.
+    */
+    BreakIterator *m_breakiterator_;
+    
+    /**
+    * Unicode string version of the search text
+    */
+    UnicodeString  m_text_;
+
+    // protected constructors and destructors -----------------------------
+
+    /**
+    * Default constructor.
+    * Initializes data to the default values.
+    */
+    SearchIterator();
+
+    /**
+     * Constructor for use by subclasses.
+     * @param text The target text to be searched.
+     * @param breakiter A {@link BreakIterator} that is used to restrict the 
+     *                points at which matches are detected. If 
+     *                <tt>handleNext</tt> or <tt>handlePrev</tt> finds a 
+     *                match, but the match's start or end index is not a 
+     *                boundary as determined by the <tt>BreakIterator</tt>, 
+     *                the match is rejected and <tt>handleNext</tt> or 
+     *                <tt>handlePrev</tt> is called again. If this 
+     *                parameter is <tt>NULL</tt>, no break detection is 
+     *                attempted.
+     */
+    SearchIterator(const UnicodeString &text, 
+                         BreakIterator *breakiter = NULL);
+
+    /**
+     * Constructor for use by subclasses.
+     * <p>
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
+     * will be done during searching for this version. The block of text 
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * @param text The target text to be searched.
+     * @param breakiter A {@link BreakIterator} that is used to restrict the 
+     *                points at which matches are detected. If 
+     *                <tt>handleNext</tt> or <tt>handlePrev</tt> finds a 
+     *                match, but the match's start or end index is not a 
+     *                boundary as determined by the <tt>BreakIterator</tt>, 
+     *                the match is rejected and <tt>handleNext</tt> or 
+     *                <tt>handlePrev</tt> is called again. If this parameter 
+     *                is <tt>NULL</tt>, no break detection is attempted.
+     */
+    SearchIterator(CharacterIterator &text, BreakIterator *breakiter = NULL);
+
+    // protected methods --------------------------------------------------
+
+    /**
+     * Abstract method which subclasses override to provide the mechanism
+     * for finding the next match in the target text. This allows different
+     * subclasses to provide different search algorithms.
+     * <p>
+     * If a match is found, the implementation should return the index at
+     * which the match starts and should call 
+     * {@link #setMatchLength setMatchLength} with the number of characters 
+     * in the target text that make up the match. If no match is found, the 
+     * method should return USEARCH_DONE.
+     * <p>
+     * @param position The index in the target text at which the search 
+     *                 should start.
+     * @param status for error codes if it occurs.
+     */
+    virtual UTextOffset handleNext(UTextOffset position, UErrorCode &status) 
+                                                                         = 0;
+
+    /**
+     * Abstract method which subclasses override to provide the mechanism for
+     * finding the previous match in the target text. This allows different
+     * subclasses to provide different search algorithms.
+     * <p>
+     * If a match is found, the implementation should return the index at
+     * which the match starts and should call 
+     * {@link #setMatchLength setMatchLength} with the number of characters 
+     * in the target text that make up the match. If no match is found, the 
+     * method should return USEARCH_DONE.
+     * <p>
+     * @param position The index in the target text at which the search 
+     *                 should start.
+     * @param status for error codes if it occurs.
+     */
+     virtual UTextOffset handlePrev(UTextOffset position, UErrorCode &status) 
+                                                                         = 0;
+
+    /**
+     * Sets the length of the currently matched string in the text string to
+     * be searched.
+     * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
+     * methods should call this when they find a match in the target text.
+     * @param length length of the matched text.
+     */
+    virtual void setMatchLength(int32_t length);
+
+    /**
+     * Sets the offset of the currently matched string in the text string to
+     * be searched.
+     * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
+     * methods should call this when they find a match in the target text.
+     * @param position start offset of the matched text.
+     */
+    virtual void setMatchStart(UTextOffset position);
+
+    /**
+    * sets match not found 
+    */
+    void setMatchNotFound();
+};
+
+inline UBool SearchIterator::operator!=(const SearchIterator &that) const
+{
+   return !(*this == that); // negation of operator==, which is declared virtual
+}
+
+#endif
+
--- a/icu4c/source/i18n/unicode/stsearch.h
+++ b/icu4c/source/i18n/unicode/stsearch.h
@ -0,0 +1,433 @@
+/*
+**********************************************************************
+*   Copyright (C) 2001 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  03/22/2000   helena      Creation.
+**********************************************************************
+*/
+
+#ifndef STSEARCH_H
+#define STSEARCH_H
+
+#include "unicode/tblcoll.h"
+#include "unicode/coleitr.h"
+#include "unicode/search.h"
+
+/**
+ * <tt>StringSearch</tt> is a <tt>SearchIterator</tt> that provides
+ * language-sensitive text searching based on the comparison rules defined
+ * in a {@link RuleBasedCollator} object.
+ * StringSearch ensures that language eccentricity can be 
+ * handled, e.g. for the German collator, characters ß and SS will be matched 
+ * if case is chosen to be ignored. 
+ * See the <a href=http://oss.software.ibm.com/icu/develop/collation/ICU_collation_design.htm>
+ * "ICU Collation Design Document"</a> for more information.
+ * <p> 
+ * The algorithm implemented is a modified form of the Boyer-Moore search.
+ * For more information see 
+ * <a href=http://oss.software.ibm.com/icu/docs/papers/text-search.html>
+ * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i> 
+ * in February, 1999.
+ * <p>
+ * There are 2 match options for selection:<br>
+ * Let S' be the sub-string of a text string S between the offsets start and 
+ * end <start, end>.
+ * <br>
+ * A pattern string P matches a text string S at the offsets <start, end> 
+ * if
+ * <pre> 
+ * option 1. Some canonical equivalent of P matches some canonical equivalent 
+ *           of S'
+ * option 2. P matches S' and if P starts or ends with a combining mark, 
+ *           there exists no non-ignorable combining mark before or after S' 
+ *           in S respectively. 
+ * </pre>
+ * Option 2 will be the default.
+ * <p>
+ * This search has APIs similar to that of other text iteration mechanisms 
+ * such as the break iterators in <tt>BreakIterator</tt>. Using these 
+ * APIs, it is easy to scan through text looking for all occurrences of 
+ * a given pattern. This search iterator allows changing of direction by 
+ * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>. 
+ * Though a direction change can occur without calling <tt>reset</tt> first,  
+ * this operation comes with some speed penalty.
+ * Match results in the forward direction will match the result matches in 
+ * the backwards direction in the reverse order.
+ * <p>
+ * <tt>SearchIterator</tt> provides APIs to specify the starting position 
+ * within the text string to be searched, e.g. <tt>setOffset</tt>,
+ * <tt>preceding</tt> and <tt>following</tt>. Since the 
+ * starting position will be set as it is specified, please take note that 
+ * there are some danger points at which the search may return incorrect 
+ * results:
+ * <ul>
+ * <li> The midst of a substring that requires normalization.
+ * <li> If the following match is to be found, the position should not be the
+ *      second character which requires to be swapped with the preceding 
+ *      character. Vice versa, if the preceding match is to be found, 
+ *      position to search from should not be the first character which 
+ *      requires to be swapped with the next character. E.g certain Thai and
+ *      Lao characters require swapping.
+ * <li> If a following pattern match is to be found, any position within a 
+ *      contracting sequence except the first will fail. Vice versa if a 
+ *      preceding pattern match is to be found, an invalid starting point 
+ *      would be any character within a contracting sequence except the last.
+ * </ul>
+ * <p>
+ * A breakiterator can be used if only matches at logical breaks are desired.
+ * <p>
+ * Options are provided to handle overlapping matches. 
+ * E.g. In English, overlapping matches produce the results 0 and 2 
+ * for the pattern "abab" in the text "ababab", whereas mutually 
+ * exclusive matches only produce the result 0.
+ * <p>
+ * Though collator attributes will be taken into consideration while 
+ * performing matches, there are no APIs here for setting and getting the 
+ * attributes. These attributes can be set by getting the collator
+ * from <tt>getCollator</tt> and using the APIs in <tt>coll.h</tt>.
+ * <p> 
+ * Restriction: <br>
+ * Currently there are no composite characters that consist of a
+ * character with combining class > 0 before a character with combining 
+ * class == 0. However, if such a character exists in the future,  
+ * StringSearch does not guarantee the results for option 1.
+ * <p>
+ * Consult the <tt>SearchIterator</tt> documentation for information on
+ * and examples of how to use instances of this class to implement text
+ * searching.
+ * <pre><code>
+ * UnicodeString target("The quick brown fox jumped over the lazy fox");
+ * UnicodeString pattern("fox");
+ * UErrorCode    status = U_ZERO_ERROR;
+ * SearchIterator *iter = new StringSearch(pattern, target,
+ *                             Locale::getDefault(), NULL, status);
+ * for (int pos = iter->first(status); pos != USEARCH_DONE; 
+ *                                     pos = iter->next(status)) {
+ *     printf("Found match at %d pos, length is %d\n", pos, 
+ *                                             iter->getMatchedLength());
+ * }
+ * </code></pre>
+ * @see SearchIterator
+ * @see RuleBasedCollator
+ */
+
+class U_I18N_API StringSearch : public SearchIterator
+{
+public:
+
+    // public constructors and destructors --------------------------------
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument locale 
+     * language rule set. A collator will be created in the process, which 
+     * will be owned by this instance and will be deleted during 
+     * destruction.
+     * @param pattern The text for which this object will search.
+     * @param text    The text in which to search for the pattern.
+     * @param locale  A locale which defines the language-sensitive 
+     *                comparison rules used to determine whether text in the 
+     *                pattern and target matches. 
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
+     *                the matches that are found. Matches whose start and end 
+     *                indices in the target text are not boundaries as 
+     *                determined by the <tt>BreakIterator</tt> are 
+     *                ignored. If this behavior is not desired, 
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status  for errors if any
+     */
+    StringSearch(const UnicodeString &pattern, const UnicodeString &text,
+                 const Locale        &locale,       
+                       BreakIterator *breakiter,
+                       UErrorCode    &status);
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument collator 
+     * language rule set. Note, user retains the ownership of this collator, 
+     * it does not get destroyed during this instance's destruction.
+     * @param pattern The text for which this object will search.
+     * @param text    The text in which to search for the pattern.
+     * @param coll    A <tt>RuleBasedCollator</tt> object which defines 
+     *                the language-sensitive comparison rules used to 
+     *                determine whether text in the pattern and target 
+     *                matches. User is responsible for the clearing of this
+     *                object.
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
+     *                the matches that are found. Matches whose start and end 
+     *                indices in the target text are not boundaries as 
+     *                determined by the <tt>BreakIterator</tt> are 
+     *                ignored. If this behavior is not desired, 
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status  for errors if any
+     */
+    StringSearch(const UnicodeString     &pattern, 
+                 const UnicodeString     &text,
+                       RuleBasedCollator *coll,       
+                       BreakIterator     *breakiter,
+                       UErrorCode        &status);
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument locale 
+     * language rule set. A collator will be created in the process, which 
+     * will be owned by this instance and will be deleted during 
+     * destruction.
+     * <p>
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
+     * will be done during searching for this version. The block of text 
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * @param pattern The text for which this object will search.
+     * @param text    The text iterator in which to search for the pattern.
+     * @param locale  A locale which defines the language-sensitive 
+     *                comparison rules used to determine whether text in the 
+     *                pattern and target matches. User is responsible for 
+     *                the clearing of this object.
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
+     *                the matches that are found. Matches whose start and end 
+     *                indices in the target text are not boundaries as 
+     *                determined by the <tt>BreakIterator</tt> are 
+     *                ignored. If this behavior is not desired, 
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status  for errors if any
+     */
+    StringSearch(const UnicodeString &pattern, CharacterIterator &text,
+                 const Locale        &locale, 
+                       BreakIterator *breakiter,
+                       UErrorCode    &status);
+
+    /**
+     * Creating a <tt>StringSearch</tt> instance using the argument collator 
+     * language rule set. Note, user retains the ownership of this collator, 
+     * it does not get destroyed during this instance's destruction.
+     * <p>
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
+     * will be done during searching for this version. The block of text 
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * @param pattern The text for which this object will search.
+     * @param text    The text in which to search for the pattern.
+     * @param coll    A <tt>RuleBasedCollator</tt> object which defines 
+     *                the language-sensitive comparison rules used to 
+     *                determine whether text in the pattern and target 
+     *                matches. User is responsible for the clearing of this
+     *                object.
+     * @param breakiter A <tt>BreakIterator</tt> object used to constrain 
+     *                the matches that are found. Matches whose start and end 
+     *                indices in the target text are not boundaries as 
+     *                determined by the <tt>BreakIterator</tt> are 
+     *                ignored. If this behavior is not desired, 
+     *                <tt>NULL</tt> can be passed in instead.
+     * @param status  for errors if any
+     */
+    StringSearch(const UnicodeString     &pattern, CharacterIterator &text,
+                       RuleBasedCollator *coll, 
+                       BreakIterator     *breakiter,
+                       UErrorCode        &status);
+
+    /**
+     * Copy constructor that creates a StringSearch instance with the same 
+     * behavior, and iterating over the same text.
+     * @param that StringSearch instance to be copied.
+     */
+    StringSearch(const StringSearch &that);
+
+    /**
+    * Destructor. Cleans up the search iterator data struct.
+    * If a collator is created in the constructor, it will be destroyed here.
+    */
+    virtual ~StringSearch(void);
+
+    // operator overloading ---------------------------------------------
+
+    /**
+     * Assignment operator. Sets this iterator to have the same behavior,
+     * and iterate over the same text, as the one passed in.
+     * @param that instance to be copied.
+     */
+    virtual StringSearch & operator=(const StringSearch &that);
+
+    /**
+     * Equality operator. 
+     * @param that instance to be compared.
+     * @return TRUE if both instances have the same attributes, 
+     *         breakiterators, collators and iterate over the same text 
+     *         while looking for the same pattern.
+     */
+    virtual UBool operator==(const SearchIterator &that) const;
+
+    // public get and set methods ----------------------------------------
+
+    /**
+     * Sets the index to point to the given position, and clears any state 
+     * that's affected.
+     * <p>
+     * This method takes the argument index and sets the position in the text 
+     * string accordingly without checking if the index is pointing to a 
+     * valid starting point to begin searching. 
+     * @param position within the text to be set
+     * @param status for errors if it occurs
+     */
+    virtual void setOffset(UTextOffset position, UErrorCode &status);
+
+    /**
+     * Return the current index in the text being searched.
+     * If the iteration has gone past the end of the text
+     * (or past the beginning for a backwards search), {@link #USEARCH_DONE} 
+     * is returned.
+     * @return current index in the text being searched.
+     */
+    virtual UTextOffset getOffset(void) const;
+
+    /**
+     * Set the target text to be searched.
+     * Text iteration will hence begin at the start of the text string. 
+     * This method is 
+     * useful if you want to re-use an iterator to search for the same 
+     * pattern within a different body of text.
+     * @param text text string to be searched
+     * @param status  for errors if any
+     */
+    virtual void setText(const UnicodeString &text, UErrorCode &status);
+    
+    /**
+     * Set the target text to be searched.
+     * Text iteration will hence begin at the start of the text string. 
+     * This method is 
+     * useful if you want to re-use an iterator to search for the same 
+     * pattern within a different body of text.
+     * Note: No parsing of the text within the <tt>CharacterIterator</tt> 
+     * will be done during searching for this version. The block of text 
+     * in <tt>CharacterIterator</tt> will be used as it is.
+     * @param text text string to be searched
+     * @param status  for errors if any
+     */
+    virtual void setText(CharacterIterator &text, UErrorCode &status);
+
+    /**
+     * Gets the collator used for the language rules. 
+     * <p>
+     * Deleting the returned <tt>RuleBasedCollator</tt> before calling 
+     * the destructor would cause the string search to fail.
+     * The destructor will delete the collator if this instance owns it.
+     * @return collator used for string search
+     */
+    RuleBasedCollator * getCollator() const;
+    
+    /**
+     * Sets the collator used for the language rules. User retains the 
+     * ownership of this collator, thus the responsibility of deletion lies 
+     * with the user. This method causes internal data such as Boyer-Moore 
+     * shift tables to be recalculated, but the iterator's position is 
+     * unchanged.
+     * @param coll    collator 
+     * @param status  for errors if any
+     */
+    void setCollator(RuleBasedCollator *coll, UErrorCode &status);
+    
+    /**
+     * Sets the pattern used for matching.
+     * Internal data like the Boyer Moore table will be recalculated, but 
+     * the iterator's position is unchanged.
+     * @param pattern search pattern to be found
+     * @param status for errors if any
+     */
+    void setPattern(const UnicodeString &pattern, UErrorCode &status);
+    
+    /**
+     * Gets the search pattern.
+     * @return pattern used for matching
+     */
+    const UnicodeString & getPattern() const;
+
+    // public methods ----------------------------------------------------
+
+    /** 
+     * Reset the iteration.
+     * Search will begin at the start of the text string if a forward 
+     * iteration is initiated before a backwards iteration. Otherwise if 
+     * a backwards iteration is initiated before a forwards iteration, the 
+     * search will begin at the end of the text string.
+     */
+    virtual void reset();
+
+    /**
+     * Returns a copy of StringSearch with the same behavior, and 
+     * iterating over the same text, as this one. Note that all data will be
+     * replicated, except for the user-specified collator and the
+     * breakiterator.
+     * @return cloned object
+     */
+    virtual SearchIterator * safeClone(void) const;
+    
+protected:
+
+    // protected method -------------------------------------------------
+
+    /**
+     * Search forward for matching text, starting at a given location.
+     * Clients should not call this method directly; instead they should 
+     * call {@link SearchIterator#next}.
+     * <p>
+     * If a match is found, this method returns the index at which the match
+     * starts and calls {@link SearchIterator#setMatchLength} with the number 
+     * of characters in the target text that make up the match. If no match 
+     * is found, the method returns <tt>USEARCH_DONE</tt>.
+     * <p>
+     * The <tt>StringSearch</tt> is adjusted so that its current index 
+     * (as returned by {@link #getOffset}) is the match position if one was 
+     * found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
+     * @param position The index in the target text at which the search 
+     *                 starts
+     * @param status for errors if any occurs
+     * @return The index at which the matched text in the target starts, or 
+     *         USEARCH_DONE if no match was found.
+     */
+    virtual UTextOffset handleNext(UTextOffset position, UErrorCode &status);
+
+    /**
+     * Search backward for matching text, starting at a given location.
+     * Clients should not call this method directly; instead they should call
+     * <tt>SearchIterator.previous()</tt>, which this method overrides.
+     * <p>
+     * If a match is found, this method returns the index at which the match
+     * starts and calls {@link SearchIterator#setMatchLength} with the number 
+     * of characters in the target text that make up the match. If no match 
+     * is found, the method returns <tt>USEARCH_DONE</tt>.
+     * <p>
+     * The <tt>StringSearch</tt> is adjusted so that its current index 
+     * (as returned by {@link #getOffset}) is the match position if one was 
+     * found.
+     * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+     * the <tt>StringSearch</tt> will be adjusted to the index USEARCH_DONE.
+     * @param position The index in the target text at which the search 
+     *                 starts.
+     * @param status for errors if any occurs
+     * @return The index at which the matched text in the target starts, or 
+     *         USEARCH_DONE if no match was found.
+     */
+    virtual UTextOffset handlePrev(UTextOffset position, UErrorCode &status);
+    
+private :
+
+    // private data members ----------------------------------------------
+
+    /**
+    * RuleBasedCollator, contains exactly the same UCollator * in m_strsrch_
+    */
+    RuleBasedCollator  m_collator_;
+    /**
+    * Pattern text
+    */
+    UnicodeString      m_pattern_;
+    /**
+    * Corresponding collation rules
+    */
+    UnicodeString      m_collation_rules_;
+    /**
+    * String search struct data
+    */
+    UStringSearch     *m_strsrch_;
+};
+
+#endif
+
--- a/icu4c/source/i18n/unicode/tblcoll.h
+++ b/icu4c/source/i18n/unicode/tblcoll.h
@ -849,6 +849,11 @@ private:
  */
  friend class Collator;

+  /**
+  * Searching over collation elements in a character source
+  */
+  friend class StringSearch;
+
  // private constructors --------------------------------------------------

  /**
@ -893,10 +898,24 @@ private:
  /**
  * Creates the c struct for ucollator
  * @param collator new ucollator data
-  * @param status error status
  */
  void setUCollator(UCollator *collator);

+  /**
+  * Creates the c struct for ucollator. This is used internally by StringSearch.
+  * Hence the responsibility of cleaning up the ucollator is not done by
+  * this RuleBasedCollator. The isDataOwned flag is set to FALSE.
+  * @param collator new ucollator data
+  * @param rules corresponding collation rules
+  */
+  void setUCollator(UCollator *collator, UnicodeString *rules);
+
+  /**
+  * Get UCollator data struct. Used only by StringSearch.
+  * @return UCollator data struct
+  */
+  const UCollator * getUCollator();
+
  /**
  * Converts C's UCollationResult to EComparisonResult
  * @param result member of the enum UComparisonResult
@ -947,11 +966,29 @@ inline void RuleBasedCollator::setUCollator(const Locale &locale,

 inline void RuleBasedCollator::setUCollator(UCollator *collator)
 {
-  if (ucollator && dataIsOwned)
+  if (ucollator && dataIsOwned) {
    ucol_close(ucollator);
+  }
  ucollator = collator;
 }

+inline void RuleBasedCollator::setUCollator(UCollator     *collator, 
+                                            UnicodeString *rules)
+{
+    if (ucollator && dataIsOwned) {   // only release data this object currently owns
+        ucol_close(ucollator);
+        delete urulestring;
+    }
+    ucollator   = collator;           // adopt the caller-supplied collator pointer as-is
+    urulestring = rules;              // adopt the caller-supplied rules pointer as-is
+    dataIsOwned = FALSE;              // mark the new data as not owned, so the owned-data branch above will not free it
+}
+
+inline const UCollator * RuleBasedCollator::getUCollator()
+{
+    return ucollator;   // exposes the internal pointer; no copy is made and no ownership flag is changed
+}
+
 inline Collator::EComparisonResult RuleBasedCollator::getEComparisonResult(
                                           const UCollationResult &result) const
 {
--- a/icu4c/source/i18n/unicode/usearch.h
+++ b/icu4c/source/i18n/unicode/usearch.h
@ -0,0 +1,546 @@
+/*
+**********************************************************************
+*   Copyright (C) 2001 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  06/28/2001   synwee      Creation.
+**********************************************************************
+*/
+#ifndef USEARCH_H
+#define USEARCH_H
+
+#include "unicode/utypes.h"
+#include "unicode/ucol.h"
+#include "unicode/ucoleitr.h"
+#include "unicode/ubrk.h"
+
+/**
+ * C Apis for an engine that provides language-sensitive text searching based 
+ * on the comparison rules defined in a <tt>UCollator</tt> data struct,
+ * see <tt>ucol.h</tt>. This ensures that language eccentricity can be 
+ * handled, e.g. for the German collator, characters ß and SS will be matched 
+ * if case is chosen to be ignored. 
+ * See the <a href=http://oss.software.ibm.com/icu/develop/collation/ICU_collation_design.htm>
+ * "ICU Collation Design Document"</a> for more information.
+ * <p> 
+ * The algorithm implemented is a modified form of the Boyer Moore's search.
+ * For more information on the algorithm, see 
+ * <a href=http://oss.software.ibm.com/icu/docs/papers/text-search.html>
+ * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i> 
+ * in February, 1999.
+ * <p>
+ * There are 2 match options for selection:<br>
+ * Let S' be the sub-string of a text string S between the offsets start and 
+ * end <start, end>.
+ * <br>
+ * A pattern string P matches a text string S at the offsets <start, end> 
+ * if
+ * <pre> 
+ * option 1. Some canonical equivalent of P matches some canonical equivalent 
+ *           of S'
+ * option 2. P matches S' and if P starts or ends with a combining mark, 
+ *           there exists no non-ignorable combining mark before or after S' 
+ *           in S respectively. 
+ * </pre>
+ * Option 2 will be the default.
+ * <p>
+ * This search has APIs similar to that of other text iteration mechanisms 
+ * such as the break iterators in <tt>ubrk.h</tt>. Using these 
+ * APIs, it is easy to scan through text looking for all occurrences of 
+ * a given pattern. This search iterator allows changing of direction by 
+ * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>. 
+ * Though a direction change can occur without calling <tt>reset</tt> first,  
+ * this operation comes with some speed penalty.
+ * Generally, the matches found in the forward direction will be the same 
+ * as the matches found in the backward direction, in reverse order.
+ * <p>
+ * <tt>usearch.h</tt> provides APIs to specify the starting position 
+ * within the text string to be searched, e.g. <tt>usearch_setOffset</tt>,
+ * <tt>usearch_preceding</tt> and <tt>usearch_following</tt>. Since the 
+ * starting position will be set as it is specified, please take note that 
+ * there are some dangerous positions which the search may render incorrect 
+ * results:
+ * <ul>
+ * <li> The midst of a substring that requires normalization.
+ * <li> If the following match is to be found, the position should not be the
+ *      second character which requires to be swapped with the preceding 
+ *      character. Vice versa, if the preceding match is to be found, 
+ *      position to search from should not be the first character which 
+ *      requires to be swapped with the next character. E.g certain Thai and
+ *      Lao characters require swapping.
+ * <li> If a following pattern match is to be found, any position within a 
+ *      contracting sequence except the first will fail. Vice versa if a 
+ *      preceding pattern match is to be found, an invalid starting point 
+ *      would be any character within a contracting sequence except the last.
+ * </ul>
+ * <p>
+ * A breakiterator can be used if only matches at logical breaks are desired.
+ * <p>
+ * Options are provided to handle overlapping matches. 
+ * E.g. In English, overlapping matches produces the result 0 and 2 
+ * for the pattern "abab" in the text "ababab", whereas mutually 
+ * exclusive matches only produce the result of 0.
+ * <p>
+ * Though collator attributes will be taken into consideration while 
+ * performing matches, there are no APIs here for setting and getting the 
+ * attributes. These attributes can be set by getting the collator
+ * from <tt>usearch_getCollator</tt> and using the APIs in <tt>ucol.h</tt>.
+ * <p> 
+ * Restriction: <br>
+ * Currently there are no composite characters that consists of a
+ * character with combining class > 0 before a character with combining 
+ * class == 0. However, if such a character exists in the future, the 
+ * search mechanism does not guarantee the results for option 1.
+ * 
+ * <p>
+ * Example of use:<br>
+ * <pre><code>
+ * char *tgtstr = "The quick brown fox jumped over the lazy fox";
+ * char *patstr = "fox";
+ * UChar target[64];
+ * UChar pattern[16];
+ * UErrorCode status = U_ZERO_ERROR;
+ * u_uastrcpy(target, tgtstr);
+ * u_uastrcpy(pattern, patstr);
+ *
+ * UStringSearch *search = usearch_open(pattern, -1, target, -1, "en_US", 
+ *                                  NULL, &status);
+ * if (U_SUCCESS(status)) {
+ *     for (int pos = usearch_first(search, &status); 
+ *                                      pos != USEARCH_DONE; 
+ *                                      pos = usearch_next(search, &status)) {
+ *         printf("Found match at %d pos, length is %d\n", pos, 
+ *                                        usearch_getMatchedLength(search));
+ *     }
+ * }
+ * </code></pre>
+ */
+
+/**
+* DONE is returned by previous() and next() after all valid matches have 
+* been returned, and by first() and last() if there are no matches at all.
+*/
+#define USEARCH_DONE -1
+
+/**
+* Data structure for searching
+*/
+struct UStringSearch;
+typedef struct UStringSearch UStringSearch;
+
+typedef enum {
+    /** Option for overlapping matches; see the "abab" example in the header documentation */
+    USEARCH_OVERLAP,
+    /** 
+    Option for canonical matches, i.e. option 1 in the header documentation.
+    The default value will be USEARCH_OFF.
+    */
+    USEARCH_CANONICAL_MATCH,
+    USEARCH_ATTRIBUTE_COUNT /* equals the number of attributes listed above */
+} USearchAttribute;
+
+typedef enum {
+    /** default value for any USearchAttribute; numbering starts at -1 here */
+    USEARCH_DEFAULT = -1,
+    /** "off" value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
+    USEARCH_OFF, 
+    /** "on" value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
+    USEARCH_ON,
+    USEARCH_ATTRIBUTE_VALUE_COUNT /* evaluates to 2 because USEARCH_DEFAULT is -1 */
+} USearchAttributeValue;
+
+/* open and close ------------------------------------------------------ */
+
+/**
+* Creating a search iterator data struct using the argument locale language
+* rule set. A collator will be created in the process, which will be owned by
+* this search and will be deleted in <tt>usearch_close</tt>.
+* @param pattern for matching
+* @param patternlength length of the pattern, -1 for null-termination
+* @param text text string
+* @param textlength length of the text string, -1 for null-termination
+* @param locale name of locale for the rules to be used
+* @param breakiter A BreakIterator that will be used to restrict the points
+*                  at which matches are detected. If a match is found, but 
+*                  the match's start or end index is not a boundary as 
+*                  determined by the <tt>BreakIterator</tt>, the match will 
+*                  be rejected and another will be searched for. 
+*                  If this parameter is <tt>NULL</tt>, no break detection is 
+*                  attempted.
+* @param status for errors if it occurs
+* @return search iterator data structure
+*/
+U_CAPI UStringSearch * U_EXPORT2 usearch_open(const UChar          *pattern, 
+                                              int32_t         patternlength, 
+                                        const UChar          *text, 
+                                              int32_t         textlength,
+                                        const char           *locale,
+                                              UBreakIterator *breakiter,
+                                              UErrorCode     *status);
+
+/**
+* Creating a search iterator data struct using the argument collator language
+* rule set. Note, user retains the ownership of this collator, thus the 
+* responsibility of deletion lies with the user.
+* @param pattern for matching
+* @param patternlength length of the pattern, -1 for null-termination
+* @param text text string
+* @param textlength length of the text string, -1 for null-termination
+* @param collator used for the language rules
+* @param breakiter A BreakIterator that will be used to restrict the points
+*                  at which matches are detected. If a match is found, but 
+*                  the match's start or end index is not a boundary as 
+*                  determined by the <tt>BreakIterator</tt>, the match will 
+*                  be rejected and another will be searched for. 
+*                  If this parameter is <tt>NULL</tt>, no break detection is 
+*                  attempted.
+* @param status for errors if it occurs
+* @return search iterator data structure
+*/
+U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator(
+                                         const UChar *pattern, 
+                                               int32_t         patternlength,
+                                         const UChar          *text, 
+                                               int32_t         textlength,
+                                         const UCollator      *collator,
+                                               UBreakIterator *breakiter,
+                                               UErrorCode     *status);
+
+/**
+* Destroying and cleaning up the search iterator data struct.
+* If a collator is created in usearch_open, it will be destroyed here.
+* @param strsrch data struct to clean up
+*/
+U_CAPI void U_EXPORT2 usearch_close(UStringSearch *strsrch);
+
+/* get and set methods -------------------------------------------------- */
+
+/**
+* Sets the current position in the text string which the next search will 
+* start from. Clears previous states. 
+* This method takes the argument index and sets the position in the text 
+* string accordingly without checking if the index is pointing to a 
+* valid starting point to begin searching. 
+* Search positions that may render incorrect results are highlighted in the
+* header comments
+* @param strsrch search iterator data struct
+* @param position position to start next search from.
+* @param status error status if any.
+*/
+U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, 
+                                        UTextOffset    position,
+                                        UErrorCode    *status);
+
+/**
+* Return the current index in the string text being searched.
+* If the iteration has gone past the end of the text (or past the beginning 
+* for a backwards search), {@link #USEARCH_DONE} is returned.
+* @param strsrch search iterator data struct
+*/
+U_CAPI UTextOffset U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch);
+    
+/**
+* Sets the text searching attributes located in the enum USearchAttribute
+* with values from the enum USearchAttributeValue.
+* USEARCH_DEFAULT can be used for all attributes for resetting.
+* @param strsrch search iterator data struct
+* @param attribute text attribute to be set
+* @param value text attribute value
+* @param status for errors if it occurs
+* @see #usearch_getAttribute
+*/
+U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch         *strsrch, 
+                                           USearchAttribute       attribute,
+                                           USearchAttributeValue  value,
+                                           UErrorCode            *status);
+
+/**    
+* Gets the text searching attributes.
+* @param strsrch search iterator data struct
+* @param attribute text attribute to be retrieved
+* @return text attribute value
+* @see #usearch_setAttribute
+*/
+U_CAPI USearchAttributeValue U_EXPORT2 usearch_getAttribute(
+                                         const UStringSearch    *strsrch,
+                                               USearchAttribute  attribute);
+
+/**
+* Returns the index to the match in the text string that was searched.
+* This call returns a valid result only after a successful call to 
+* {@link #usearch_first}, {@link #usearch_next}, 
+* {@link #usearch_previous}, or {@link #usearch_last}.
+* Just after construction, or after a searching method returns 
+* <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
+* <p>
+* Use usearch_getMatchedLength to get the matched string length.
+* @param strsrch search iterator data struct
+* @return index to a substring within the text string that is being 
+*         searched.
+*/
+U_CAPI UTextOffset U_EXPORT2 usearch_getMatchedStart(
+                                               const UStringSearch *strsrch);
+    
+/**
+* Returns the length of text in the string which matches the search pattern. 
+* This call returns a valid result only after a successful call to 
+* {@link #usearch_first}, {@link #usearch_next}, 
+* {@link #usearch_previous}, or {@link #usearch_last}.
+* Just after construction, or after a searching method returns 
+* <tt>USEARCH_DONE</tt>, this method will return 0.
+* @param strsrch search iterator data struct
+* @return The length of the match in the string text, or 0 if there is no 
+*         match currently.
+*/
+U_CAPI int32_t U_EXPORT2 usearch_getMatchedLength(
+                                               const UStringSearch *strsrch);
+
+/**
+* Returns the text that was matched by the most recent call to 
+* {@link #usearch_first}, {@link #usearch_next}, 
+* {@link #usearch_previous}, or {@link #usearch_last}.
+* If the iterator is not pointing at a valid match (e.g. just after 
+* construction or after <tt>USEARCH_DONE</tt> has been returned), returns
+* an empty string. If result is not large enough to store the matched text,
+* result will be filled with the partial text and a U_BUFFER_OVERFLOW_ERROR 
+* will be returned in status. result will be null-terminated whenever 
+* possible. If the buffer fits the matched text exactly, null-termination 
+* is not possible and a U_STRING_NOT_TERMINATED_ERROR is set in status.
+* Pre-flighting can be done either with resultCapacity = 0 or with the API 
+* usearch_getMatchedLength().
+* @param strsrch search iterator data struct
+* @param result UChar buffer to store the matched string
+* @param resultCapacity length of the result buffer
+* @param status error returned if result is not large enough
+* @return exact length of the matched text, not counting the null-termination
+*/
+U_CAPI int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch, 
+                                            UChar         *result, 
+                                            int32_t        resultCapacity, 
+                                            UErrorCode    *status);
+
+/**
+* Set the BreakIterator that will be used to restrict the points at which 
+* matches are detected.
+* @param strsrch search iterator data struct
+* @param breakiter A BreakIterator that will be used to restrict the points
+*                  at which matches are detected. If a match is found, but 
+*                  the match's start or end index is not a boundary as 
+*                  determined by the <tt>BreakIterator</tt>, the match will 
+*                  be rejected and another will be searched for. 
+*                  If this parameter is <tt>NULL</tt>, no break detection is 
+*                  attempted.
+* @param status for errors if it occurs
+* @see #usearch_getBreakIterator
+*/
+U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch  *strsrch, 
+                                               UBreakIterator *breakiter,
+                                               UErrorCode     *status);
+    
+/**
+* Returns the BreakIterator that is used to restrict the points at which 
+* matches are detected. This will be the same object that was passed to the 
+* constructor or to <tt>usearch_setBreakIterator</tt>. Note that 
+* <tt>NULL</tt> 
+* is a legal value; it means that break detection should not be attempted.
+* @param strsrch search iterator data struct
+* @return break iterator used
+* @see #usearch_setBreakIterator
+*/
+U_CAPI const UBreakIterator * U_EXPORT2 usearch_getBreakIterator(
+                                              const UStringSearch *strsrch);
+    
+/**
+* Set the string text to be searched. Text iteration will hence begin at the 
+* start of the text string. This method is useful if you want to re-use an 
+* iterator to search for the same pattern within a different body of text.
+* @param strsrch search iterator data struct
+* @param text new string to look for match
+* @param textlength length of the new string, -1 for null-termination
+* @param status for errors if it occurs
+* @see #usearch_getText
+*/
+U_CAPI void U_EXPORT2 usearch_setText(      UStringSearch *strsrch, 
+                                      const UChar         *text,
+                                            int32_t        textlength,
+                                            UErrorCode    *status);
+
+/**
+* Return the string text to be searched.
+* @param strsrch search iterator data struct
+* @param length returned string text length
+* @return string text 
+* @see #usearch_setText
+*/
+U_CAPI const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch, 
+                                               int32_t       *length);
+
+/**
+* Gets the collator used for the language rules. 
+* <p>
+* Deleting the returned <tt>UCollator</tt> before calling 
+* <tt>usearch_close</tt> would cause the string search to fail.
+* <tt>usearch_close</tt> will delete the collator if this search owns it.
+* @param strsrch search iterator data struct
+* @return collator
+*/
+U_CAPI UCollator * U_EXPORT2 usearch_getCollator(
+                                               const UStringSearch *strsrch);
+
+/**
+* Sets the collator used for the language rules. User retains the ownership 
+* of this collator, thus the responsibility of deletion lies with the user.
+* This method causes internal data such as Boyer-Moore shift tables to  
+* be recalculated, but the iterator's position is unchanged.
+* @param strsrch search iterator data struct
+* @param collator to be used
+* @param status for errors if it occurs
+*/
+U_CAPI void U_EXPORT2 usearch_setCollator(      UStringSearch *strsrch, 
+                                          const UCollator     *collator,
+                                                UErrorCode    *status);
+
+/**
+* Sets the pattern used for matching.
+* Internal data like the Boyer Moore table will be recalculated, but the 
+* iterator's position is unchanged.
+* @param strsrch search iterator data struct
+* @param pattern string
+* @param patternlength pattern length, -1 for null-terminated string
+* @param status for errors if it occurs
+*/
+U_CAPI void U_EXPORT2 usearch_setPattern(      UStringSearch *strsrch, 
+                                         const UChar         *pattern,
+                                               int32_t        patternlength,
+                                               UErrorCode    *status);
+
+/**
+* Gets the search pattern
+* @param strsrch search iterator data struct
+* @param length return length of the pattern, -1 indicates that the pattern 
+*               is null-terminated
+* @return pattern string
+*/
+U_CAPI const UChar * U_EXPORT2 usearch_getPattern(
+                                               const UStringSearch *strsrch, 
+                                                     int32_t       *length);
+
+/* methods ------------------------------------------------------------- */
+
+/**
+* Returns the first index at which the string text matches the search 
+* pattern.  
+* The iterator is adjusted so that its current index (as returned by 
+* {@link #usearch_getOffset}) is the match position if one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index USEARCH_DONE.
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The character index of the first match, or 
+* <tt>USEARCH_DONE</tt> if there are no matches.
+*/
+U_CAPI UTextOffset U_EXPORT2 usearch_first(UStringSearch *strsrch, 
+                                           UErrorCode    *status);
+
+/**
+* Returns the first index greater than <tt>position</tt> at which the string 
+* text 
+* matches the search pattern. The iterator is adjusted so that its current 
+* index (as returned by {@link #usearch_getOffset}) is the match position if 
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index USEARCH_DONE.
+* <p>
+* Search positions that may render incorrect results are highlighted in the
+* header comments.
+* @param strsrch search iterator data struct
+* @param position to start the search at
+* @param status for errors if it occurs
+* @return The character index of the first match following <tt>position</tt>,
+*         or <tt>USEARCH_DONE</tt> if there are no matches.
+*/
+U_CAPI UTextOffset U_EXPORT2 usearch_following(UStringSearch *strsrch, 
+                                               UTextOffset    position, 
+                                               UErrorCode    *status);
+    
+/**
+* Returns the last index in the target text at which it matches the search 
+* pattern. The iterator is adjusted so that its current 
+* index (as returned by {@link #usearch_getOffset}) is the match position if 
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
+* @param strsrch search iterator data struct
+* @param status for errors, if any occur
+* @return The index of the last match, or <tt>USEARCH_DONE</tt> if there 
+*         are no matches.
+*/
+U_CAPI UTextOffset U_EXPORT2 usearch_last(UStringSearch *strsrch, 
+                                          UErrorCode    *status);
+
+/**
+* Returns the first index less than <tt>position</tt> at which the string text 
+* matches the search pattern. The iterator is adjusted so that its current 
+* index (as returned by {@link #usearch_getOffset}) is the match position if 
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
+* <p>
+* Search positions that may render incorrect results are highlighted in the
+* header comments.
+* @param strsrch search iterator data struct
+* @param position index position the search is to begin at
+* @param status for errors, if any occur
+* @return The character index of the first match preceding 
+*         <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are no matches.
+*/
+U_CAPI UTextOffset U_EXPORT2 usearch_preceding(UStringSearch *strsrch, 
+                                               UTextOffset    position, 
+                                               UErrorCode    *status);
+    
+/**
+* Returns the index of the next point at which the string text matches the
+* search pattern, starting from the current position.
+* The iterator is adjusted so that its current 
+* index (as returned by {@link #usearch_getOffset}) is the match position if 
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
+* @param strsrch search iterator data struct
+* @param status for errors, if any occur
+* @return The index of the next match after the current position, or 
+*         <tt>USEARCH_DONE</tt> if there are no more matches.
+* @see #usearch_first
+*/
+U_CAPI UTextOffset U_EXPORT2 usearch_next(UStringSearch *strsrch, 
+                                          UErrorCode    *status);
+
+/**
+* Returns the index of the previous point at which the string text matches
+* the search pattern, starting at the current position.
+* The iterator is adjusted so that its current 
+* index (as returned by {@link #usearch_getOffset}) is the match position if 
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
+* @param strsrch search iterator data struct
+* @param status for errors, if any occur
+* @return The index of the previous match before the current position,
+*         or <tt>USEARCH_DONE</tt> if there are no more matches.
+*/
+U_CAPI UTextOffset U_EXPORT2 usearch_previous(UStringSearch *strsrch, 
+                                              UErrorCode    *status);
+    
+/** 
+* Resets the iteration.
+* If a forward iteration is initiated before a backwards iteration, the 
+* search will begin at the start of the text string. Otherwise, if a 
+* backwards iteration is initiated before a forwards iteration, the search 
+* will begin at the end of the text string.
+* @param strsrch search iterator data struct
+*/
+U_CAPI void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
+
+#endif
+
+
--- a/icu4c/source/i18n/usearch.cpp
+++ b/icu4c/source/i18n/usearch.cpp
--- a/icu4c/source/i18n/usrchimp.h
+++ b/icu4c/source/i18n/usrchimp.h
@ -0,0 +1,123 @@
+/*
+**********************************************************************
+*   Copyright (C) 2001 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  08/13/2001   synwee      Creation.
+**********************************************************************
+*/
+#ifndef USRCHIMP_H
+#define USRCHIMP_H
+
+#include "unicode/utypes.h"
+#include "unicode/ucol.h"
+#include "unicode/ucoleitr.h"
+#include "unicode/ubrk.h"
+
+#define INITIAL_ARRAY_SIZE_       256 // size of CEBuffer and the canonical accent arrays
+#define MAX_TABLE_SIZE_           256 // size of the shift and backShift tables
+
+struct USearch { // text being searched plus the state of the current match
+    // kept here since the collation element iterator has no getText API
+    const UChar              *text;
+          int32_t             textLength; // exact length
+          UBool               isOverlap;
+          UBool               isCanonicalMatch;
+          UBreakIterator     *breakIter;
+    // USEARCH_DONE is the default value.
+    // If matchedIndex is USEARCH_DONE and we are not at the start or the end
+    // of the text (depending on the iteration direction), it means that we
+    // cannot find any more matches in that particular direction.
+          UTextOffset         matchedIndex; 
+          int32_t             matchedLength;
+          UBool               isForwardSearching;
+          UBool               reset;
+};
+
+struct UPattern { // pattern text plus its CE array and shift tables
+    const UChar    *text;
+          int32_t   textLength; // exact length
+          // this length is required for the backwards CE comparison
+          int32_t   CELength; 
+          uint32_t *CE;
+          uint32_t  CEBuffer[INITIAL_ARRAY_SIZE_];
+          UBool     hasPrefixAccents;
+          UBool     hasSuffixAccents;
+          int32_t   defaultShiftSize;
+          int32_t   shift[MAX_TABLE_SIZE_];
+          int32_t   backShift[MAX_TABLE_SIZE_];
+};
+
+struct UStringSearch { // search state, pattern data and collator settings
+    struct USearch            *search;
+    struct UPattern            pattern;
+    const  UCollator          *collator;
+    // positions within the collation element iterator are used to determine
+    // if we are at the start of the text.
+           UCollationElements *textIter;
+           UBool               ownCollator;
+           UBool               toNormalize;
+           UCollationStrength  strength;
+           uint32_t            ceMask;
+           uint32_t            variableTop;
+           UBool               toShift;
+           UChar               canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
+           UChar               canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
+};
+
+/**
+* Finds the next exact match, without checking the ends for extra accents.
+* The match after the position within the collation element iterator is to be
+* found. 
+* After a match is found, the offset in the collation element iterator will be
+* shifted to the start of the match.
+* Implementation note: 
+* For tertiary we can't use the collator->tertiaryMask; that is a 
+* preprocessed mask that takes into account case options. Since we are only 
+* concerned with exact matches, we don't need that.
+* Alternate handling - since only the 16 most significant bits are used, 
+* we can safely do a compare without masking if the ce is a variable, we mask 
+* and get only the primary values; no shifting to quaternary is required since
+* all primary values less than variabletop will need to be masked off anyway.
+* If the end character is composite and the pattern ce does not match the text 
+* ce, we skip it until we find a match in the end composite character or when 
+* it has passed the character. This is so that we can match pattern "a" with
+* the text "\u00e6".
+* @param strsrch string search data
+* @param status error status if any
+* @return TRUE if an exact match is found, FALSE otherwise
+*/
+UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);
+
+/**
+* Finds the next canonical match.
+* According to the definition, matches found here will include the whole span 
+* of beginning and ending accents if the match overlaps that region.
+* @param strsrch string search data
+* @param status error status if any
+* @return TRUE if a canonical match is found, FALSE otherwise
+*/
+UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);
+
+/**
+* Gets the previous exact match; comments follow from usearch_handleNextExact.
+* @param strsrch string search data
+* @param status error status if any
+* @return TRUE if an exact match is found, FALSE otherwise
+*/
+UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);
+
+/**
+* Finds the previous canonical match.
+* According to the definition, matches found here will include the whole span 
+* of beginning and ending accents if the match overlaps that region.
+* @param strsrch string search data
+* @param status error status if any
+* @return TRUE if a canonical match is found, FALSE otherwise
+*/
+UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, 
+                                      UErrorCode    *status);
+
+#endif
+
+