ICU-1004 New sample codes for Boyer Moore search.

X-SVN-Rev: 5030
2025-04-14 17:24:01 +00:00 · 2001-06-20 21:47:31 +00:00 · 2001-06-20 21:47:31 +00:00 · 1973c85169
commit 1973c85169
parent e9f3387660
7 changed files with 2135 additions and 0 deletions
--- a/icu4c/source/samples/search/search.cpp
+++ b/icu4c/source/samples/search/search.cpp
@ -0,0 +1,170 @@
+/**************************************************************************
+*
+*   Copyright (C) 2000, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+***************************************************************************
+*   file name:  search.cpp
+*
+*   created on: 2001June8
+*   created by: Helena Shih
+*
+*   Sample code for the ICU Search C++ routines.  
+*/
+#include <stdio.h>
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/locid.h"
+
+#include "strsrch.h"
+
+/**
+ * Sample driver for StringSearch.
+ *
+ * Runs four groups of checks and prints one line for every result that
+ * differs from the expected one:
+ *   1. forward search with the default strength,
+ *   2. forward and backward search with primary strength,
+ *   3. substrings of monkeyTarget searched inside monkeyTarget (must match),
+ *   4. suffixes of monkeyTarget searched inside monkeyTarget2 (must not match).
+ *
+ * @return 0 normally; 1 if the StringSearch object could not be created.
+ */
+int main()
+{
+   UErrorCode status = U_ZERO_ERROR;
+   UnicodeString target("A quick fox jumped over the lazy dog.", "");
+   UnicodeString easyPatterns[] = {"FoX", "CAT", "jump", "under" };
+   const int patternCount = (int)(sizeof(easyPatterns) / sizeof(easyPatterns[0]));
+   // Expected match offsets, indexed like easyPatterns (-1 means "no match").
+   int exactOffsets[] = { -1, -1, 12, -1 };
+   int tertiaryOffsets[] = { 8, -1, 12, -1 };
+   UnicodeString monkeyTarget("abcdefgh");
+   UnicodeString monkeyTarget2("ijklmnop");
+
+   int i, j;
+   int pos = 0;
+   int result = 0;
+   StringSearch *searchIter = new StringSearch(easyPatterns[0], target, status);
+   fprintf(stdout, "\n");
+   if (searchIter == NULL || U_FAILURE(status))
+   {
+        // Nothing below can work without a usable search object.
+        fprintf(stderr, "Failed to create a StringSearch object for the default locale.\n");
+        result = 1;
+        goto cleanup;
+   }
+   fprintf(stdout, "Try with default normalization mode and strength.\n");
+   for (i = 0; i < patternCount; i++)
+   {
+       status = U_ZERO_ERROR;
+       if (i > 0)
+       {
+           // Pattern 0 was already installed by the constructor.
+           searchIter->setPattern(easyPatterns[i], status);
+           if (U_FAILURE(status))
+           {
+                fprintf(stderr, "Failed to set a pattern for %d element.\n", i);
+                continue;
+           }
+       }
+       searchIter->reset();
+       pos = searchIter->next();
+       if ( pos != exactOffsets[i] )
+          fprintf(stdout, "Exact match failed at the index %d pattern.\n", i);
+   }
+   fprintf(stdout, "Try now with strength == primary.\n");
+   status = U_ZERO_ERROR;
+   searchIter->setStrength(Collator::PRIMARY, status);
+   if (U_FAILURE(status))
+   {
+        fprintf(stderr, "Failed to set strength of the string search object.\n");
+   }
+   searchIter->reset();
+   // Start from a clean status so an earlier failure does not mask this call.
+   status = U_ZERO_ERROR;
+   searchIter->setPattern(easyPatterns[0], status);
+   if (U_FAILURE(status))
+   {
+        fprintf(stderr, "Failed to set a pattern for the first element.\n");
+   }
+   pos = searchIter->first();
+   if (pos != tertiaryOffsets[0])
+       fprintf(stdout, "Tertiary match failed at the first pattern.\n");
+   for (i = 1; i < patternCount; i++)
+   {
+       status = U_ZERO_ERROR;
+       searchIter->setPattern(easyPatterns[i], status);
+       searchIter->reset();
+       pos = searchIter->next();
+       if (pos != tertiaryOffsets[i])
+           fprintf(stdout, "Tertiary match failed at index %d pattern.\n", i);
+   }
+   // Going backwards, starting with the last pattern
+   i = patternCount - 1;
+   searchIter->reset();
+   status = U_ZERO_ERROR;
+   searchIter->setPattern(easyPatterns[i], status);
+   if (U_FAILURE(status))
+   {
+        fprintf(stderr, "Failed to set a pattern for the last element.\n");
+   }
+   pos = searchIter->last();
+   if (pos != tertiaryOffsets[i])
+       fprintf(stdout, "Tertiary match failed at the last pattern.\n");
+   for (; i >= 1 ; --i)
+   {
+       status = U_ZERO_ERROR;       
+       searchIter->setPattern(easyPatterns[i-1], status);
+       searchIter->reset();
+       pos = searchIter->previous();
+       if (pos != tertiaryOffsets[i-1])
+           fprintf(stdout, "Walking backwards: tertiary match failed at index %d pattern.\n", i - 1);
+   }
+   // This setTarget() call passes no status argument, so there is no error
+   // code to inspect afterwards.
+   searchIter->setTarget(monkeyTarget);
+   status = U_ZERO_ERROR;
+   searchIter->setStrength(Collator::TERTIARY, status);
+   if (U_FAILURE(status))
+   {
+        fprintf(stderr, "Failed to set strength of the string search object.\n");
+   }
+   // change direction again 
+   searchIter->reset();
+   status = U_ZERO_ERROR;
+   searchIter->setPattern(monkeyTarget, status);
+   if (U_FAILURE(status))
+   {
+        fprintf(stderr, "Failed to set a pattern as monkey test itself.\n");
+   }
+   pos = searchIter->first();
+   if (pos == -1)
+       fprintf(stdout, "Matching monkey test itself failed.\n");
+   for (i = 0; i < monkeyTarget.length() - 1; i++)
+   {
+       // will always find its substring
+       for (j = i+1; j < monkeyTarget.length(); j++)
+       {
+            UnicodeString temp;
+            status = U_ZERO_ERROR;
+            searchIter->reset();
+            // j is passed as the length argument of extract()
+            monkeyTarget.extract(i, j, temp);
+            searchIter->setPattern(temp, status);
+            if (U_FAILURE(status))
+            {
+                fprintf(stderr, "Failed to set a pattern for the %d -th monkey pattern of length %d.\n", i, j);
+                continue;
+            }
+            pos = searchIter->next();
+            if (pos == -1)
+               fprintf(stdout, "Monkey match failed at index %d in monkey pattern of length %d.\n", i, j);
+       }
+   }
+   searchIter->setTarget(monkeyTarget2);
+   for (i = 0; i < monkeyTarget.length() - 1; i++)
+   {
+       // will never find the match
+        UnicodeString temp;
+        status = U_ZERO_ERROR;
+        monkeyTarget.extract(i, monkeyTarget.length(), temp);
+        searchIter->reset();
+        searchIter->setPattern(temp, status);
+        if (U_FAILURE(status))
+        {
+            fprintf(stderr, "Failed to set a pattern for the monkey pattern at offset index %d.\n", i);
+            continue;
+        }
+        pos = searchIter->next();
+        if (pos != -1)
+           fprintf(stdout, "Monkey mismatch failed at index %d in monkey pattern.\n", i);
+   }
+   
+cleanup:
+    delete searchIter;
+    return result;
+}
--- a/icu4c/source/samples/search/search.dsp
+++ b/icu4c/source/samples/search/search.dsp
@ -0,0 +1,118 @@
+# Microsoft Developer Studio Project File - Name="search" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=search - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE 
+!MESSAGE NMAKE /f "search.mak".
+!MESSAGE 
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE 
+!MESSAGE NMAKE /f "search.mak" CFG="search - Win32 Debug"
+!MESSAGE 
+!MESSAGE Possible choices for configuration are:
+!MESSAGE 
+!MESSAGE "search - Win32 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "search - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "search - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /I "..\..\..\include" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 ..\..\..\lib\icuuc.lib ..\..\..\lib\icuin.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib"
+
+!ELSEIF  "$(CFG)" == "search - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
+# ADD BASE RSC /l 0x409 /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 ..\..\..\lib\icuucd.lib ..\..\..\lib\icuind.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib"
+
+!ENDIF 
+
+# Begin Target
+
+# Name "search - Win32 Release"
+# Name "search - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\search.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\srchiter.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\strsrch.cpp
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\srchiter.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\strsrch.h
+# End Source File
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
--- a/icu4c/source/samples/search/search.dsw
+++ b/icu4c/source/samples/search/search.dsw
@ -0,0 +1,29 @@
+Microsoft Developer Studio Workspace File, Format Version 6.00
+# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
+
+###############################################################################
+
+Project: "search"=.\search.dsp - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Global:
+
+Package=<5>
+{{{
+}}}
+
+Package=<3>
+{{{
+}}}
+
+###############################################################################
+
--- a/icu4c/source/samples/search/srchiter.cpp
+++ b/icu4c/source/samples/search/srchiter.cpp
@ -0,0 +1,279 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999-2000 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  03/22/2000   helena      Creation.
+**********************************************************************
+*/
+
+#include "unicode/brkiter.h"
+#include "unicode/schriter.h"
+#include "srchiter.h"
+
+int32_t const SearchIterator::DONE = -1;      // returned by the search methods when there is no (further) match
+int32_t const SearchIterator::BEFORE = -2;    // internal marker: positioned before the start of the target text
+
+/**
+ * Default constructor: no target text, forward direction, overlapping
+ * matches allowed, and a character BreakIterator for the default locale.
+ */
+SearchIterator::SearchIterator(void) :
+    index(0),
+    length(0),
+    target(NULL),
+    backward(FALSE), /* going forward */
+    breaker(NULL),
+    overlap(TRUE)
+{
+    UErrorCode creationStatus = U_ZERO_ERROR;
+    // A failure status is not reported to the caller; breaker simply keeps
+    // whatever the factory returned.
+    breaker = BreakIterator::createCharacterInstance(Locale::getDefault(), creationStatus);
+}
+
+/**
+ * Constructor for use by subclasses.
+ *
+ * @param target   Text to search. It is handed to the break iterator with
+ *                 adoptText(). May be NULL, in which case the position
+ *                 starts at 0.
+ * @param breaker  Break iterator used by isBreakUnit() to validate match
+ *                 boundaries. NULL disables boundary checking.
+ */
+SearchIterator::SearchIterator(CharacterIterator* target, 
+                               BreakIterator* breaker) :
+    index(0),
+    length(0),
+    target(target),
+    backward(FALSE), /* going forward */
+    breaker(breaker),
+    overlap(TRUE)
+{
+    // Either pointer may be NULL, so neither is dereferenced unchecked.
+    if (this->target != NULL) {
+        if (this->breaker != NULL) {
+            this->breaker->adoptText(this->target);
+        }
+        index = this->target->startIndex();
+    }
+}
+
+/**
+ * Copy constructor.
+ *
+ * Clones the other iterator's target text and break iterator. The match
+ * length, direction and overlap setting are copied; the position is set to
+ * the start index of the target text rather than to other's position.
+ *
+ * @param other  The iterator to copy. Its target and break iterator may be
+ *               NULL, in which case the copy's are NULL as well.
+ */
+SearchIterator::SearchIterator(const  SearchIterator&   other) :
+    index(0),
+    length(other.length),
+    target(0),
+    backward(other.backward),
+    breaker(NULL),
+    overlap(other.overlap)  
+{
+    if (other.target != NULL) {
+        index = other.target->startIndex();
+        this->target = other.target->clone();
+    }
+
+    if (other.breaker != NULL) {
+        // Clone through the pointer. Casting the pointer member itself to
+        // BreakIterator& would treat the pointer's own storage as an object.
+        this->breaker = other.breaker->clone();
+        if (this->target != NULL) {
+            this->breaker->adoptText(this->target);
+        }
+    }
+}
+
+/**
+ * Destructor. Only the break iterator is deleted here.
+ */
+SearchIterator::~SearchIterator()
+{
+    // deletion of breaker will delete target
+    // (deleting a null pointer is a no-op, so no guard is needed)
+    delete breaker;
+    breaker = 0;
+}
+
+/**
+ * Equality: TRUE for the same object, or when the break iterators, target
+ * texts, direction, position, match length and overlap setting all compare
+ * equal (checked in that order).
+ */
+bool_t SearchIterator::operator == (const SearchIterator& that) const
+{
+    if (&that == this) {
+        return TRUE;
+    }
+    if (*that.breaker != *breaker
+        || *that.target != *target
+        || that.backward != backward
+        || that.index != index
+        || that.length != length
+        || that.overlap != overlap) {
+        return FALSE;
+    }
+    return TRUE;
+}
+
+/** Positions the iterator before the text and returns the result of next(). */
+int32_t SearchIterator::first(void) 
+{
+    this->setIndex(SearchIterator::BEFORE);
+    const int32_t match = this->next();
+    return match;
+}
+
+/** Moves the iterator to pos and returns the result of next(). */
+int32_t SearchIterator::following(int32_t pos) 
+{
+    this->setIndex(pos);
+    const int32_t match = this->next();
+    return match;
+}
+    
+/** Marks the position as DONE (past the end) and returns the result of previous(). */
+int32_t SearchIterator::last(void) 
+{
+    this->setIndex(SearchIterator::DONE);
+    const int32_t match = this->previous();
+    return match;
+}
+
+/** Moves the iterator to pos and returns the result of previous(). */
+int32_t SearchIterator::preceding(int32_t pos) 
+{
+    this->setIndex(pos);
+    const int32_t match = this->previous();
+    return match;
+}
+    
+/**
+ * Finds the next match at or after the current position.
+ *
+ * Candidates returned by handleNext() are accepted only if isBreakUnit()
+ * approves their boundaries; otherwise the search resumes one position
+ * after the rejected candidate.
+ *
+ * @return The index of the match, or DONE if there is none or if
+ *         handleNext() reported a failure.
+ */
+int32_t SearchIterator::next(void) 
+{
+    if (index == SearchIterator::DONE) {
+        return SearchIterator::DONE;
+    }
+    if (index == SearchIterator::BEFORE) {
+        // Starting at the beginning of the text
+        index = target->startIndex();
+    } else if (length > 0) {
+        // Finding the next match after a previous one
+        if (overlap) {
+            index += 1;
+        } else {
+            index += length;
+        }
+    }
+    // Step back by one because every pass below searches from index + 1.
+    index -= 1;
+    backward = FALSE;
+
+    for (;;) {
+        UErrorCode status = U_ZERO_ERROR;
+        length = 0;
+        index = handleNext(index + 1, status);
+        if (U_FAILURE(status))
+        {
+            return SearchIterator::DONE;
+        }
+        if (index == SearchIterator::DONE || isBreakUnit(index, index+length)) {
+            break;
+        }
+    }
+
+    return index;
+}
+
+/**
+ * Finds the previous match at or before the current position.
+ *
+ * Candidates returned by handlePrev() are accepted only if isBreakUnit()
+ * approves their boundaries. When the search runs out of matches the
+ * position becomes BEFORE, which getIndex() reports as DONE.
+ *
+ * @return The index of the match, or DONE if there is none or if
+ *         handlePrev() reported a failure.
+ */
+int32_t SearchIterator::previous(void) 
+{
+    if (index == SearchIterator::BEFORE) {
+        return SearchIterator::DONE;
+    }
+    if (index == SearchIterator::DONE) {
+        index = target->endIndex();
+    } else if (length > 0 && overlap) {
+        // Finding the previous match before a following one
+        index = index + length - 1;
+    }
+    // Step forward by one because every pass below searches from index - 1.
+    index += 1;
+    backward = TRUE;
+
+    for (;;) {
+        UErrorCode status = U_ZERO_ERROR;
+        length = 0;
+        index = handlePrev(index - 1, status);
+        if (U_FAILURE(status))
+        {
+            return SearchIterator::DONE;
+        }
+        if (index == SearchIterator::DONE || isBreakUnit(index, index+length)) {
+            break;
+        }
+    }
+
+    if (index == SearchIterator::DONE) {
+        index = SearchIterator::BEFORE;
+    }
+    return getIndex();
+}
+
+
+/** Returns the current position; the internal BEFORE marker is reported as DONE. */
+int32_t SearchIterator::getIndex() const
+{
+    if (index == SearchIterator::BEFORE) {
+        return SearchIterator::DONE;
+    }
+    return index;
+}
+
+/** Stores whether overlapping matches may be returned. */
+void SearchIterator::setOverlapping(bool_t allowOverlap) 
+{
+    this->overlap = allowOverlap;
+}
+    
+/** Returns the overlap setting stored by setOverlapping(). */
+bool_t SearchIterator::isOverlapping(void) const
+{
+    return this->overlap;
+}
+    
+/** Returns the stored match length (0 when no match is current). */
+int32_t SearchIterator::getMatchLength(void) const
+{
+    return this->length;
+}
+
+/**
+ * Resets the iteration.
+ *
+ * Clears the match length and re-enables overlapping matches. If the last
+ * search direction was forward, the position and both underlying iterators
+ * are moved to the start; otherwise the position becomes DONE and both
+ * iterators are moved to the end.
+ *
+ * A missing target or break iterator (for example on a default-constructed
+ * object) is skipped rather than dereferenced.
+ */
+void SearchIterator::reset(void)
+{
+    length = 0;
+    if (backward == FALSE) {
+        index = 0;
+        if (target != NULL) {
+            target->setToStart();
+        }
+        if (breaker != NULL) {
+            breaker->first();
+        }
+    } else {
+        index = SearchIterator::DONE;
+        if (target != NULL) {
+            target->setToEnd();
+        }
+        if (breaker != NULL) {
+            breaker->last();
+        }
+    }
+    overlap = TRUE;
+}
+
+/**
+ * Replaces the break iterator with a clone of the given one.
+ *
+ * @param iterator  Break iterator to copy; it is neither aliased nor
+ *                  adopted. Must not be NULL.
+ */
+void SearchIterator::setBreakIterator(const BreakIterator* iterator) 
+{
+    // Clone before deleting anything: iterator may be the current breaker
+    // (e.g. obtained through getBreakIterator()).
+    BreakIterator *replacement = iterator->clone();
+
+    // The old breaker owns the text it adopted and deletes it together with
+    // itself, so the new breaker needs its own copy of the target.
+    CharacterIterator *text = target;
+    if (breaker != NULL && target != NULL) {
+        text = target->clone();
+    }
+    delete breaker;
+
+    breaker = replacement;
+    // Point target at the surviving copy instead of the deleted original.
+    target = text;
+    if (target != NULL) {
+        breaker->adoptText(target);
+    }
+}
+
+/** Returns a reference to the break iterator currently in use. */
+const BreakIterator& SearchIterator::getBreakIterator(void) const
+{
+    const BreakIterator *current = breaker;
+    return *current;
+}
+ 
+/**
+ * Sets the text to be searched and moves the position to before the start
+ * of that text (as adoptTarget() does).
+ *
+ * If the current target is a StringCharacterIterator its text is replaced
+ * in place; otherwise a new StringCharacterIterator is created and handed
+ * to the break iterator.
+ *
+ * @param newText  The new text to search.
+ */
+void SearchIterator::setTarget(const UnicodeString& newText)
+{
+    if (target != NULL && target->getDynamicClassID()
+            == StringCharacterIterator::getStaticClassID()) {
+        ((StringCharacterIterator*)target)->setText(newText);
+    }
+    else {
+        CharacterIterator *fresh = new StringCharacterIterator(newText);
+        fresh->first();
+        if (breaker != NULL) {
+            // The breaker owns the text it adopted and releases it when it
+            // adopts a new one, so the old target must not be deleted here
+            // as well.
+            breaker->adoptText(fresh);
+        } else {
+            // No breaker, so nothing else owns the old target.
+            delete target;
+        }
+        target = fresh;
+    }
+    // Drop any position and match length left over from the previous text.
+    setIndex(SearchIterator::BEFORE);
+}
+  
+/**
+ * Makes iterator the target text, hands it to the break iterator and moves
+ * the position to before the start of the text.
+ */
+void SearchIterator::adoptTarget(CharacterIterator* iterator)
+{
+    this->target = iterator;
+    this->breaker->adoptText(this->target);
+    this->setIndex(SearchIterator::BEFORE);
+}
+
+/**
+ * Returns the target text. If none has been set yet, an iterator over an
+ * empty string is created, stored as the target and returned.
+ */
+const CharacterIterator& SearchIterator::getTarget(void) const
+{
+    // Constness is cast away so the placeholder can be stored lazily.
+    SearchIterator* self = (SearchIterator*)this;
+
+    if (self->target == NULL) {
+        self->target = new StringCharacterIterator("");
+    }
+    return *self->target;
+}
+
+/**
+ * Copies the currently matched text into result.
+ *
+ * @param result  Cleared first; left empty when the match length is 0.
+ */
+void SearchIterator::getMatchedText(UnicodeString& result) 
+{
+    result.remove();
+    if (length <= 0) {
+        return;
+    }
+    // Walk the target from the match start, one code unit per step.
+    UChar current = target->setIndex(index);
+    int copied = 0;
+    while (copied < length) {
+        result += current;
+        current = target->next();
+        ++copied;
+    }
+}
+
+
+/** Stores the length of the current match. */
+void SearchIterator::setMatchLength(int32_t length) 
+{
+    // The parameter hides the member of the same name.
+    (*this).length = length;
+}
+
+/** Moves the position to pos and clears the match length. */
+void SearchIterator::setIndex(int32_t pos)
+{
+    this->index = pos;
+    this->length = 0;
+}
+
+/**
+ * Tells whether [start, end) begins and ends on boundaries of the break
+ * iterator. Always TRUE when there is no break iterator; the end of the
+ * target text counts as a boundary.
+ */
+bool_t SearchIterator::isBreakUnit(int32_t start, 
+                                   int32_t end)
+{
+    if (breaker == NULL) {
+        return TRUE;
+    }
+    // Both ends are always examined, start first.
+    const bool_t startsOnBoundary = breaker->isBoundary(start);
+    bool_t endsOnBoundary = TRUE;
+    if (end != target->endIndex()) {
+        endsOnBoundary = breaker->isBoundary(end);
+    }
+
+    return startsOnBoundary && endsOnBoundary;
+}
+
+
--- a/icu4c/source/samples/search/srchiter.h
+++ b/icu4c/source/samples/search/srchiter.h
@ -0,0 +1,388 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999-2000 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  03/22/2000   helena      Creation.
+**********************************************************************
+*/
+#ifndef SRCHITER_H
+#define SRCHITER_H
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#include "unicode/brkiter.h"
+
+/**
+ * <code>SearchIterator</code> is an abstract base class that provides methods
+ * to search for a pattern within a text string.  Instances of
+ * <code>SearchIterator</code> maintain a current position and scan over
+ * the target text, returning the indices at which the pattern is matched
+ * and the length of each match.
+ * <p>
+ * <code>SearchIterator</code> is an abstract base class that defines a
+ * protocol for text searching.  Subclasses provide concrete implementations of
+ * various search algorithms.  For example, {@link StringSearch}
+ * implements language-sensitive pattern matching based on the comparison rules
+ * defined in a {@link RuleBasedCollator} object.
+ * <p>
+ * Internally, <code>SearchIterator</code> scans text using a
+ * {@link CharacterIterator}, and is thus able to scan text held
+ * by any object implementing that protocol. A <code>StringCharacterIterator</code>
+ * is used to scan <code>UnicodeString</code> objects passed to <code>setTarget</code>.
+ * <p>
+ * <code>SearchIterator</code> provides an API that is similar to that of
+ * other text iteration classes such as <code>BreakIterator</code>.  Using this
+ * class, it is easy to scan through text looking for all occurrences of a
+ * given pattern.  The following example uses a <code>StringSearch</code> object to
+ * find all instances of "fox" in the target string.  Any other subclass of
+ * <code>SearchIterator</code> can be used in an identical manner.
+ * <pre><code>
+ * UErrorCode status = U_ZERO_ERROR;
+ * UnicodeString target("The quick brown fox jumped over the lazy fox");
+ * UnicodeString pattern("fox");
+ *
+ * SearchIterator *iter = new StringSearch(pattern, target, status);
+ *
+ * for (int pos = iter->first(); pos != SearchIterator::DONE; pos = iter->next()) {
+ *     printf("Found match at %d pos, length is %d\n", pos, iter->getMatchLength());
+ * }
+ * delete iter;
+ * </code></pre>
+ *
+ * @see StringSearch
+ */
+
+class SearchIterator {
+public:
+    /**
+     * DONE is returned by previous() and next() after all valid
+     * matches have been returned, and by first() and last() if
+     * there are no matches at all.
+     */
+     static  const int32_t DONE;
+    
+    //=======================================================================
+    // boilerplate
+    //=======================================================================
+
+    /**
+     * Destructor
+     */
+    virtual ~SearchIterator();
+
+    /** copy constructor */
+    SearchIterator(const    SearchIterator&   other);
+
+    /**
+     * Equality operator.  Returns TRUE if both SearchIterators have equal
+     * break iterators, target text, direction, position, match length and
+     * overlap setting.
+     */
+    virtual bool_t operator==(const SearchIterator& that) const;
+
+    /**
+     * Not-equal operator.  If operator== returns TRUE, this returns FALSE,
+     * and vice versa.
+     */
+    bool_t operator!=(const SearchIterator& that) const;
+
+    /**
+     * Returns a newly-constructed SearchIterator with the same
+     * behavior, and iterating over the same text, as this one.
+     */
+    virtual SearchIterator* clone(void) const = 0;
+
+    /**
+     * Return a polymorphic class ID for this object. Different subclasses
+     * will return distinct unequal values.
+     * @stable
+     */
+    virtual UClassID getDynamicClassID(void) const = 0;
+
+    /**
+     * Return the first index at which the target text matches the search
+     * pattern.  The iterator is adjusted so that its current index
+     * (as returned by {@link #getIndex}) is the match position if one was found
+     * and <code>DONE</code> if one was not.
+     *
+     * @return The character index of the first match, or <code>DONE</code> if there
+     *          are no matches.
+     */
+     int32_t first(void);
+
+    /**
+     * Return the first index greater than <tt>pos</tt> at which the target
+     * text matches the search pattern.   The iterator is adjusted so that its current index
+     * (as returned by {@link #getIndex}) is the match position if one was found
+     * and <code>DONE</code> if one was not.
+     * <p>
+     * NOTE(review): the implementation passes <tt>pos</tt> itself to
+     * <tt>handleNext</tt> as the start index, so whether a match beginning
+     * exactly at <tt>pos</tt> can be returned depends on the subclass - confirm.
+     *
+     * @param pos The index in the target text from which to search forward.
+     * @return The character index of the first match following <code>pos</code>,
+     *          or <tt>DONE</tt> if there are no matches.
+     */
+    int32_t following(int32_t pos);
+    
+    /**
+     * Return the last index in the target text at which it matches
+     * the search pattern and adjusts the iteration to point to that position.
+     *
+     * @return The index of the last match, or <tt>DONE</tt> if there
+     *          are no matches.
+     */
+    int32_t last(void);
+
+    /**
+     * Return the first index less than <code>pos</code> at which the target
+     * text matches the search pattern.   The iterator is adjusted so that its current index
+     * (as returned by {@link #getIndex}) is the match position if one was found
+     * and <tt>DONE</tt> if one was not.
+     * <p>
+     * NOTE(review): the implementation passes <tt>pos</tt> itself to
+     * <tt>handlePrev</tt> as the start index, so whether a match beginning
+     * exactly at <tt>pos</tt> can be returned depends on the subclass - confirm.
+     *
+     * @param pos The index in the target text from which to search backward.
+     * @return The character index of the first match preceding <code>pos</code>,
+     *          or <code>DONE</code> if there are no matches.
+     */
+    int32_t preceding(int32_t pos);
+    
+    /**
+     * Return the index of the next point at which the text matches the
+     * search pattern, starting from the current position
+     * <p>
+     * @return The index of the next match after the current position,
+     *          or <code>DONE</code> if there are no more matches.
+     *
+     * @see #first
+     */
+     int32_t next(void);
+
+    /**
+     * Return the index of the previous point at which the text matches
+     * the search pattern, starting at the current position
+     *
+     * @return The index of the previous match before the current position,
+     *          or <code>DONE</code> if there are no more matches.
+     */
+    int32_t previous(void);
+
+    /**
+     * Return the current index in the text being searched.
+     * If the iteration has gone past the end of the text
+     * (or past the beginning for a backwards search), 
+     * {@link #DONE} is returned.
+     */
+    int32_t getIndex(void) const;
+    /**
+     * Determines whether overlapping matches are returned.  If this
+     * property is <code>true</code>, matches that begin within the
+     * boundary of the previous match are considered valid and will
+     * be returned.  For example, when searching for "abab" in the
+     * target text "ababab", both offsets 0 and 2 will be returned
+     * as valid matches if this property is <code>true</code>.
+     * <p>
+     * The default setting of this property is <tt>true</tt>
+     */
+    void setOverlapping(bool_t allowOverlap);
+    
+    /**
+     * Determines whether overlapping matches are returned.
+     *
+     * @see #setOverlapping
+     */
+    bool_t isOverlapping(void) const;
+    
+    /**
+     * Returns the length of text in the target which matches the search
+     * pattern.  This call returns a valid result only after a successful
+     * call to {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
+     * Just after construction, or after a searching method returns
+     * <tt>DONE</tt>, this method will return 0.
+     *
+     * @return The length of the match in the target text, or 0 if there
+     *          is no match currently.
+     */
+    int32_t getMatchLength(void) const;
+
+    /**
+     * Set the BreakIterator that will be used to restrict the points
+     * at which matches are detected.
+     *
+     * @param iterator  A {@link BreakIterator}
+     *                  that will be used to restrict the points
+     *                  at which matches are detected.  If a match is found, but the match's start
+     *                  or end index is not a boundary as determined by
+     *                  the <tt>BreakIterator</tt>, the match will be rejected and
+     *                  another will be searched for.
+     *
+     *                  If this parameter is <tt>null</tt>, no break
+     *                  detection is attempted.
+     *                  NOTE(review): the implementation in srchiter.cpp calls
+     *                  <tt>iterator->clone()</tt> unconditionally, so a null
+     *                  argument is not actually handled there - confirm.
+     *
+     * @see #getBreakIterator
+     */
+    /* The implementation clones the argument, so it is neither aliased nor adopted. */
+    void setBreakIterator(const BreakIterator* iterator);
+    
+    /**
+     * Returns the BreakIterator that is used to restrict the points
+     * at which matches are detected.  This is the object passed to the
+     * constructor, or the clone made by <code>setBreakIterator</code>.
+     * NOTE(review): the return type is a reference, so the case in which
+     * no break iterator is set cannot be reported to the caller - confirm
+     * how that case should be handled.
+     *
+     * @see #setBreakIterator
+     */
+    const BreakIterator& getBreakIterator(void) const;
+    
+    /**
+     * Set the target text which should be searched and resets the
+     * iterator's position to point before the start of the target text.
+     * This method is useful if you want to re-use an iterator to
+     * search for the same pattern within a different body of text.
+     *
+     * @see #getTarget
+     */
+    virtual void setTarget(const UnicodeString& newText);    
+
+    /**
+     * Set the target text to the given character iterator and reset the
+     * iterator's position to point before the start of the target text.
+     * The character iterator is handed to the break iterator with
+     * <tt>adoptText</tt>, so the caller must not delete it.
+     *
+     * @see #getTarget
+     */
+    virtual void adoptTarget(CharacterIterator* iterator);
+    /**
+     * Return the target text which is being searched.  If no target has
+     * been set yet, an iterator over an empty string is created and returned.
+     *
+     * @see #setTarget
+     */
+    const CharacterIterator& getTarget(void) const;
+    
+    /** Reset the iteration: clears the match length, re-enables overlapping
+     *  matches and moves to the start of the text (or to the end, if the
+     *  last search went backward).
+    */
+    virtual void reset(void);
+
+    /**
+     * Returns the text that was matched by the most recent call to 
+     * {@link #first}, {@link #next}, {@link #previous}, or {@link #last}.
+     * If the iterator is not pointing at a valid match (e.g. just after
+     * construction or after <tt>DONE</tt> has been returned), returns
+     * an empty string.
+     */
+    void getMatchedText(UnicodeString& result);
+
+    //-------------------------------------------------------------------
+    // Protected interface for subclasses
+    //-------------------------------------------------------------------
+
+protected:
+    SearchIterator();
+
+    /**
+     * Constructor for use by subclasses
+     * <p>
+     * @param target    The target text to be searched.  This is for internal
+     *                  use by this class.  Subclasses need to maintain their
+     *                  own reference to or iterator over the target text
+     *                  for use by their {@link #handleNext handleNext} and
+     *                  {@link #handlePrev handlePrev} methods.  The target will
+     *                  be adopted and owned by the SearchIterator object.
+     *
+     * @param breaker   A {@link BreakIterator} that is used to restrict the points
+     *                  at which matches are detected.  If <tt>handleNext</tt> or
+     *                  <tt>handlePrev</tt> finds a match, but the match's start
+     *                  or end index is not a boundary as determined by
+     *                  the <tt>BreakIterator</tt>, the match is rejected and 
+     *                  <tt>handleNext</tt> or <tt>handlePrev</tt> is called again.
+     *                  If this parameter is <tt>null</tt>, no break
+     *                  detection is attempted.
+     *                  
+     */
+    SearchIterator(CharacterIterator* target, 
+                   BreakIterator* breaker);
+/**
+     * Abstract method which subclasses override to provide the mechanism
+     * for finding the next match in the target text.  This allows different
+     * subclasses to provide different search algorithms.
+     * <p>
+     * If a match is found, the implementation should return the index at
+     * which the match starts and should call {@link #setMatchLength setMatchLength}
+     * with the number of characters in the target
+     * text that make up the match.  If no match is found, the method
+     * should return DONE and should not call <tt>setMatchLength</tt>.
+     * <p>
+     * @param startAt   The index in the target text at which the search
+     *                  should start.
+     * @param status    Error code the implementation may set; <tt>next</tt>
+     *                  returns DONE when it indicates a failure.
+     *
+     * @see #setMatchLength
+     */
+    virtual int32_t handleNext(int32_t startAt, UErrorCode& status) = 0;
+
+    /**
+     * Abstract method which subclasses override to provide the mechanism
+     * for finding the previous match in the target text.  This allows different
+     * subclasses to provide different search algorithms.
+     * <p>
+     * If a match is found, the implementation should return the index at
+     * which the match starts and should call {@link #setMatchLength setMatchLength}
+     * with the number of characters in the target
+     * text that make up the match.  If no match is found, the method
+     * should return DONE and should not call <tt>setMatchLength</tt>.
+     * <p>
+     * @param startAt   The index in the target text at which the search
+     *                  should start.
+     * @param status    Error code the implementation may set; <tt>previous</tt>
+     *                  returns DONE when it indicates a failure.
+     *
+     * @see #setMatchLength
+     */
+     virtual int32_t handlePrev(int32_t startAt, UErrorCode& status) = 0;
+
+    /**
+     * Sets the length of the currently matched string in the target text.
+     * Subclasses' <code>handleNext</code> and <code>handlePrev</code>
+     * methods should call this when they find a match in the target text.
+     */
+    void setMatchLength(int32_t length);
+
+    //-------------------------------------------------------------------
+    // Privates
+    //
+private:
+    /**
+     * Class ID
+     */
+    static char fgClassID;
+private:    
+    /**
+     * Private value indicating that the iterator is pointing
+     * before the beginning of the target text.
+     */
+     static const int32_t BEFORE;
+
+    /**
+     * Internal method used by first, last, preceding, following and
+     * adoptTarget.  Sets the index to point to the given position, and
+     * clears the match length.
+     */
+    void setIndex(int32_t pos);
+    
+    /**
+     * Determine whether the target text bounded by <code>start</code> and
+     * <code>end</code> is one or more whole units of text as determined by
+     * the current <code>BreakIterator</code>.  Returns TRUE when no
+     * break iterator is set.
+     */
+    bool_t isBreakUnit(int32_t start, int32_t end);
+    
+    //-------------------------------------------------------------------------
+    // Private data...
+    //-------------------------------------------------------------------------
+    int32_t                 index;          // Current position in the target text, or DONE / BEFORE
+    int32_t                 length;         // Length of matched text, or 0
+    bool_t                  overlap;        // Return overlapping matches?
+    CharacterIterator*      target;         // Target text to be searched
+    BreakIterator*          breaker;        // Break iterator to constrain matches
+    bool_t                  backward;       // TRUE after previous(), FALSE after next(); consulted by reset()
+};
+
+/** Negation of operator==. */
+inline bool_t SearchIterator::operator!=(const SearchIterator& that) const
+{
+   if (*this == that) {
+       return FALSE;
+   }
+   return TRUE;
+}
+
+#endif
+
--- a/icu4c/source/samples/search/strsrch.cpp
+++ b/icu4c/source/samples/search/strsrch.cpp
@ -0,0 +1,758 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999-2000 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  03/22/2000   helena      Creation.
+**********************************************************************
+*/
+
+#include  <memory.h>
+#include "unicode/coleitr.h"
+#include "unicode/schriter.h"
+#include "strsrch.h"
+/**
+ * <code>StringSearch</code> is a <code>SearchIterator</code> that provides
+ * language-sensitive text searching based on the comparison rules defined
+ * in a {@link RuleBasedCollator} object.
+ * Instances of <code>StringSearch</code> function as iterators: they
+ * maintain a current position and scan over text, returning the index of
+ * characters where the pattern occurs and the length of each match.
+ * <p>
+ * <code>StringSearch</code> uses a version of the fast Boyer-Moore search
+ * algorithm that has been adapted to work with the large character set of
+ * Unicode.  See "Efficient Text Searching in Java", to be published in
+ * <i>Java Report</i> in February, 1999, for further information on the algorithm.
+ * <p>
+ * Consult the <code>SearchIterator</code> documentation for information on
+ * and examples of how to use instances of this class to implement text
+ * searching.  <code>SearchIterator</code> provides all of the necessary
+ * API; this class only provides constructors and internal implementation
+ * methods.
+ *
+ * @see SearchIterator
+ * @see RuleBasedCollator
+ *
+ * @author Laura Werner
+ * @version 1.0
+ */
+
+char  StringSearch::fgClassID = 0; // Class ID tag; the stored value is irrelevant (presumably only its address is used -- confirm in strsrch.h)
+/* to be removed */
+/**
+ * Debugging aid that walks both shift tables looking for entries that
+ * differ from the default value (minLen).  The statements that would
+ * print those entries are commented out, so the function currently has
+ * no observable effect.
+ */
+void StringSearch::dumpTables() {
+    int slot = 0;
+
+    // Forward-search shift table
+    while (slot < 256) {
+        if (shiftTable[slot] != minLen) {
+//            debug("shift[" + Integer.toString(slot,16) + "] = " + shiftTable[slot]);
+        }
+        ++slot;
+    }
+
+    // Backward-search shift table
+    slot = 0;
+    while (slot < 256) {
+        if (backShiftTable[slot] != minLen) {
+//            debug("backShift[" + Integer.toString(slot,16) + "] = " + backShiftTable[slot]);
+        }
+        ++slot;
+    }
+}
+
+/**
+ * Construct a <code>StringSearch</code> object using a specific collator
+ * and a specific break iterator.
+ * <p>
+ * @param pat       The text for which this object will search.
+ * @param target    The text in which to search for the pattern; passed
+ *                  on to the SearchIterator base class.
+ * @param coll      Collator defining the comparison rules.  It is cloned;
+ *                  this object works on (and later deletes) its own copy.
+ *                  Must not be NULL.
+ * @param breaker   Break iterator passed on to the SearchIterator base class.
+ * @param status    Error code.  If it already indicates a failure the
+ *                  object is left without collator or iterators.  Set to
+ *                  U_MEMORY_ALLOCATION_ERROR if the collator clone or one
+ *                  of the collation element iterators cannot be created.
+ */
+StringSearch::StringSearch(const UnicodeString& pat, 
+                CharacterIterator* target,
+                RuleBasedCollator* coll, 
+                BreakIterator* breaker,
+                UErrorCode& status) :
+    SearchIterator(target, breaker),
+    strength(coll->getStrength()),
+    pattern(pat),
+    valueList(NULL),
+    valueListLen(0),
+    normLen(0),        // num. of collation elements in pattern.
+    minLen(0),         // Min of composed, decomposed versions
+    maxLen(0),         // Max
+    collator(NULL),    // owned pointers start out NULL so that the
+    iter(NULL),        // destructor is safe on every early-return path
+    it(NULL)
+
+{
+    if (U_FAILURE(status)) return;
+
+    collator = (RuleBasedCollator*)(coll->clone());
+    if (collator == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    // One iterator over the target text, one over the pattern
+    iter = collator->createCollationElementIterator(*target);
+    it = collator->createCollationElementIterator(pat);
+    if (iter == NULL || it == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+     
+    initialize(status);   // Initialize the Boyer-Moore tables
+}
+
+/**
+ * Construct a <code>StringSearch</code> object using a specific collator.
+ * <p>
+ * @param pat       The text for which this object will search.
+ *
+ * @param target    The text in which to search for the pattern.  It is
+ *                  handed to SearchIterator::adoptTarget.
+ *
+ * @param collator  A <code>RuleBasedCollator</code> object which defines the
+ *                  language-sensitive comparison rules used to determine 
+ *                  whether text in the pattern and target matches.  It is
+ *                  cloned; this object works on its own copy.  Must not
+ *                  be NULL.
+ *
+ * @param status    Error code.  If it already indicates a failure nothing
+ *                  is adopted or created.  Set to U_MEMORY_ALLOCATION_ERROR
+ *                  if the clone or an iterator cannot be created.
+ */
+StringSearch::StringSearch(const UnicodeString& pat,
+                 CharacterIterator* target,
+                 RuleBasedCollator* collator,
+                 UErrorCode& status) :
+    SearchIterator(),
+    strength(collator->getStrength()),
+    pattern(pat),
+    valueList(NULL),
+    valueListLen(0),
+    normLen(0),        // num. of collation elements in pattern.
+    minLen(0),         // Min of composed, decomposed versions
+    maxLen(0),          // Max
+    collator(NULL),    // member, distinct from the parameter of the same name
+    iter(NULL),
+    it(NULL)
+{
+    if (U_FAILURE(status)) return;
+
+    // Call the base-class version directly: StringSearch::adoptTarget()
+    // pushes the text into `iter`, which does not exist yet.
+    SearchIterator::adoptTarget(target);
+
+    this->collator = (RuleBasedCollator*)(collator->clone());
+    if (this->collator == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    // Create the iterators from the clone we own, not from the caller's
+    // collator, so that they stay consistent with later setStrength()
+    // calls, which only modify the clone.
+    this->iter = this->collator->createCollationElementIterator(*target);
+    this->it = this->collator->createCollationElementIterator(pat);
+    if (this->iter == NULL || this->it == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    initialize(status);
+}
+
+/**
+ * (Documentation for the locale-based constructor, which is defined
+ * after the copy constructor that immediately follows this comment.)
+ * <p>
+ * Construct a <code>StringSearch</code> object using the collator and
+ * character boundary detection rules for a given locale.
+ * <p>
+ * @param pattern   The text for which this object will search.
+ *
+ * @param target    The text in which to search for the pattern.
+ *
+ * @param loc       The locale whose collation and break-detection rules
+ *                  should be used.
+ *
+ * Note: the collator created for the locale is cast to
+ * RuleBasedCollator without a check.  The ClassCastException mentioned
+ * in the Java original of this comment does not exist in C++.
+ */
+/**
+ * Copy constructor.  Produces an independent object: the pattern, the
+ * pattern's collation values and both shift tables are copied, the
+ * collator is cloned, and fresh collation element iterators are created
+ * over the same target text and pattern (their positions are not copied).
+ *
+ * @param that  The object to copy.
+ */
+StringSearch::StringSearch(const StringSearch& that) :
+    SearchIterator(that),    
+    strength(that.strength),
+    pattern(that.pattern),
+    valueList(NULL),
+    valueListLen(that.valueListLen),
+    normLen(that.normLen),        // num. of collation elements in pattern.
+    minLen(that.minLen),          // Min of composed, decomposed versions
+    maxLen(that.maxLen),
+    collator(NULL),
+    iter(NULL),
+    it(NULL)
+{
+    int32_t slot;
+
+    // valueList holds normLen entries (that is how initialize() sizes it).
+    if (that.valueList != NULL && that.normLen > 0) {
+        valueList = new int32_t[that.normLen];
+        if (valueList != NULL) {
+            memcpy(valueList, that.valueList, that.normLen*sizeof(int32_t));
+        }
+    }
+    if (valueList == NULL) {
+        // Nothing could be copied; keep the lengths consistent with that.
+        normLen = 0;
+        valueListLen = 0;
+    }
+
+    // The Boyer-Moore tables are derived from the pattern; copy them so
+    // the new object can search without re-running initialize().
+    for (slot = 0; slot < 256; slot++) {
+        shiftTable[slot] = that.shiftTable[slot];
+        backShiftTable[slot] = that.backShiftTable[slot];
+    }
+
+    // Each object deletes its collator in the destructor, so the copy
+    // needs a collator of its own rather than a shared pointer.
+    if (that.collator != NULL) {
+        collator = (RuleBasedCollator*)(that.collator->clone());
+    }
+    if (collator != NULL) {
+        iter = collator->createCollationElementIterator(that.getTarget());
+        it = collator->createCollationElementIterator(pattern);
+    }
+}
+
+StringSearch::StringSearch(const UnicodeString& pat, 
+                 CharacterIterator* target, 
+                 const Locale& loc,
+                 UErrorCode& status) :
+    SearchIterator(),
+    strength(Collator::TERTIARY),   // placeholder until the collator exists
+    pattern(pat),
+    valueList(NULL),
+    valueListLen(0),
+    normLen(0),        // num. of collation elements in pattern.
+    minLen(0),         // Min of composed, decomposed versions
+    maxLen(0),         // Max
+    collator(NULL),    // owned pointers start out NULL so that the
+    iter(NULL),        // destructor is safe on every early-return path
+    it(NULL)
+{
+    if (U_FAILURE(status)) return;
+
+    // Call the base-class version directly: StringSearch::adoptTarget()
+    // pushes the text into `iter`, which does not exist yet.
+    SearchIterator::adoptTarget(target);
+
+    // The result is cast to RuleBasedCollator without a check.
+    collator = (RuleBasedCollator*)Collator::createInstance(loc, status);
+    if (U_FAILURE(status)) return;
+    if (collator == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    iter = collator->createCollationElementIterator(*target);
+    it = collator->createCollationElementIterator(pat);
+    if (iter == NULL || it == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    strength = collator->getStrength(); 
+
+    initialize(status);
+}
+
+/**
+ * Equality test.  Two objects are equal when they have the same dynamic
+ * class, their SearchIterator parts compare equal, and their target
+ * iterators, collators, strengths, collation value lists, patterns and
+ * length bookkeeping all match.
+ */
+bool_t
+StringSearch::operator==(const SearchIterator& that) const
+{
+    // Different dynamic class: never equal.
+    if (that.getDynamicClassID() != getDynamicClassID()) {
+        return FALSE;
+    }
+    // Base-class state must agree.
+    if (!SearchIterator::operator==(that)) {
+        return FALSE;
+    }
+
+    // Safe after the class ID check above.
+    const StringSearch& other = (const StringSearch&)that;
+
+    if (*other.iter != *iter) {
+        return FALSE;
+    }
+    if (*other.collator != *collator) {
+        return FALSE;
+    }
+    if (other.strength != strength) {
+        return FALSE;
+    }
+    if (other.valueListLen != valueListLen) {
+        return FALSE;
+    }
+    if (memcmp(other.valueList, valueList, valueListLen*sizeof(int32_t)) != 0) {
+        return FALSE;
+    }
+    if (other.pattern != pattern) {
+        return FALSE;
+    }
+    if (other.normLen != normLen) {
+        return FALSE;
+    }
+    if (other.minLen != minLen) {
+        return FALSE;
+    }
+    if (other.maxLen != maxLen) {
+        return FALSE;
+    }
+    return TRUE;
+}
+
+/**
+ * Returns a heap-allocated copy of this object made with the copy
+ * constructor.
+ */
+SearchIterator* 
+StringSearch::clone(void) const
+{
+    StringSearch* duplicate = new StringSearch(*this);
+    return duplicate;
+}
+
+/**
+ * Construct a <code>StringSearch</code> object using the collator for the default
+ * locale
+ * <p>
+ * @param pat       The text for which this object will search.
+ *
+ * @param newText   The text in which to search for the pattern.  A
+ *                  StringCharacterIterator over it is created and handed
+ *                  to SearchIterator::adoptTarget.
+ *
+ * @param status    Error code.  If it already indicates a failure nothing
+ *                  is created.  Receives any failure reported while
+ *                  creating the collator, or U_MEMORY_ALLOCATION_ERROR
+ *                  if an object cannot be created.
+ */
+StringSearch::StringSearch(const UnicodeString& pat, 
+                 const UnicodeString& newText,
+                 UErrorCode& status) :
+    SearchIterator(),
+    strength(Collator::TERTIARY),   // placeholder until the collator exists
+    pattern(pat),
+    valueList(NULL),
+    valueListLen(0),
+    normLen(0),        // num. of collation elements in pattern.
+    minLen(0),         // Min of composed, decomposed versions
+    maxLen(0),         // Max
+    collator(NULL),    // owned pointers start out NULL so that the
+    iter(NULL),        // destructor is safe on every early-return path
+    it(NULL)
+{
+    if (U_FAILURE(status)) return;
+
+    // The result is cast to RuleBasedCollator without a check.
+    collator = (RuleBasedCollator*)Collator::createInstance(Locale::getDefault(), status);
+    if (U_FAILURE(status)) return;
+    if (collator == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    strength = collator->getStrength(); 
+
+    iter = collator->createCollationElementIterator(newText);
+    it = collator->createCollationElementIterator(pat);
+    if (iter == NULL || it == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    // Allocate the character iterator only once nothing else can fail
+    // before it is adopted, so it cannot leak on an early return.
+    StringCharacterIterator *s = new StringCharacterIterator(newText);
+    if (s == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    // `iter` already iterates over newText, so the base-class call is
+    // sufficient here.
+    SearchIterator::adoptTarget(s);
+
+    initialize(status);
+}
+
+/**
+ * Destructor.  Releases the collation value list, the target iterator,
+ * the collator and the pattern iterator, in that order, and clears the
+ * pointers.
+ */
+StringSearch::~StringSearch(void)
+{
+    // delete and delete[] are no-ops on a null pointer, so no guards
+    // are needed.
+    delete [] valueList;
+    valueList = 0;
+
+    delete iter;
+    iter = 0;
+
+    delete collator;
+    collator = 0;
+
+    delete it;
+    it = 0;
+}
+//-------------------------------------------------------------------
+// Getters and Setters
+//-------------------------------------------------------------------
+
+/**
+ * Sets this object's strength property, i.e. the minimum level of
+ * difference considered significant during a search.  Generally,
+ * Collator::TERTIARY and Collator::IDENTICAL indicate that all
+ * differences are considered significant, Collator::SECONDARY indicates
+ * that upper/lower case distinctions should be ignored, and
+ * Collator::PRIMARY indicates that both case and accents should be
+ * ignored.  However, the exact meanings of these constants are
+ * determined by individual Collator objects.
+ * <p>
+ * The new strength is also applied to this object's collator, and the
+ * pattern data is recomputed through initialize().  Nothing happens if
+ * <code>status</code> already indicates a failure.
+ * <p>
+ * @see Collator#PRIMARY
+ * @see Collator#SECONDARY
+ * @see Collator#TERTIARY
+ * @see Collator#IDENTICAL
+ */
+void StringSearch::setStrength(Collator::ECollationStrength newStrength, UErrorCode& status) {
+    if (U_SUCCESS(status))
+    {
+        strength = newStrength;
+
+        // Due to a bug (?) in CollationElementIterator, the collator's
+        // strength has to be set as well: the iterator masks out the
+        // parts of a collation element that are not relevant for the
+        // collator's current strength.
+        // Consequence: a Collator cannot be shared among several
+        // StringSearch objects if strength settings are adjusted.
+        collator->setStrength(newStrength);
+
+        initialize(status);
+    }
+}
+
+
+/**
+ * Returns the strength currently stored in this object, which indicates
+ * what level of differences are considered significant during a search.
+ * <p>
+ * @see #setStrength
+ */
+Collator::ECollationStrength StringSearch::getStrength() const
+{
+    return this->strength;
+}
+
+/**
+ * Set the collator to be used for this string search.  Also changes
+ * the search strength to match that of the new collator.
+ * <p>
+ * The collator is cloned, and both collation element iterators (target
+ * and pattern) are recreated from the clone before the Boyer-Moore data
+ * is recomputed through initialize().  The old collator and iterators
+ * are released only after their replacements exist, so a failure leaves
+ * this object unchanged.
+ * <p>
+ * @param coll    The collator to copy.  Passing NULL sets
+ *                U_ILLEGAL_ARGUMENT_ERROR.
+ * @param status  Error code.  Nothing happens if it already indicates a
+ *                failure.  Set to U_MEMORY_ALLOCATION_ERROR if the clone
+ *                or an iterator cannot be created.
+ * @see #getCollator
+ */
+void StringSearch::setCollator(const RuleBasedCollator *coll, UErrorCode& status) 
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (coll == NULL) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    // Build the replacements first; this also makes it safe to pass in
+    // the collator returned by getCollator().
+    RuleBasedCollator *newCollator = (RuleBasedCollator*)coll->clone();
+    if (newCollator == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    CollationElementIterator *newIter = newCollator->createCollationElementIterator(getTarget());
+    CollationElementIterator *newIt = newCollator->createCollationElementIterator(pattern);
+    if (newIter == NULL || newIt == NULL) {
+        delete newIter;
+        delete newIt;
+        delete newCollator;
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    // The pattern iterator must be replaced too: the old one was created
+    // by the collator that is about to be deleted.
+    delete iter;
+    delete it;
+    delete collator;
+    collator = newCollator;
+    iter = newIter;
+    it = newIt;
+    strength = collator->getStrength();
+
+    initialize(status);
+}
+
+/**
+ * Returns a reference to the RuleBasedCollator this string search holds.
+ */
+const RuleBasedCollator& StringSearch::getCollator(void) const 
+{
+    return *(this->collator);
+}
+
+/**
+ * Set the pattern for which to search.  
+ * The pattern is stored and the internal data derived from it (such as
+ * the Boyer-Moore shift tables) is recalculated through initialize().
+ *
+ * @param pat     The new pattern.
+ * @param status  Error code.  If it already indicates a failure the
+ *                object is left untouched, so the stored pattern and
+ *                the data derived from it cannot get out of step.
+ */
+void StringSearch::setPattern(const UnicodeString& pat, UErrorCode& status) 
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    pattern = pat;
+    initialize(status);
+}
+
+/**
+ * Returns a reference to the pattern stored in this object.
+ */
+const UnicodeString& StringSearch::getPattern() const
+{
+    return this->pattern;
+}
+
+/**
+ * Set the target text which should be searched and resets the
+ * iterator's position to point before the start of the new text.
+ * This method is useful if you want to re-use an iterator to
+ * search for the same pattern within a different body of text.
+ * <p>
+ * The text is passed on to SearchIterator::adoptTarget and is also
+ * installed in the cached collation element iterator.  The second step
+ * is skipped when <code>target</code> is NULL or when no iterator
+ * exists (for example because construction failed).
+ */
+void StringSearch::adoptTarget(CharacterIterator* target) 
+{
+    UErrorCode status = U_ZERO_ERROR;
+    SearchIterator::adoptTarget(target);
+    
+    // fix me: the error code is dropped because the signature has no
+    // way of reporting it.
+    // Since we're caching a CollationElementIterator, point it at the
+    // new text.
+    if (target != NULL && iter != NULL) {
+        iter->setText(*target, status);
+    }
+}
+/**
+ * Replaces the target text with <code>newText</code>.  The text is
+ * passed on to SearchIterator::setTarget and is also installed in the
+ * cached collation element iterator; the second step is skipped when
+ * no iterator exists (for example because construction failed).
+ */
+void StringSearch::setTarget(const UnicodeString& newText) 
+{
+    UErrorCode status = U_ZERO_ERROR;
+    SearchIterator::setTarget(newText);
+    // Since we're caching a CollationElementIterator, point it at the
+    // new text.  The error code is dropped because the signature has no
+    // way of reporting it.
+    if (iter != NULL) {
+        iter->setText(newText, status);
+    }
+}
+
+/**
+ * Resets the search: first the SearchIterator base-class state, then
+ * the cached collation element iterator over the target text.
+ */
+void StringSearch::reset(void)
+{
+    SearchIterator::reset();
+    iter->reset();
+}
+
+//-------------------------------------------------------------------
+// Privates
+//-------------------------------------------------------------------
+
+/**
+ * Search forward for matching text, starting at a given location.
+ * Clients should not call this method directly; instead they should call
+ * {@link SearchIterator#next}.
+ * <p>
+ * If a match is found, this method returns the index at which the match
+ * starts and calls {@link SearchIterator#setMatchLength}
+ * with the number of characters in the target
+ * text that make up the match.  If no match is found, the method returns
+ * <code>DONE</code> and does not call <tt>setMatchLength</tt>.
+ * <p>
+ * <code>DONE</code> is also returned when <code>status</code> indicates
+ * a failure (on entry or from the collation element iterator) and when
+ * there is no pattern data to search for (empty pattern).
+ * <p>
+ * @param start The index in the target text at which the search starts.
+ *
+ * @param status Error code; see above.
+ *
+ * @return      The index at which the matched text in the target starts, or DONE
+ *              if no match was found.
+ * <p>
+ * @see SearchIterator#next
+ * @see SearchIterator#DONE
+ */
+int32_t StringSearch::handleNext(int32_t start, UErrorCode& status)
+{
+    if (U_FAILURE(status)) 
+    { 
+        return SearchIterator::DONE; 
+    }
+    if (normLen <= 0 || valueList == NULL)
+    {
+        // Nothing to search for.  Without this guard the loop below
+        // would never advance.
+        return SearchIterator::DONE;
+    }
+    const CharacterIterator& target = getTarget();
+    
+    int mask = getMask(strength);
+
+    // `index` is the position just past the candidate match; each
+    // candidate is compared backwards from there.
+    int32_t index = start + minLen;
+    int32_t matchEnd = 0;
+
+    while (index <= target.endIndex())
+    {
+        int32_t patIndex = normLen;
+        int32_t tval = 0, pval = 0;
+        bool_t getP = TRUE;     // FALSE: re-use pval for the next target element
+
+        iter->setOffset(index, status);
+        if (U_FAILURE(status)) 
+        {
+            return SearchIterator::DONE;
+        }
+        matchEnd = index;
+        
+        while ((patIndex > 0 || getP == FALSE) && iter->getOffset() > start)
+        {
+            // Get the previous character in both the pattern and the target
+            tval = iter->previous(status) & mask;
+            if (U_FAILURE(status)) 
+            {
+                return SearchIterator::DONE;
+            }
+            
+            if (getP) pval = valueList[--patIndex];
+            getP = TRUE;
+            
+            if (tval == 0) {       // skip tval, use same pval
+                getP = FALSE;
+            }
+            else if (pval != tval) {    // Mismatch, skip ahead
+                // Always move forward by at least one position; a
+                // non-positive shift would stall the search.
+                int32_t shift = getShift(tval, patIndex);
+                index += (shift > 0) ? shift : 1;
+                break;
+            }
+            else if (patIndex == 0) {
+                // The values matched, and we're at the beginning of the pattern,
+                // which means we matched the whole thing.
+                start = iter->getOffset();
+                setMatchLength(matchEnd - start);
+                return start;
+            }
+        }
+
+        if (iter->getOffset() <= start) {
+            // We hit the beginning of the text being searched, which is
+            // possible if it contains lots of ignorable characters.
+            // Advance one character and try again.
+            index++;
+        }
+    }
+    return SearchIterator::DONE;
+}
+
+/**
+ * Search backward for matching text, starting at a given location.
+ * Clients should not call this method directly; instead they should call
+ * <code>SearchIterator.previous()</code>, which this method overrides.
+ * <p>
+ * If a match is found, this method returns the index at which the match
+ * starts and calls {@link SearchIterator#setMatchLength}
+ * with the number of characters in the target
+ * text that make up the match.  If no match is found, the method returns
+ * <code>DONE</code> and does not call <tt>setMatchLength</tt>.
+ * <p>
+ * <code>DONE</code> is also returned when <code>status</code> indicates
+ * a failure (on entry or from the collation element iterator) and when
+ * there is no pattern data to search for (empty pattern).
+ * <p>
+ * @param start The index in the target text at which the search starts.
+ *
+ * @param status Error code; see above.
+ *
+ * @return      The index at which the matched text in the target starts, or DONE
+ *              if no match was found.
+ * <p>
+ * @see SearchIterator#previous
+ * @see SearchIterator#DONE
+ */
+int32_t StringSearch::handlePrev(int32_t start, UErrorCode& status)
+{
+    if (U_FAILURE(status))
+    {
+        return SearchIterator::DONE;
+    }
+    if (normLen <= 0 || valueList == NULL)
+    {
+        // Nothing to search for.  Without this guard the loop below
+        // would never advance.
+        return SearchIterator::DONE;
+    }
+    int patLen = normLen;
+
+    // `index` is the candidate start of a match; each candidate is
+    // compared forwards from there.
+    int index = start - minLen;
+
+    int mask = getMask(strength);
+    // Value the iterator yields (after masking) once the text is exhausted
+    int done = CollationElementIterator::NULLORDER & mask;
+    
+    while (index >= 0) {
+        int patIndex = 0;
+        int tval = 0, pval = 0;
+        bool_t getP = TRUE;     // FALSE: re-use pval for the next target element
+
+        iter->setOffset(index, status);
+        if (U_FAILURE(status))
+        {
+            return SearchIterator::DONE;
+        }
+
+        while ((patIndex < patLen || !getP) && iter->getOffset() < start)
+        {
+            tval = iter->next(status) & mask;
+            if (U_FAILURE(status))
+            {
+                return SearchIterator::DONE;
+            }
+            if (getP) pval = valueList[patIndex++];
+            getP = TRUE;
+
+            if (tval == done) {
+                // End of target; no match
+                return SearchIterator::DONE;
+            }
+            else if (tval == 0) {
+                // tval is ignorable: skip it, use the same pval again
+                getP = FALSE;
+            }
+            else if (pval != tval) {
+                // We didn't match this pattern.  Skip ahead, always by
+                // at least one position; a non-positive shift would
+                // stall the search.
+                int shift = getBackShift(tval, patIndex);
+                index -= (shift > 0) ? shift : 1;
+                break;
+            }
+            else if (patIndex == patLen) {
+                // The elements matched and we're at the end of the pattern,
+                // which means we matched the whole thing.
+                setMatchLength(iter->getOffset() - index);
+                return index;
+            }
+        }
+        if (iter->getOffset() >= start) {
+            // We hit the end of the text being searched, which is
+            // possible if it contains lots of ignorable characters.
+            // Back up one character and try again.
+            index--;
+        }
+    }
+    return SearchIterator::DONE;
+}
+
+/**
+ * Maps a strength level to the bitmask that keeps only the significant
+ * portion of a collation element: the upper 16 bits for PRIMARY, the
+ * upper 24 bits for SECONDARY, and all 32 bits for any other strength.
+ */
+int32_t StringSearch::getMask(Collator::ECollationStrength strength)  
+{
+    if (strength == Collator::PRIMARY) {
+        return 0xFFFF0000;
+    }
+    if (strength == Collator::SECONDARY) {
+        return 0xFFFFFF00;
+    }
+    return 0xFFFFFFFF;
+}
+
+
+/**
+ * (Re)computes everything that is derived from the pattern, the
+ * collator and the strength: the list of significant (masked, non-zero)
+ * collation elements of the pattern, the length bookkeeping
+ * (normLen, valueListLen, minLen, maxLen) and both Boyer-Moore shift
+ * tables.
+ * <p>
+ * The new value list is built in a temporary array and installed only
+ * once it is complete, so on failure the previously computed data stays
+ * in place and normLen keeps matching the size of valueList.
+ *
+ * @param status  Error code.  Nothing happens if it already indicates a
+ *                failure.  Set to U_MEMORY_ALLOCATION_ERROR if the
+ *                pattern iterator does not exist or the value list
+ *                cannot be allocated.
+ */
+void StringSearch::initialize(UErrorCode& status) {
+    int32_t i;                  // shared loop index
+    int32_t elem;
+    int32_t significant = 0;    // number of non-ignorable elements in the pattern
+    int32_t count = 0;          // number of entries stored so far
+    int32_t expandLen = 0;
+    int32_t *newValues = NULL;
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (it == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    it->setText(pattern, status);
+    if (U_FAILURE(status)) {
+        // `it` is deliberately kept: the destructor owns and deletes it.
+        return;
+    }
+
+    int mask = getMask(strength);
+
+    // Pass 1: see how many non-ignorable collation keys are in the text
+    while ((elem = it->next(status)) != CollationElementIterator::NULLORDER)
+    {
+        if (U_FAILURE(status)) {
+            return;
+        }
+        if ((elem & mask) != 0) {
+            significant++;
+        }
+    }
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    if (significant > 0) {
+        newValues = new int32_t[significant];
+        if (newValues == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+    }
+
+    // Pass 2: save them all.  Ignorable elements are skipped without
+    // consuming a slot, so the list has no holes.
+    it->reset();
+    while ((elem = it->next(status)) != CollationElementIterator::NULLORDER)
+    {
+        if (U_FAILURE(status)) {
+            delete [] newValues;
+            return;
+        }
+
+        if ((elem & mask) != 0 && count < significant) {
+            newValues[count++] = elem & mask;
+        }
+        // Keep track of whether there are any expanding-character
+        // sequences that can result in one of the characters that's in
+        // the pattern.  If there are, we have to reduce the shift
+        // distances calculated below to account for it.
+        expandLen += it->getMaxExpansion(elem) - 1;
+    }
+    if (U_FAILURE(status)) {
+        delete [] newValues;
+        return;
+    }
+
+    // Install the new list, releasing the one from a previous call.
+    delete [] valueList;
+    valueList = newValues;
+    normLen = count;
+    valueListLen = count;
+
+    //
+    // We need to remember the size of the composed and decomposed
+    // versions of the string.  Standard Boyer-Moore shift calculations
+    // can be wrong by an amount up to that difference, since a
+    // small number of characters in the pattern can map to a larger
+    // number in the text being searched, or vice-versa.
+    //
+    int uniLen = pattern.length();
+    maxLen = uprv_max(normLen, uniLen);
+    minLen = uprv_min(normLen, uniLen) - expandLen; 
+    if (minLen < 1) {
+        // minLen is the default shift distance; a value below 1 would
+        // keep the search loops from making progress.
+        minLen = 1;
+    }
+
+    // Now initialize the shift tables
+    //
+    // NOTE: This is the most conservative way to build them.  If we had a way
+    // of knowing that there were no expanding/contracting chars in the rules,
+    // we could get rid of the "- 1" in the shiftTable calculations.
+    // But all of the default collators have at least one expansion or
+    // contraction, so it probably doesn't matter anyway.
+    //
+    for (i = 0; i < 256; i++) {
+        shiftTable[i] = backShiftTable[i] = minLen;
+    }
+
+    // An empty value list has no first or last element to register.
+    if (normLen > 0) {
+        for (i = 0; i < normLen-1; i++) {
+            shiftTable[hash(valueList[i])] = uprv_max(minLen - i - 1, 1);
+        }
+        shiftTable[hash(valueList[normLen-1])] = 1;
+        
+        for (i = normLen - 1; i > 0; i--) {
+            backShiftTable[hash(valueList[i])] = i;
+        }
+        backShiftTable[hash(valueList[0])] = 1;
+    }
+    
+    /* dumpTables(); */
+}
+
+/**
+ * Determines how far to the right the pattern is shifted after a
+ * mismatch during a forward Boyer-Moore search.
+ * <p>
+ * The distance comes from the shift table.  When minLen and maxLen
+ * differ, it is reduced by (normLen - curIndex), provided the table
+ * value is more than one larger than that amount.
+ *
+ * @param curValue  The current value in the target text
+ * @param curIndex  The index in the pattern at which we failed to match
+ *                  curValue in the target text.
+ */
+int32_t StringSearch::getShift( int32_t curValue, int32_t curIndex ) const
+{
+    const int32_t tableShift = shiftTable[hash(curValue)];
+
+    // Same length either way: the table value is used as is.
+    if (minLen == maxLen) {
+        return tableShift;
+    }
+
+    const int slack = normLen - curIndex;
+    return (tableShift > slack + 1) ? (tableShift - slack) : tableShift;
+}
+
+/**
+ * Determines how far to the left the pattern is shifted after a
+ * mismatch during a reverse Boyer-Moore search.
+ * <p>
+ * The distance comes from the backward shift table.  When minLen and
+ * maxLen differ, it is reduced by (normLen - (minLen - curIndex)),
+ * provided the table value is more than one larger than that amount.
+ *
+ * @param curValue  The current value in the target text
+ * @param curIndex  The index in the pattern at which we failed to match
+ *                  curValue in the target text.
+ */
+int32_t StringSearch::getBackShift( int32_t curValue, int32_t curIndex ) const 
+{
+    const int tableShift = backShiftTable[hash(curValue)];
+
+    // Same length either way: the table value is used as is.
+    if (minLen == maxLen) {
+        return tableShift;
+    }
+
+    const int slack = normLen - (minLen - curIndex);
+    return (tableShift > slack + 1) ? (tableShift - slack) : tableShift;
+}
+
+/**
+ * Hash a collation element from its full size (32 bits) down into a
+ * value that can be used as an index into the shift tables.  Right
+ * now we keep the low 8 bits of the element's primary order.
+ * <p>
+ * Masking is used instead of "% 256": for non-negative primary orders
+ * the result is the same, but the result also stays within 0..255
+ * should primaryOrder() ever return a negative value, whereas the
+ * remainder of a negative number would be a negative table index.
+ *
+ * TODO: At some point I should experiment to see whether a slightly
+ * more complicated hash function gives us a better distribution
+ * on multilingual text.  I doubt it will have much effect on
+ * performance, though.
+ */
+int32_t StringSearch::hash(int32_t order) 
+{
+    return CollationElementIterator::primaryOrder(order) & 0xFF;
+}
+
--- a/icu4c/source/samples/search/strsrch.h
+++ b/icu4c/source/samples/search/strsrch.h
@ -0,0 +1,393 @@
+/*
+**********************************************************************
+*   Copyright (C) 1999-2000 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*  03/22/2000   helena      Creation.
+**********************************************************************
+*/
+#ifndef STRSRCH_H
+#define STRSRCH_H
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#include "unicode/tblcoll.h"
+#include "unicode/brkiter.h"
+#include "srchiter.h"
+
+class SearchIterator;
+/**
+ * <code>StringSearch</code> is a <code>SearchIterator</code> that provides
+ * language-sensitive text searching based on the comparison rules defined
+ * in a {@link RuleBasedCollator} object.
+ * Instances of <code>StringSearch</code> function as iterators: they
+ * maintain a current position and scan over text, returning the index of
+ * characters where the pattern occurs and the length of each match.
+ * <p>
+ * <code>StringSearch</code> uses a version of the fast Boyer-Moore search
+ * algorithm that has been adapted to work with the large character set of
+ * Unicode.  See "Efficient Text Searching in Java", to be published in
+ * <i>Java Report</i> in February, 1999, for further information on the algorithm.
+ * <p>
+ * Consult the <code>SearchIterator</code> documentation for information on
+ * and examples of how to use instances of this class to implement text
+ * searching.  <code>SearchIterator</code> provides all of the necessary
+ * API; this class only provides constructors and internal implementation
+ * methods.
+ *
+ * @see SearchIterator
+ * @see RuleBasedCollator
+ *
+ * @author Laura Werner
+ * @version 1.0
+ */
+
+class StringSearch : public SearchIterator
+{
+public:
+    /**
+     * Construct a <code>StringSearch</code> object using a specific collator and set
+     * of boundary-detection rules.
+     * <p>
+     * @param pat       The text for which this object will search.
+     *
+     * @param target    The text in which to search for the pattern.
+     *
+     * @param coll      A <code>RuleBasedCollator</code> object which defines the
+     *                  language-sensitive comparison rules used to determine 
+     *                  whether text in the pattern and target matches.
+     *
+     * @param breaker   A <code>BreakIterator</code> object used to constrain the matches
+     *                  that are found.  Matches whose start and end indices
+     *                  in the target text are not boundaries as determined
+     *                  by the <code>BreakIterator</code> are ignored.  If this behavior
+     *                  is not desired, <code>NULL</code> can be passed in instead.
+     *
+     * @param status    Error code reference; check it with U_FAILURE() after
+     *                  the call.
+     *
+     * NOTE(review): ownership of the target, coll and breaker pointers is
+     * not stated in this header -- confirm against the implementation.
+     */
+    StringSearch(const UnicodeString& pat, 
+                        CharacterIterator* target,
+                        RuleBasedCollator* coll, 
+                        BreakIterator* breaker,
+                        UErrorCode& status);
+
+    /**
+     * Construct a <code>StringSearch</code> object using a specific collator.
+     * <p>
+     * @param pattern   The text for which this object will search.
+     *
+     * @param target    The text in which to search for the pattern.
+     *
+     * @param collator  A <code>RuleBasedCollator</code> object which defines the
+     *                  language-sensitive comparison rules used to determine 
+     *                  whether text in the pattern and target matches.
+     *
+     * @param status    Error code reference; check it with U_FAILURE() after
+     *                  the call.
+     */
+    StringSearch(const UnicodeString& pattern,
+                 CharacterIterator* target,
+                 RuleBasedCollator* collator,
+                 UErrorCode& status);
+
+    /**
+     * Copy constructor.
+     */
+    StringSearch(const StringSearch& that);
+
+    /**
+     * Construct a <code>StringSearch</code> object using the collator and
+     * character boundary detection rules for a given locale
+     * <p>
+     * @param pattern   The text for which this object will search.
+     *
+     * @param target    The text in which to search for the pattern.
+     *
+     * @param loc       The locale whose collation and break-detection rules
+     *                  should be used.
+     *
+     * @param status    Error code reference; check it with U_FAILURE() after
+     *                  the call.
+     *
+     * NOTE(review): the Java original of this documentation said a
+     * ClassCastException is thrown if the collator for the specified locale
+     * is not a RuleBasedCollator.  This C++ signature has no such exception;
+     * presumably that failure is reported through status -- confirm.
+     */
+    StringSearch(const UnicodeString& pattern, 
+                 CharacterIterator* target, 
+                 const Locale& loc,
+                 UErrorCode& status);
+    /**
+     * Construct a <code>StringSearch</code> object using the collator for the default
+     * locale
+     * <p>
+     * @param pattern   The text for which this object will search.
+     *
+     * @param target    The text in which to search for the pattern.
+     *
+     * @param status    Error code reference; check it with U_FAILURE() after
+     *                  the call.
+     */
+    StringSearch(const UnicodeString& pattern, 
+                 const UnicodeString& target,
+                 UErrorCode& status);
+
+    virtual ~StringSearch(void);
+    /**
+     * Assignment operator.  Sets this iterator to have the same behavior,
+     * and iterate over the same text, as the one passed in.
+     */
+    StringSearch& operator=(const StringSearch& that);
+
+    /**
+     * Equality operator.  Returns TRUE if both SearchIterators are of the
+     * same class, have the same behavior, and iterate over the same text.
+     */
+    virtual bool_t operator==(const SearchIterator& that) const;
+
+    /**
+     * Not-equal operator.  If operator== returns TRUE, this returns FALSE,
+     * and vice versa.
+     */
+    bool_t operator!=(const SearchIterator& that) const;
+
+    /**
+     * Returns a newly-constructed iterator with the same
+     * behavior, and iterating over the same text, as this one.
+     * NOTE(review): presumably the caller owns the returned object -- confirm.
+     */
+    virtual SearchIterator* clone(void) const;
+
+    //-------------------------------------------------------------------
+    // Getters and Setters
+    //-------------------------------------------------------------------
+    
+    /**
+     * Sets this object's strength property. The strength determines the
+     * minimum level of difference considered significant during a
+     * search.  Generally, {@link Collator#TERTIARY} and 
+     * {@link Collator#IDENTICAL} indicate that all differences are
+     * considered significant, {@link Collator#SECONDARY} indicates
+     * that upper/lower case distinctions should be ignored, and
+     * {@link Collator#PRIMARY} indicates that both case and accents
+     * should be ignored.  However, the exact meanings of these constants
+     * are determined by individual Collator objects.
+     * <p>
+     * @param newStrength  The strength to use for subsequent searches.
+     * @param status       Error code reference.
+     *
+     * @see Collator#PRIMARY
+     * @see Collator#SECONDARY
+     * @see Collator#TERTIARY
+     * @see Collator#IDENTICAL
+     */
+     void setStrength(Collator::ECollationStrength newStrength, UErrorCode& status);
+    
+    
+    /**
+     * Returns this object's strength property, which indicates what level
+     * of differences are considered significant during a search.
+     * <p>
+     * @see #setStrength
+     */
+     Collator::ECollationStrength getStrength(void) const;
+    
+    /**
+     * Set the collator to be used for this string search.  Also changes
+     * the search strength to match that of the new collator.
+     * <p>
+     * This method causes internal data such as Boyer-Moore shift tables
+     * to be recalculated, but the iterator's position is unchanged.
+     * <p>
+     * @param coll    The collator to use.
+     * @param status  Error code reference.
+     *
+     * @see #getCollator
+     */
+     void setCollator(const RuleBasedCollator* coll, UErrorCode& status);
+    
+    /**
+     * Return the RuleBasedCollator being used for this string search.
+     */
+    const RuleBasedCollator&     getCollator() const;
+    
+    /**
+     * Set the pattern for which to search.  
+     * This method causes internal data such as Boyer-Moore shift tables
+     * to be recalculated, but the iterator's position is unchanged.
+     *
+     * @param pat     The new pattern text.
+     * @param status  Error code reference.
+     */
+    void setPattern(const UnicodeString& pat, UErrorCode& status);
+    
+    /**
+     * Returns the pattern for which this object is searching.
+     */
+    const UnicodeString& getPattern() const;
+    
+    /**
+     * Set the target text which should be searched and resets the
+     * iterator's position to point before the start of the new text.
+     * This method is useful if you want to re-use an iterator to
+     * search for the same pattern within a different body of text.
+     */
+    virtual void setTarget(const UnicodeString& newText);    
+
+    /**
+     * Set the target text which should be searched and resets the
+     * iterator's position to point before the start of the target text.
+     * This method is useful if you want to re-use an iterator to
+     * search for the same pattern within a different body of text.
+     *
+     * NOTE(review): the name suggests this object takes ownership of
+     * iterator -- confirm against the implementation.
+     *
+     * @see #getTarget
+     */
+    virtual void adoptTarget(CharacterIterator* iterator);
+
+    /**
+     * Reset the iterator.
+     */
+    virtual void reset(void);
+    /**
+     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
+     * This method is to implement a simple version of RTTI, since not all
+     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
+     * clone() methods call this method.
+     *
+     * @return          The class ID for this object. All objects of a
+     *                  given class have the same class ID.  Objects of
+     *                  other classes have different class IDs.
+     */
+    inline virtual UClassID getDynamicClassID(void) const;
+
+    /**
+     * Returns the class ID for this class.  This is useful only for
+     * comparing to a return value from getDynamicClassID().  For example:
+     *
+     *      Base* polymorphic_pointer = createPolymorphicObject();
+     *      if (polymorphic_pointer->getDynamicClassID() ==
+     *          Derived::getStaticClassID()) ...
+     *
+     * @return          The class ID for all objects of this class.
+     */
+    inline static UClassID getStaticClassID(void);
+
+protected:
+    //-------------------------------------------------------------------
+    // Protected search hooks (not for direct use by clients)
+    //-------------------------------------------------------------------
+
+    /**
+     * Search forward for matching text, starting at a given location.
+     * Clients should not call this method directly; instead they should call
+     * {@link SearchIterator#next}.
+     * <p>
+     * If a match is found, this method returns the index at which the match
+     * starts and calls {@link SearchIterator#setMatchLength}
+     * with the number of characters in the target
+     * text that make up the match.  If no match is found, the method returns
+     * <code>DONE</code> and does not call <tt>setMatchLength</tt>.
+     * <p>
+     * @param start  The index in the target text at which the search starts.
+     * @param status Error code reference.
+     *
+     * @return      The index at which the matched text in the target starts, or DONE
+     *              if no match was found.
+     * <p>
+     * @see SearchIterator#next
+     * @see SearchIterator#DONE
+     */
+    virtual int32_t handleNext(int32_t start, UErrorCode& status);
+    /**
+     * Search backward for matching text, starting at a given location.
+     * Clients should not call this method directly; instead they should call
+     * {@link SearchIterator#previous}.
+     * <p>
+     * If a match is found, this method returns the index at which the match
+     * starts and calls {@link SearchIterator#setMatchLength}
+     * with the number of characters in the target
+     * text that make up the match.  If no match is found, the method returns
+     * <code>DONE</code> and does not call <tt>setMatchLength</tt>.
+     * <p>
+     * @param start  The index in the target text at which the search starts.
+     * @param status Error code reference.
+     *
+     * @return      The index at which the matched text in the target starts, or DONE
+     *              if no match was found.
+     * <p>
+     * @see SearchIterator#previous
+     * @see SearchIterator#DONE
+     */
+    virtual int32_t handlePrev(int32_t start, UErrorCode& status);
+private:
+    /**
+     * Return a bitmask that will select only the portions of a collation 
+     * element that are significant at the given strength level.
+     */
+    static int32_t getMask(Collator::ECollationStrength strength);
+    
+
+    void initialize(UErrorCode& status);  // NOTE(review): presumably (re)builds the shift tables; body not visible in this header -- confirm
+    /**
+     * Method used by StringSearch to determine how far to the right to
+     * shift the pattern during a Boyer-Moore search.  
+     *
+     * @param curValue  The current value in the target text
+     * @param curIndex  The index in the pattern at which we failed to match
+     *                  curValue in the target text.
+     * @return          The number of positions to shift.
+     */
+    int32_t getShift( int32_t curValue, int32_t curIndex ) const;
+
+    /**
+     * Method used by StringSearch to determine how far to the left to
+     * shift the pattern during a reverse Boyer-Moore search.  
+     *
+     * @param curValue  The current value in the target text
+     * @param curIndex  The index in the pattern at which we failed to match
+     *                  curValue in the target text.
+     * @return          The number of positions to shift.
+     */
+    int32_t getBackShift( int32_t curValue, int32_t curIndex ) const;
+
+    /**
+     * Hash a collation element from its full size (32 bits) down into a
+     * value that can be used as an index into the shift tables.  Right
+     * now we do a modulus by the size of the hash table.
+     *
+     * TODO: At some point I should experiment to see whether a slightly
+     * more complicated hash function gives us a better distribution
+     * on multilingual text.  I doubt it will have much effect on
+     * performance, though.
+     *
+     * @param order  The collation element to hash.
+     * @return       An index into shiftTable / backShiftTable.
+     */
+    static int32_t hash(int32_t order);
+
+    //------------------------------------------------------------------------
+    // Private Data
+    //
+    CollationElementIterator      *iter;
+    RuleBasedCollator             *collator;
+    /* HSYS: Why is this kept separately?  Changes to it will not affect the
+       collator, so they make no change to the comparison result. */
+    Collator::ECollationStrength  strength;
+    
+    //------------------------------------------------------------------------
+    // Everything from here on down is the data used to represent the
+    // Boyer-Moore shift tables and the code that generates and manipulates
+    // them.
+    //    
+    int32_t         *valueList;
+    int32_t         valueListLen;
+    int32_t         shiftTable[256];        // forward shifts, indexed by hash()
+    int32_t         backShiftTable[256];    // reverse shifts, indexed by hash()
+
+    UnicodeString   pattern;            // The pattern string
+    int32_t         normLen;        // Number of collation elements in the pattern.
+    int32_t         minLen;         // Min of composed, decomposed versions
+    int32_t         maxLen;         // Max of composed, decomposed versions
+    CollationElementIterator *it;   // to be removed
+
+private:
+    /* to be removed */
+    void dumpTables();
+    /**
+     * Class ID: the address of this static member is the value returned by
+     * getStaticClassID().
+     */
+    static char fgClassID;
+};
+
+/** Inequality: the logical negation of this object's operator==. */
+inline bool_t StringSearch::operator!=(const SearchIterator& that) const 
+{
+    const bool_t same = this->operator==(that);
+    return !same;
+}
+
+/** Polymorphic class ID: forwards to StringSearch::getStaticClassID(). */
+inline UClassID StringSearch::getDynamicClassID(void) const 
+{
+    const UClassID classId = StringSearch::getStaticClassID();
+    return classId;
+}
+
+/** Class ID for StringSearch: the address of the static member fgClassID. */
+inline UClassID StringSearch::getStaticClassID(void) 
+{
+    char *marker = &fgClassID;
+    return (UClassID)marker;
+}
+
+
+#endif
+