From dd372dc1047c601113ee5a472bdcc9be2a82f056 Mon Sep 17 00:00:00 2001 From: Jeff Genovy <29107334+jefgen@users.noreply.github.com> Date: Wed, 9 Sep 2020 22:01:00 -0700 Subject: [PATCH] ICU-21196 Clarify API docs that input strings must out-live the object in ucoleiter.h and usearch.h --- icu4c/source/i18n/unicode/ucoleitr.h | 26 ++-- icu4c/source/i18n/unicode/usearch.h | 197 +++++++++++++++------------ 2 files changed, 124 insertions(+), 99 deletions(-) diff --git a/icu4c/source/i18n/unicode/ucoleitr.h b/icu4c/source/i18n/unicode/ucoleitr.h index 7d24b44ed4a..d50003622a2 100644 --- a/icu4c/source/i18n/unicode/ucoleitr.h +++ b/icu4c/source/i18n/unicode/ucoleitr.h @@ -23,7 +23,7 @@ #if !UCONFIG_NO_COLLATION /** - * This indicates an error has occured during processing or if no more CEs is + * This indicates an error has occurred during processing or if no more CEs is * to be returned. * @stable ICU 2.0 */ @@ -66,14 +66,14 @@ typedef struct UCollationElements UCollationElements; * . UCollationElements *c; * . UCollatorOld *coll; * . UErrorCode success = U_ZERO_ERROR; - * . s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) ); - * . u_uastrcpy(s, "This is a test"); + * . str=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) ); + * . u_uastrcpy(str, "This is a test"); * . coll = ucol_open(NULL, &success); * . c = ucol_openElements(coll, str, u_strlen(str), &status); * . order = ucol_next(c, &success); * . ucol_reset(c); * . order = ucol_prev(c, &success); - * . free(s); + * . free(str); * . ucol_close(coll); * . ucol_closeElements(c); * . } @@ -102,6 +102,10 @@ typedef struct UCollationElements UCollationElements; /** * Open the collation elements for a string. * + * The UCollationElements retains a pointer to the supplied text. + * The caller must not modify or delete the text while the UCollationElements + * object is used to iterate over this text. + * * @param coll The collator containing the desired collation rules. 
* @param text The text to iterate over. * @param textLength The number of characters in text, or -1 if null-terminated @@ -115,7 +119,6 @@ ucol_openElements(const UCollator *coll, int32_t textLength, UErrorCode *status); - /** * get a hash code for a key... Not very useful! * @param key the given key. @@ -153,7 +156,7 @@ ucol_reset(UCollationElements *elems); * @param elems The UCollationElements containing the text. * @param status A pointer to a UErrorCode to receive any errors. * @return The next collation elements ordering, otherwise returns UCOL_NULLORDER - * if an error has occured or if the end of string has been reached + * if an error has occurred or if the end of string has been reached * @stable ICU 2.0 */ U_CAPI int32_t U_EXPORT2 @@ -168,7 +171,7 @@ ucol_next(UCollationElements *elems, UErrorCode *status); * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack * buffer has been exhausted. * @return The previous collation elements ordering, otherwise returns - * UCOL_NULLORDER if an error has occured or if the start of string has + * UCOL_NULLORDER if an error has occurred or if the start of string has * been reached. * @stable ICU 2.0 */ @@ -194,6 +197,11 @@ ucol_getMaxExpansion(const UCollationElements *elems, int32_t order); * Property settings for collation will remain the same. * In order to reset the iterator to the current collation property settings, * the API reset() has to be called. + * + * The UCollationElements retains a pointer to the supplied text. + * The caller must not modify or delete the text while the UCollationElements + * object is used to iterate over this text. + * * @param elems The UCollationElements to set. * @param text The source text containing the collation elements. * @param textLength The length of text, or -1 if null-terminated. 
@@ -202,7 +210,7 @@ ucol_getMaxExpansion(const UCollationElements *elems, int32_t order); * @stable ICU 2.0 */ U_CAPI void U_EXPORT2 -ucol_setText( UCollationElements *elems, +ucol_setText( UCollationElements *elems, const UChar *text, int32_t textLength, UErrorCode *status); @@ -233,7 +241,7 @@ ucol_getOffset(const UCollationElements *elems); */ U_CAPI void U_EXPORT2 ucol_setOffset(UCollationElements *elems, - int32_t offset, + int32_t offset, UErrorCode *status); /** diff --git a/icu4c/source/i18n/unicode/usearch.h b/icu4c/source/i18n/unicode/usearch.h index 2076ae022f7..65747cb1ed4 100644 --- a/icu4c/source/i18n/unicode/usearch.h +++ b/icu4c/source/i18n/unicode/usearch.h @@ -28,8 +28,8 @@ * \brief C API: StringSearch * * C APIs for an engine that provides language-sensitive text searching based - * on the comparison rules defined in a UCollator data struct, - * see ucol.h. This ensures that language eccentricity can be + * on the comparison rules defined in a UCollator data struct, + * see ucol.h. This ensures that language eccentricity can be * handled, e.g. for the German collator, characters ß and SS will be matched * if case is chosen to be ignored. * See the @@ -57,18 +57,18 @@ * Option 2. will be the default. *

* This search has APIs similar to that of other text iteration mechanisms - * such as the break iterators in ubrk.h. Using these + * such as the break iterators in ubrk.h. Using these * APIs, it is easy to scan through text looking for all occurrences of * a given pattern. This search iterator allows changing of direction by - * calling a reset followed by a next or previous. - * Though a direction change can occur without calling reset first, + * calling a reset followed by a next or previous. + * Though a direction change can occur without calling reset first, * this operation comes with some speed penalty. * Generally, match results in the forward direction will match the result * matches in the backwards direction in the reverse order *

- * usearch.h provides APIs to specify the starting position - * within the text string to be searched, e.g. usearch_setOffset, - * usearch_preceding and usearch_following. Since the + * usearch.h provides APIs to specify the starting position + * within the text string to be searched, e.g. usearch_setOffset, + * usearch_preceding and usearch_following. Since the * starting position will be set as it is specified, please take note that * there are some dangerous positions which the search may render incorrect * results: @@ -104,7 +104,7 @@ * Though collator attributes will be taken into consideration while * performing matches, there are no APIs here for setting and getting the * attributes. These attributes can be set by getting the collator - * from usearch_getCollator and using the APIs in ucol.h. + * from usearch_getCollator and using the APIs in ucol.h. * Lastly to update String Search to the new collator attributes, * usearch_reset() has to be called. *

@@ -280,9 +280,13 @@ typedef enum { /* open and close ------------------------------------------------------ */ /** -* Creating a search iterator data struct using the argument locale language +* Creates a String Search iterator data struct using the argument locale language * rule set. A collator will be created in the process, which will be owned by -* this search and will be deleted in usearch_close. +* this String Search and will be deleted in usearch_close. +* +* The UStringSearch retains a pointer to both the pattern and text strings. +* The caller must not modify or delete them while using the UStringSearch. +* * @param pattern for matching * @param patternlength length of the pattern, -1 for null-termination * @param text text string @@ -291,9 +295,9 @@ typedef enum { * @param breakiter A BreakIterator that will be used to restrict the points * at which matches are detected. If a match is found, but * the match's start or end index is not a boundary as -* determined by the BreakIterator, the match will +* determined by the BreakIterator, the match will * be rejected and another will be searched for. -* If this parameter is NULL, no break detection is +* If this parameter is NULL, no break detection is * attempted. * @param status for errors if it occurs. If pattern or text is NULL, or if * patternlength or textlength is 0 then an @@ -301,53 +305,58 @@ typedef enum { * @return search iterator data structure, or NULL if there is an error. * @stable ICU 2.4 */ -U_CAPI UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern, - int32_t patternlength, - const UChar *text, +U_CAPI UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern, + int32_t patternlength, + const UChar *text, int32_t textlength, const char *locale, UBreakIterator *breakiter, UErrorCode *status); /** -* Creating a search iterator data struct using the argument collator language -* rule set. 
Note, user retains the ownership of this collator, thus the +* Creates a String Search iterator data struct using the argument collator language +* rule set. Note, user retains the ownership of this collator, thus the * responsibility of deletion lies with the user. -* NOTE: string search cannot be instantiated from a collator that has -* collate digits as numbers (CODAN) turned on. + +* NOTE: String Search cannot be instantiated from a collator that has +* collate digits as numbers (CODAN) turned on (UCOL_NUMERIC_COLLATION). +* +* The UStringSearch retains a pointer to both the pattern and text strings. +* The caller must not modify or delete them while using the UStringSearch. +* * @param pattern for matching * @param patternlength length of the pattern, -1 for null-termination * @param text text string * @param textlength length of the text string, -1 for null-termination * @param collator used for the language rules * @param breakiter A BreakIterator that will be used to restrict the points -* at which matches are detected. If a match is found, but -* the match's start or end index is not a boundary as -* determined by the BreakIterator, the match will -* be rejected and another will be searched for. -* If this parameter is NULL, no break detection is +* at which matches are detected. If a match is found, but +* the match's start or end index is not a boundary as +* determined by the BreakIterator, the match will +* be rejected and another will be searched for. +* If this parameter is NULL, no break detection is * attempted. -* @param status for errors if it occurs. If collator, pattern or text is NULL, -* or if patternlength or textlength is 0 then an +* @param status for errors if it occurs. If collator, pattern or text is NULL, +* or if patternlength or textlength is 0 then an * U_ILLEGAL_ARGUMENT_ERROR is returned. * @return search iterator data structure, or NULL if there is an error. 
* @stable ICU 2.4 */ U_CAPI UStringSearch * U_EXPORT2 usearch_openFromCollator( - const UChar *pattern, + const UChar *pattern, int32_t patternlength, - const UChar *text, + const UChar *text, int32_t textlength, const UCollator *collator, UBreakIterator *breakiter, UErrorCode *status); /** -* Destroying and cleaning up the search iterator data struct. -* If a collator is created in usearch_open, it will be destroyed here. -* @param searchiter data struct to clean up -* @stable ICU 2.4 -*/ + * Destroys and cleans up the String Search iterator data struct. + * If a collator was created in usearch_open, then it will be destroyed here. + * @param searchiter The UStringSearch to clean up + * @stable ICU 2.4 + */ U_CAPI void U_EXPORT2 usearch_close(UStringSearch *searchiter); #if U_SHOW_CPLUSPLUS_API @@ -386,14 +395,14 @@ U_NAMESPACE_END * @param status error status if any. * @stable ICU 2.4 */ -U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, - int32_t position, +U_CAPI void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch, + int32_t position, UErrorCode *status); /** * Return the current index in the string text being searched. * If the iteration has gone past the end of the text (or past the beginning -* for a backwards search), USEARCH_DONE is returned. +* for a backwards search), USEARCH_DONE is returned. * @param strsrch search iterator data struct * @see #USEARCH_DONE * @stable ICU 2.4 @@ -403,7 +412,7 @@ U_CAPI int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch); /** * Sets the text searching attributes located in the enum USearchAttribute * with values from the enum USearchAttributeValue. -* USEARCH_DEFAULT can be used for all attributes for resetting. +* USEARCH_DEFAULT can be used for all attributes for resetting. 
* @param strsrch search iterator data struct * @param attribute text attribute to be set * @param value text attribute value @@ -411,7 +420,7 @@ U_CAPI int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch); * @see #usearch_getAttribute * @stable ICU 2.4 */ -U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch, +U_CAPI void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch, USearchAttribute attribute, USearchAttributeValue value, UErrorCode *status); @@ -431,12 +440,12 @@ U_CAPI USearchAttributeValue U_EXPORT2 usearch_getAttribute( /** * Returns the index to the match in the text string that was searched. * This call returns a valid result only after a successful call to -* usearch_first, usearch_next, usearch_previous, -* or usearch_last. +* usearch_first, usearch_next, usearch_previous, +* or usearch_last. * Just after construction, or after a searching method returns -* USEARCH_DONE, this method will return USEARCH_DONE. +* USEARCH_DONE, this method will return USEARCH_DONE. *

-* Use usearch_getMatchedLength to get the matched string length. +* Use usearch_getMatchedLength to get the matched string length. * @param strsrch search iterator data struct * @return index to a substring within the text string that is being * searched. @@ -453,10 +462,10 @@ U_CAPI int32_t U_EXPORT2 usearch_getMatchedStart( /** * Returns the length of text in the string which matches the search pattern. * This call returns a valid result only after a successful call to -* usearch_first, usearch_next, usearch_previous, -* or usearch_last. +* usearch_first, usearch_next, usearch_previous, +* or usearch_last. * Just after construction, or after a searching method returns -* USEARCH_DONE, this method will return 0. +* USEARCH_DONE, this method will return 0. * @param strsrch search iterator data struct * @return The length of the match in the string text, or 0 if there is no * match currently. @@ -472,17 +481,17 @@ U_CAPI int32_t U_EXPORT2 usearch_getMatchedLength( /** * Returns the text that was matched by the most recent call to -* usearch_first, usearch_next, usearch_previous, -* or usearch_last. +* usearch_first, usearch_next, usearch_previous, +* or usearch_last. * If the iterator is not pointing at a valid match (e.g. just after -* construction or after USEARCH_DONE has been returned, returns +* construction or after USEARCH_DONE has been returned), returns * an empty string. If result is not large enough to store the matched text, * result will be filled with the partial text and an U_BUFFER_OVERFLOW_ERROR * will be returned in status. result will be null-terminated whenever * possible. If the buffer fits the matched text exactly, a null-termination * is not possible, then a U_STRING_NOT_TERMINATED_ERROR set in status. * Pre-flighting can be either done with length = 0 or the API -* usearch_getMatchedLength. +* usearch_getMatchedLength. 
* @param strsrch search iterator data struct * @param result UChar buffer to store the matched string * @param resultCapacity length of the result buffer @@ -509,9 +518,9 @@ U_CAPI int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch, * @param breakiter A BreakIterator that will be used to restrict the points * at which matches are detected. If a match is found, but * the match's start or end index is not a boundary as -* determined by the BreakIterator, the match will +* determined by the BreakIterator, the match will * be rejected and another will be searched for. -* If this parameter is NULL, no break detection is +* If this parameter is NULL, no break detection is * attempted. * @param status for errors if it occurs * @see #usearch_getBreakIterator @@ -524,8 +533,8 @@ U_CAPI void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch, /** * Returns the BreakIterator that is used to restrict the points at which * matches are detected. This will be the same object that was passed to the -* constructor or to usearch_setBreakIterator. Note that -* NULL +* constructor or to usearch_setBreakIterator. Note that +* NULL * is a legal value; it means that break detection should not be attempted. * @param strsrch search iterator data struct * @return break iterator used @@ -536,11 +545,15 @@ U_CAPI const UBreakIterator * U_EXPORT2 usearch_getBreakIterator( const UStringSearch *strsrch); #endif - + /** * Set the string text to be searched. Text iteration will hence begin at the * start of the text string. This method is useful if you want to re-use an * iterator to search for the same pattern within a different body of text. +* +* The UStringSearch retains a pointer to the text string. The caller must not +* modify or delete the string while using the UStringSearch. 
+* * @param strsrch search iterator data struct * @param text new string to look for match * @param textlength length of the new string, -1 for null-termination @@ -569,9 +582,9 @@ U_CAPI const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch, /** * Gets the collator used for the language rules. *

-* Deleting the returned UCollator before calling -* usearch_close would cause the string search to fail. -* usearch_close will delete the collator if this search owns it. +* Deleting the returned UCollator before calling +* usearch_close would cause the string search to fail. +* usearch_close will delete the collator if this search owns it. * @param strsrch search iterator data struct * @return collator * @stable ICU 2.4 @@ -597,6 +610,10 @@ U_CAPI void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch, * Sets the pattern used for matching. * Internal data like the Boyer Moore table will be recalculated, but the * iterator's position is unchanged. +* +* The UStringSearch retains a pointer to the pattern string. The caller must not +* modify or delete the string while using the UStringSearch. +* * @param strsrch search iterator data struct * @param pattern string * @param patternlength pattern length, -1 for null-terminated string @@ -628,13 +645,13 @@ U_CAPI const UChar * U_EXPORT2 usearch_getPattern( * Returns the first index at which the string text matches the search * pattern. * The iterator is adjusted so that its current index (as returned by -* usearch_getOffset) is the match position if one was found. -* If a match is not found, USEARCH_DONE will be returned and -* the iterator will be adjusted to the index USEARCH_DONE. +* usearch_getOffset) is the match position if one was found. +* If a match is not found, USEARCH_DONE will be returned and +* the iterator will be adjusted to the index USEARCH_DONE. * @param strsrch search iterator data struct * @param status for errors if it occurs * @return The character index of the first match, or -* USEARCH_DONE if there are no matches. +* USEARCH_DONE if there are no matches. 
* @see #usearch_getOffset * @see #USEARCH_DONE * @stable ICU 2.4 @@ -643,13 +660,13 @@ U_CAPI int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch, UErrorCode *status); /** -* Returns the first index equal or greater than position at which +* Returns the first index equal or greater than position at which * the string text * matches the search pattern. The iterator is adjusted so that its current -* index (as returned by usearch_getOffset) is the match position if +* index (as returned by usearch_getOffset) is the match position if * one was found. -* If a match is not found, USEARCH_DONE will be returned and -* the iterator will be adjusted to the index USEARCH_DONE +* If a match is not found, USEARCH_DONE will be returned and +* the iterator will be adjusted to the index USEARCH_DONE *

* Search positions that may render incorrect results are highlighted in the * header comments. If position is less than or greater than the text range @@ -657,8 +674,8 @@ U_CAPI int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch, * @param strsrch search iterator data struct * @param position to start the search at * @param status for errors if it occurs -* @return The character index of the first match following pos, -* or USEARCH_DONE if there are no matches. +* @return The character index of the first match following pos, +* or USEARCH_DONE if there are no matches. * @see #usearch_getOffset * @see #USEARCH_DONE * @stable ICU 2.4 @@ -670,13 +687,13 @@ U_CAPI int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch, /** * Returns the last index in the target text at which it matches the search * pattern. The iterator is adjusted so that its current -* index (as returned by usearch_getOffset) is the match position if +* index (as returned by usearch_getOffset) is the match position if * one was found. -* If a match is not found, USEARCH_DONE will be returned and -* the iterator will be adjusted to the index USEARCH_DONE. +* If a match is not found, USEARCH_DONE will be returned and +* the iterator will be adjusted to the index USEARCH_DONE. * @param strsrch search iterator data struct * @param status for errors if it occurs -* @return The index of the first match, or USEARCH_DONE if there +* @return The index of the first match, or USEARCH_DONE if there * are no matches. * @see #usearch_getOffset * @see #USEARCH_DONE @@ -686,26 +703,26 @@ U_CAPI int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch, UErrorCode *status); /** -* Returns the first index less than position at which the string text +* Returns the first index less than position at which the string text * matches the search pattern. 
The iterator is adjusted so that its current -* index (as returned by usearch_getOffset) is the match position if +* index (as returned by usearch_getOffset) is the match position if * one was found. -* If a match is not found, USEARCH_DONE will be returned and -* the iterator will be adjusted to the index USEARCH_DONE +* If a match is not found, USEARCH_DONE will be returned and +* the iterator will be adjusted to the index USEARCH_DONE *

* Search positions that may render incorrect results are highlighted in the * header comments. If position is less than or greater than the text range * for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned. *

-* When USEARCH_OVERLAP option is off, the last index of the -* result match is always less than position. -* When USERARCH_OVERLAP is on, the result match may span across -* position. +* When USEARCH_OVERLAP option is off, the last index of the +* result match is always less than position. +* When USEARCH_OVERLAP is on, the result match may span across +* position. * @param strsrch search iterator data struct * @param position index position the search is to begin at * @param status for errors if it occurs -* @return The character index of the first match preceding pos, -* or USEARCH_DONE if there are no matches. +* @return The character index of the first match preceding pos, +* or USEARCH_DONE if there are no matches. * @see #usearch_getOffset * @see #USEARCH_DONE * @stable ICU 2.4 @@ -718,14 +735,14 @@ U_CAPI int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch, * Returns the index of the next point at which the string text matches the * search pattern, starting from the current position. * The iterator is adjusted so that its current -* index (as returned by usearch_getOffset) is the match position if +* index (as returned by usearch_getOffset) is the match position if * one was found. -* If a match is not found, USEARCH_DONE will be returned and -* the iterator will be adjusted to the index USEARCH_DONE +* If a match is not found, USEARCH_DONE will be returned and +* the iterator will be adjusted to the index USEARCH_DONE * @param strsrch search iterator data struct * @param status for errors if it occurs * @return The index of the next match after the current position, or -* USEARCH_DONE if there are no more matches. +* USEARCH_DONE if there are no more matches. * @see #usearch_first * @see #usearch_getOffset * @see #USEARCH_DONE @@ -738,14 +755,14 @@ U_CAPI int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch, * Returns the index of the previous point at which the string text matches * the search pattern, starting at the current position. 
* The iterator is adjusted so that its current -* index (as returned by usearch_getOffset) is the match position if +* index (as returned by usearch_getOffset) is the match position if * one was found. -* If a match is not found, USEARCH_DONE will be returned and -* the iterator will be adjusted to the index USEARCH_DONE +* If a match is not found, USEARCH_DONE will be returned and +* the iterator will be adjusted to the index USEARCH_DONE * @param strsrch search iterator data struct * @param status for errors if it occurs * @return The index of the previous match before the current position, -* or USEARCH_DONE if there are no more matches. +* or USEARCH_DONE if there are no more matches. * @see #usearch_last * @see #usearch_getOffset * @see #USEARCH_DONE