From 5d32123d29ed5ef80a19959138ca919f835c1242 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Sat, 5 Aug 2006 21:27:11 +0000 Subject: [PATCH] ICU-4558 fix and clarify utfxx.h API docs for input string indexes for U16_ and U8_ macros X-SVN-Rev: 19988 --- icu4c/source/common/unicode/utf.h | 10 ++++++++++ icu4c/source/common/unicode/utf16.h | 20 +++++++++---------- icu4c/source/common/unicode/utf8.h | 30 ++++++++++++++--------------- 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/icu4c/source/common/unicode/utf.h b/icu4c/source/common/unicode/utf.h index 32c2d74a2e6..2dfef63d66f 100644 --- a/icu4c/source/common/unicode/utf.h +++ b/icu4c/source/common/unicode/utf.h @@ -63,6 +63,14 @@ * malformed sequences can be expressed unambiguously with a distinct subrange * of Unicode code points.) * + * The regular "safe" macros require that the initial, passed-in string index + * is within bounds. They only check the index when they read more than one + * code unit. This is usually done with code similar to the following loop: + *
<pre>while(i<length) {
+ *   U16_NEXT(s, i, length, c);
+ *   // use c
+ * }</pre>
+ *
  * When it is safe to assume that text is well-formed UTF-16
  * (does not contain single, unpaired surrogates), then one can use
  * U16_..._UNSAFE macros.
@@ -80,6 +88,8 @@
  * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
  * and are fast, while the safe UTF-8 macros call functions for all but the
  * trivial (ASCII) cases.
+ * (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common
+ * characters inline as well.)
  *
  * Unlike with UTF-16, malformed sequences cannot be expressed with distinct
  * code point values (0..U+10ffff). They are indicated with negative values instead.
diff --git a/icu4c/source/common/unicode/utf16.h b/icu4c/source/common/unicode/utf16.h
index 217c27429b7..cd8c5c1ed10 100644
--- a/icu4c/source/common/unicode/utf16.h
+++ b/icu4c/source/common/unicode/utf16.h
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1999-2005, International Business Machines
+*   Copyright (C) 1999-2006, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -179,7 +179,7 @@
  *
  * @param s const UChar * string
  * @param start starting string offset (usually 0)
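- * @param i string offset, start<=i<length
+ * @param i string offset, must be start<=i<length
[... the remaining utf16.h hunks and the start of the utf8.h diff were lost in extraction ...]
         (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \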
         (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
-    } else if((uint32_t)(c)<=0xd7ff && (i)+2<(length)) { \
+    } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
         (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
         (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
         (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
     } else { \
-        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, &(isError)); \
+        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(capacity), c, &(isError)); \
     } \
 }
 
@@ -362,7 +362,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
  * "Safe" macro, checks for illegal sequences and for string boundaries.
  *
  * @param s const uint8_t * string
- * @param i string offset, i<length