ICU-9917 Try to make the Unicode file comment stripping tool work for UCA files too.

X-SVN-Rev: 33141
2025-04-08 15:05:53 +00:00 · 2013-02-07 23:20:01 +00:00 · 2013-02-07 23:20:01 +00:00 · e7057c6ddc
commit e7057c6ddc
parent 96f4a704b2
1 changed file with 18 additions and 24 deletions
--- a/tools/unicode/c/genprops/misc/ucdstrip.c
+++ b/tools/unicode/c/genprops/misc/ucdstrip.c
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 2003, International Business Machines
+*   Copyright (C) 2003-2013, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
@@ -22,35 +22,29 @@

 #include <stdio.h>
 #include <string.h>
-#include <stdlib.h>
-
-/* return the first character position after the end of the data */
-static char *
-endOfData(const char *l) {
-    char *end;
-    char c;
-
-    end=strchr(l, '#');
-    if(end!=NULL) {
-        /* ignore whitespace before the comment */
-        while(l!=end && ((c=*(end-1))==' ' || c=='\t')) {
-            --end;
-        }
-    } else {
-        end=strchr(l, 0);
-    }
-    return end;
-}

 extern int
 main(int argc, const char *argv[]) {
    static char line[2000];
-    char *end;

+    /*
+     * Careful: Do not strip a comment right after the
+     * UTF-8 signature byte sequence EF BB BF (U+FEFF "BOM")
+     * which can occur on the first line of a UTF-8 text file.
+     */
    while(gets(line)!=NULL) {
-        if(strtol(line, &end, 16)>=0 && end!=line) {
-            /* code point or range followed by semicolon and data, remove comment */
-            *endOfData(line)=0;
+        char *end=strrchr(line, '#');
+        char c;
+        /*
+         * Assume that a data line comment is preceded by some white space.
+         * This also protects data like '#' in UCA_Rules.txt.
+         */
+        if(end!=NULL && end!=line && ((c=*(end-1))==' ' || c=='\t')) {
+            /* ignore whitespace before the comment */
+            while(end!=line && ((c=*(end-1))==' ' || c=='\t')) {
+                --end;
+            }
+            *end=0;
        }
        puts(line);
    }