ICU-9917 Try to make the Unicode file comment stripping tool work for UCA files too.

X-SVN-Rev: 33141
This commit is contained in:
Markus Scherer 2013-02-07 23:20:01 +00:00
parent 96f4a704b2
commit e7057c6ddc

View file

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Copyright (C) 2003-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -22,35 +22,29 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Locate the end of the data portion of a line.
 *
 * l is a NUL-terminated line. If it contains a '#', the result points at
 * the first '#' after backing up over any run of spaces and tabs that
 * immediately precedes it (never moving before l itself). If there is no
 * '#', the result points at the terminating NUL.
 *
 * The returned pointer refers into the caller's buffer; the const
 * qualifier is dropped so that a caller owning a writable buffer can
 * truncate the line there.
 */
static char *
endOfData(const char *l) {
    const char *hash=strchr(l, '#');
    const char *limit;

    if(hash==NULL) {
        /* no comment marker: the data extends to the end of the string */
        return (char *)l+strlen(l);
    }

    /* walk backwards over the whitespace that separates data and comment */
    for(limit=hash; limit!=l; --limit) {
        char prev=limit[-1];
        if(prev!=' ' && prev!='\t') {
            break;
        }
    }
    return (char *)limit;
}
extern int
main(int argc, const char *argv[]) {
static char line[2000];
char *end;
/*
* Careful: Do not strip a comment right after the
* UTF-8 signature byte sequence EF BB BF (U+FEFF "BOM")
* which can occur on the first line of a UTF-8 text file.
*/
while(gets(line)!=NULL) {
if(strtol(line, &end, 16)>=0 && end!=line) {
/* code point or range followed by semicolon and data, remove comment */
*endOfData(line)=0;
char *end=strrchr(line, '#');
char c;
/*
* Assume that a data line comment is preceded by some white space.
* This also protects data like '#' in UCA_Rules.txt.
*/
if(end!=NULL && end!=line && ((c=*(end-1))==' ' || c=='\t')) {
/* ignore whitespace before the comment */
while(end!=line && ((c=*(end-1))==' ' || c=='\t')) {
--end;
}
*end=0;
}
puts(line);
}