mirror of
https://github.com/unicode-org/icu.git
synced 2025-04-10 07:39:16 +00:00
ICU-10923 Adding --filterDir option to genrb.
- Reads, parses, and applies the filter file syntax. - Removes unused keys from the resource bundle. - Adds sample filter txt file with test in intltest.
This commit is contained in:
parent
1b5c721ef2
commit
7ec3357d70
13 changed files with 620 additions and 89 deletions
|
@ -195,6 +195,8 @@ void NewResourceBundleTest::runIndexedTest( int32_t index, UBool exec, const cha
|
|||
#endif
|
||||
|
||||
case 5: name = "TestGetByFallback"; if(exec) TestGetByFallback(); break;
|
||||
case 6: name = "TestFilter"; if(exec) TestFilter(); break;
|
||||
|
||||
default: name = ""; break; //needed to end loop
|
||||
}
|
||||
}
|
||||
|
@ -1198,5 +1200,74 @@ NewResourceBundleTest::TestGetByFallback() {
|
|||
status = U_ZERO_ERROR;
|
||||
|
||||
}
|
||||
|
||||
|
||||
#define REQUIRE_SUCCESS(status) { \
|
||||
if (status.errIfFailureAndReset("line %d", __LINE__)) { \
|
||||
return; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define REQUIRE_ERROR(expected, status) { \
|
||||
if (!status.expectErrorAndReset(expected, "line %d", __LINE__)) { \
|
||||
return; \
|
||||
} \
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests the --filterDir option in genrb.
|
||||
*
|
||||
* Input resource text file: test/testdata/filtertest.txt
|
||||
* Input filter rule file: test/testdata/filters/filtertest.txt
|
||||
*
|
||||
* The resource bundle should contain no keys matched by the filter
|
||||
* and should contain all other keys.
|
||||
*/
|
||||
void NewResourceBundleTest::TestFilter() {
|
||||
IcuTestErrorCode status(*this, "TestFilter");
|
||||
|
||||
ResourceBundle rb(loadTestData(status), "filtertest", status);
|
||||
REQUIRE_SUCCESS(status);
|
||||
assertEquals("rb", rb.getType(), URES_TABLE);
|
||||
|
||||
ResourceBundle alabama = rb.get("alabama", status);
|
||||
REQUIRE_SUCCESS(status);
|
||||
assertEquals("alabama", alabama.getType(), URES_TABLE);
|
||||
|
||||
ResourceBundle alaska = alabama.get("alaska", status);
|
||||
REQUIRE_SUCCESS(status);
|
||||
assertEquals("alaska", alaska.getType(), URES_TABLE);
|
||||
|
||||
ResourceBundle arizona = alaska.get("arizona", status);
|
||||
REQUIRE_SUCCESS(status);
|
||||
assertEquals("arizona", arizona.getType(), URES_STRING);
|
||||
|
||||
assertEquals("arizona", u"arkansas", arizona.getString(status));
|
||||
REQUIRE_SUCCESS(status);
|
||||
|
||||
// Filter: california should not be included
|
||||
ResourceBundle california = alaska.get("california", status);
|
||||
REQUIRE_ERROR(U_MISSING_RESOURCE_ERROR, status);
|
||||
|
||||
// Filter: connecticut should not be included
|
||||
ResourceBundle connecticut = alabama.get("connecticut", status);
|
||||
REQUIRE_ERROR(U_MISSING_RESOURCE_ERROR, status);
|
||||
|
||||
ResourceBundle fornia = rb.get("fornia", status);
|
||||
REQUIRE_SUCCESS(status);
|
||||
assertEquals("fornia", fornia.getType(), URES_TABLE);
|
||||
|
||||
ResourceBundle hawaii = fornia.get("hawaii", status);
|
||||
REQUIRE_SUCCESS(status);
|
||||
assertEquals("hawaii", hawaii.getType(), URES_STRING);
|
||||
|
||||
assertEquals("hawaii", u"idaho", hawaii.getString(status));
|
||||
REQUIRE_SUCCESS(status);
|
||||
|
||||
// Filter: illinois should not be included
|
||||
ResourceBundle illinois = fornia.get("illinois", status);
|
||||
REQUIRE_ERROR(U_MISSING_RESOURCE_ERROR, status);
|
||||
}
|
||||
|
||||
//eof
|
||||
|
||||
|
|
|
@ -38,6 +38,8 @@ public:
|
|||
|
||||
void TestGetByFallback(void);
|
||||
|
||||
void TestFilter(void);
|
||||
|
||||
private:
|
||||
/**
|
||||
* The assignment operator has no real implementation.
|
||||
|
|
11
icu4c/source/test/testdata/BUILDRULES.py
vendored
11
icu4c/source/test/testdata/BUILDRULES.py
vendored
|
@ -98,6 +98,17 @@ def generate_rb(config, glob, common_vars):
|
|||
tool = IcuTool("genrb"),
|
||||
args = "-s {IN_DIR} -d {TMP_DIR} {INPUT_FILES[0]}",
|
||||
format_with = {}
|
||||
),
|
||||
SingleExecutionRequest(
|
||||
name = "filtertest",
|
||||
category = "tests",
|
||||
dep_files = [],
|
||||
input_files = [InFile("filtertest.txt")],
|
||||
output_files = [OutFile("filtertest.res")],
|
||||
tool = IcuTool("genrb"),
|
||||
args = "-s {IN_DIR} -d {OUT_DIR} -i {OUT_DIR} "
|
||||
"--filterDir {IN_DIR}/filters filtertest.txt",
|
||||
format_with = {}
|
||||
)
|
||||
]
|
||||
|
||||
|
|
6
icu4c/source/test/testdata/filters/filtertest.txt
vendored
Normal file
6
icu4c/source/test/testdata/filters/filtertest.txt
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
# Copyright (C) 2018 and later: Unicode, Inc. and others.
|
||||
# License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
-/alabama
|
||||
+/alabama/alaska/arizona
|
||||
-/fornia/illinois
|
20
icu4c/source/test/testdata/filtertest.txt
vendored
Normal file
20
icu4c/source/test/testdata/filtertest.txt
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
// Copyright (C) 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
filtertest {
|
||||
alabama {
|
||||
alaska {
|
||||
arizona {"arkansas"}
|
||||
california {"colorado"}
|
||||
}
|
||||
connecticut {
|
||||
arizona {"delaware"}
|
||||
california {"florida"}
|
||||
}
|
||||
}
|
||||
// test suffixes
|
||||
fornia {
|
||||
hawaii {"idaho"}
|
||||
illinois {"indiana"}
|
||||
}
|
||||
}
|
|
@ -38,7 +38,7 @@ CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT=explicit -DUNISTR_FROM_STRING_EXPLICIT=e
|
|||
LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
|
||||
|
||||
OBJECTS = errmsg.o genrb.o parse.o read.o reslist.o ustr.o rbutil.o \
|
||||
wrtjava.o rle.o wrtxml.o prscmnts.o
|
||||
wrtjava.o rle.o wrtxml.o prscmnts.o filterrb.o
|
||||
DERB_OBJ = derb.o
|
||||
|
||||
DEPS = $(OBJECTS:.o=.d)
|
||||
|
|
167
icu4c/source/tools/genrb/filterrb.cpp
Normal file
167
icu4c/source/tools/genrb/filterrb.cpp
Normal file
|
@ -0,0 +1,167 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#include <iostream>
|
||||
#include <stack>
|
||||
|
||||
#include "filterrb.h"
|
||||
#include "errmsg.h"
|
||||
|
||||
|
||||
const char* PathFilter::kEInclusionNames[] = {
|
||||
"INCLUDE",
|
||||
"PARTIAL",
|
||||
"EXCLUDE"
|
||||
};
|
||||
|
||||
|
||||
/** Creates the empty path, which denotes the top of the resource tree. */
ResKeyPath::ResKeyPath() {}

/**
 * Parses an absolute path such as "/units/length/meter" into its keys.
 *
 * On a malformed path (no leading slash, an empty segment, or a trailing
 * slash) a message is written to std::cerr and status is set to
 * U_PARSE_ERROR. Keys parsed before the offending segment remain pushed.
 *
 * @param path   The textual path; must begin with '/'.
 * @param status Set to U_PARSE_ERROR when the path is malformed.
 */
ResKeyPath::ResKeyPath(const std::string& path, UErrorCode& status) {
    const bool hasLeadingSlash = !path.empty() && path.front() == '/';
    if (!hasLeadingSlash) {
        std::cerr << "genrb error: path must start with /: " << path << std::endl;
        status = U_PARSE_ERROR;
        return;
    }

    // `slash` always holds the index of the separator that precedes the
    // segment being read; it becomes npos once the last segment is reached.
    size_t slash = 0;
    do {
        const size_t begin = slash + 1;
        slash = path.find('/', begin);
        const size_t length =
            (slash == std::string::npos) ? std::string::npos : slash - begin;
        const std::string segment = path.substr(begin, length);
        if (segment.empty()) {
            std::cerr << "genrb error: empty subpaths and trailing slashes are not allowed: " << path << std::endl;
            status = U_PARSE_ERROR;
            return;
        }
        push(segment);
    } while (slash != std::string::npos);
}
|
||||
|
||||
void ResKeyPath::push(const std::string& key) {
|
||||
fPath.push_back(key);
|
||||
}
|
||||
|
||||
void ResKeyPath::pop() {
|
||||
fPath.pop_back();
|
||||
}
|
||||
|
||||
const std::list<std::string>& ResKeyPath::pieces() const {
|
||||
return fPath;
|
||||
}
|
||||
|
||||
/**
 * Writes the path in its textual form: "/" for the empty path, otherwise
 * each key preceded by a slash (for example "/units/length/meter").
 */
std::ostream& operator<<(std::ostream& out, const ResKeyPath& value) {
    const std::list<std::string>& segments = value.pieces();
    if (segments.empty()) {
        out << "/";
        return out;
    }
    for (auto it = segments.begin(); it != segments.end(); ++it) {
        out << "/" << *it;
    }
    return out;
}
|
||||
|
||||
|
||||
/**
 * Parses one textual rule and adds it to the filter.
 *
 * A rule is a sign character followed by an absolute path: '+' marks an
 * inclusion rule and '-' an exclusion rule, e.g. "-/alabama".
 *
 * @param ruleLine The rule text; must not be empty.
 * @param status   Set to U_PARSE_ERROR if the rule is empty, does not start
 *                 with '+' or '-', or if the path fails to parse.
 */
void SimpleRuleBasedPathFilter::addRule(const std::string& ruleLine, UErrorCode& status) {
    if (ruleLine.empty()) {
        std::cerr << "genrb error: empty filter rules are not allowed" << std::endl;
        status = U_PARSE_ERROR;
        return;
    }

    const char sign = ruleLine[0];
    if (sign != '+' && sign != '-') {
        std::cerr << "genrb error: rules must start with + or -: " << ruleLine << std::endl;
        status = U_PARSE_ERROR;
        return;
    }

    // Everything after the sign is the path. A parse failure is reported
    // through status, which the path-based overload checks before acting.
    ResKeyPath path(ruleLine.substr(1), status);
    addRule(path, sign == '+', status);
}
|
||||
|
||||
void SimpleRuleBasedPathFilter::addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status) {
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
Tree* node = &fRoot;
|
||||
for (auto& key : path.pieces()) {
|
||||
// note: operator[] auto-constructs default values
|
||||
node = &node->fChildren[key];
|
||||
}
|
||||
if (isVerbose() && (node->fIncluded != PARTIAL || !node->fChildren.empty())) {
|
||||
std::cout << "genrb info: rule on path " << path
|
||||
<< " overrides previous rules" << std::endl;
|
||||
}
|
||||
node->fIncluded = inclusionRule ? INCLUDE : EXCLUDE;
|
||||
node->fChildren.clear();
|
||||
}
|
||||
|
||||
/**
 * Decides how the resource at the given path should be treated.
 *
 * Returns PARTIAL while the filter tree still has rules below the path (the
 * caller must recurse). Otherwise returns the rule on the deepest matching
 * filter node, or, if that node carries no rule of its own, the nearest
 * definite rule found on the way down (INCLUDE when there is none).
 */
PathFilter::EInclusion SimpleRuleBasedPathFilter::match(const ResKeyPath& path) const {
    const Tree* current = &fRoot;

    // Nearest definite (+/-) rule seen so far. The root covers the rules
    // "+/" and "-/"; with no rule at all, everything is included.
    EInclusion inherited = (fRoot.fIncluded == PARTIAL) ? INCLUDE : fRoot.fIncluded;

    // Set when the input path goes deeper than the filter tree does, in
    // which case no further subpath could change the answer.
    bool leftFilterTree = false;

    for (const auto& key : path.pieces()) {
        const auto found = current->fChildren.find(key);
        if (found == current->fChildren.end()) {
            leftFilterTree = true;
            break;
        }
        current = &found->second;
        if (current->fIncluded != PARTIAL) {
            inherited = current->fIncluded;
        }
    }

    // The answer is final either when we ran off the filter tree or when we
    // stopped exactly on a filter node without children.
    const bool atLeaf = leftFilterTree || current->fChildren.empty();
    if (!atLeaf) {
        return PARTIAL;
    }

    // A leaf without its own rule falls back to the inherited one.
    return (current->fIncluded == PARTIAL) ? inherited : current->fIncluded;
}
|
||||
|
||||
void SimpleRuleBasedPathFilter::Tree::print(std::ostream& out, int32_t indent) const {
|
||||
for (int32_t i=0; i<indent; i++) out << "\t";
|
||||
out << "included: " << kEInclusionNames[fIncluded] << std::endl;
|
||||
for (auto& child : fChildren) {
|
||||
for (int32_t i=0; i<indent; i++) out << "\t";
|
||||
out << child.first << ": {" << std::endl;
|
||||
child.second.print(out, indent + 1);
|
||||
for (int32_t i=0; i<indent; i++) out << "\t";
|
||||
out << "}" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void SimpleRuleBasedPathFilter::print(std::ostream& out) const {
|
||||
out << "SimpleRuleBasedPathFilter {" << std::endl;
|
||||
fRoot.print(out, 1);
|
||||
out << "}";
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value) {
|
||||
value.print(out);
|
||||
return out;
|
||||
}
|
124
icu4c/source/tools/genrb/filterrb.h
Normal file
124
icu4c/source/tools/genrb/filterrb.h
Normal file
|
@ -0,0 +1,124 @@
|
|||
// © 2018 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
#ifndef __FILTERRB_H__
|
||||
#define __FILTERRB_H__
|
||||
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <ostream>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
|
||||
|
||||
/**
|
||||
* Represents an absolute path into a resource bundle.
|
||||
* For example: "/units/length/meter"
|
||||
*/
|
||||
class ResKeyPath {
|
||||
public:
|
||||
/** Constructs an empty path (top of tree) */
|
||||
ResKeyPath();
|
||||
|
||||
/** Constructs from a string path */
|
||||
ResKeyPath(const std::string& path, UErrorCode& status);
|
||||
|
||||
void push(const std::string& key);
|
||||
void pop();
|
||||
|
||||
const std::list<std::string>& pieces() const;
|
||||
|
||||
private:
|
||||
std::list<std::string> fPath;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const ResKeyPath& value);
|
||||
|
||||
|
||||
/**
|
||||
* Interface used to determine whether to include or reject pieces of a
|
||||
* resource bundle based on their absolute path.
|
||||
*/
|
||||
class PathFilter {
|
||||
public:
|
||||
enum EInclusion {
|
||||
INCLUDE,
|
||||
PARTIAL,
|
||||
EXCLUDE
|
||||
};
|
||||
|
||||
static const char* kEInclusionNames[];
|
||||
|
||||
/**
|
||||
* Returns an EInclusion on whether or not the given path should be included.
|
||||
*
|
||||
* INCLUDE = include the whole subtree
|
||||
* PARTIAL = recurse into the subtree
|
||||
* EXCLUDE = reject the whole subtree
|
||||
*/
|
||||
virtual EInclusion match(const ResKeyPath& path) const = 0;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Implementation of PathFilter for a list of inclusion/exclusion rules.
|
||||
*
|
||||
* For example, given this list of filter rules:
|
||||
*
|
||||
* -/alabama
|
||||
* +/alabama/alaska/arizona
|
||||
* -/fornia/hawaii
|
||||
*
|
||||
* You get the following structure:
|
||||
*
|
||||
* SimpleRuleBasedPathFilter {
|
||||
* included: PARTIAL
|
||||
* alabama: {
|
||||
* included: EXCLUDE
|
||||
* alaska: {
|
||||
* included: PARTIAL
|
||||
* arizona: {
|
||||
* included: INCLUDE
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
* fornia: {
|
||||
* included: PARTIAL
|
||||
* hawaii: {
|
||||
* included: EXCLUDE
|
||||
* }
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
class SimpleRuleBasedPathFilter : public PathFilter {
|
||||
public:
|
||||
void addRule(const std::string& ruleLine, UErrorCode& status);
|
||||
void addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status);
|
||||
|
||||
EInclusion match(const ResKeyPath& path) const override;
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
private:
|
||||
struct Tree {
|
||||
/**
|
||||
* Information on the USER-SPECIFIED inclusion/exclusion.
|
||||
*
|
||||
* INCLUDE = this path exactly matches a "+" rule
|
||||
* PARTIAL = this path does not match any rule, but subpaths exist
|
||||
* EXCLUDE = this path exactly matches a "-" rule
|
||||
*/
|
||||
EInclusion fIncluded = PARTIAL;
|
||||
std::map<std::string, Tree> fChildren;
|
||||
|
||||
void print(std::ostream& out, int32_t indent) const;
|
||||
};
|
||||
|
||||
Tree fRoot;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value);
|
||||
|
||||
|
||||
#endif //__FILTERRB_H__
|
|
@ -18,6 +18,11 @@
|
|||
*******************************************************************************
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <string>
|
||||
|
||||
#include <assert.h>
|
||||
#include "genrb.h"
|
||||
#include "unicode/localpointer.h"
|
||||
|
@ -25,13 +30,15 @@
|
|||
#include "unicode/utf16.h"
|
||||
#include "charstr.h"
|
||||
#include "cmemory.h"
|
||||
#include "filterrb.h"
|
||||
#include "reslist.h"
|
||||
#include "ucmndata.h" /* TODO: for reading the pool bundle */
|
||||
|
||||
U_NAMESPACE_USE
|
||||
|
||||
/* Protos */
|
||||
void processFile(const char *filename, const char* cp, const char *inputDir, const char *outputDir,
|
||||
void processFile(const char *filename, const char* cp,
|
||||
const char *inputDir, const char *outputDir, const char *filterDir,
|
||||
const char *packageName,
|
||||
SRBRoot *newPoolBundle, UBool omitBinaryCollation, UErrorCode &status);
|
||||
static char *make_res_filename(const char *filename, const char *outputDir,
|
||||
|
@ -76,7 +83,8 @@ enum
|
|||
FORMAT_VERSION,
|
||||
WRITE_POOL_BUNDLE,
|
||||
USE_POOL_BUNDLE,
|
||||
INCLUDE_UNIHAN_COLL
|
||||
INCLUDE_UNIHAN_COLL,
|
||||
FILTERDIR
|
||||
};
|
||||
|
||||
UOption options[]={
|
||||
|
@ -102,6 +110,7 @@ UOption options[]={
|
|||
UOPTION_DEF("writePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 19 */
|
||||
UOPTION_DEF("usePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 20 */
|
||||
UOPTION_DEF("includeUnihanColl", '\x01', UOPT_NO_ARG),/* 21 */ /* temporary, don't display in usage info */
|
||||
UOPTION_DEF("filterDir", '\x01', UOPT_OPTIONAL_ARG), /* 22 */
|
||||
};
|
||||
|
||||
static UBool write_java = FALSE;
|
||||
|
@ -121,6 +130,7 @@ main(int argc,
|
|||
const char *arg = NULL;
|
||||
const char *outputDir = NULL; /* NULL = no output directory, use current */
|
||||
const char *inputDir = NULL;
|
||||
const char *filterDir = NULL;
|
||||
const char *encoding = "";
|
||||
int i;
|
||||
UBool illegalArg = FALSE;
|
||||
|
@ -228,6 +238,9 @@ main(int argc,
|
|||
"\t --usePoolBundle [directory] point to keys from the pool.res keys pool bundle if they are available there;\n"
|
||||
"\t makes .res files smaller but dependent on the pool bundle\n"
|
||||
"\t (--writePoolBundle and --usePoolBundle cannot be combined)\n");
|
||||
fprintf(stderr,
|
||||
"\t --filterDir Input directory where filter files are available.\n"
|
||||
"\t For more on filter files, see Python buildtool.\n");
|
||||
|
||||
return illegalArg ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
|
||||
}
|
||||
|
@ -254,6 +267,10 @@ main(int argc,
|
|||
outputDir = options[DESTDIR].value;
|
||||
}
|
||||
|
||||
if (options[FILTERDIR].doesOccur) {
|
||||
filterDir = options[FILTERDIR].value;
|
||||
}
|
||||
|
||||
if(options[ENCODING].doesOccur) {
|
||||
encoding = options[ENCODING].value;
|
||||
}
|
||||
|
@ -524,7 +541,7 @@ main(int argc,
|
|||
if (isVerbose()) {
|
||||
printf("Processing file \"%s\"\n", theCurrentFileName.data());
|
||||
}
|
||||
processFile(arg, encoding, inputDir, outputDir, NULL,
|
||||
processFile(arg, encoding, inputDir, outputDir, filterDir, NULL,
|
||||
newPoolBundle.getAlias(),
|
||||
options[NO_BINARY_COLLATION].doesOccur, status);
|
||||
}
|
||||
|
@ -558,16 +575,16 @@ main(int argc,
|
|||
/* Process a file */
|
||||
void
|
||||
processFile(const char *filename, const char *cp,
|
||||
const char *inputDir, const char *outputDir, const char *packageName,
|
||||
const char *inputDir, const char *outputDir, const char *filterDir,
|
||||
const char *packageName,
|
||||
SRBRoot *newPoolBundle,
|
||||
UBool omitBinaryCollation, UErrorCode &status) {
|
||||
LocalPointer<SRBRoot> data;
|
||||
UCHARBUF *ucbuf = NULL;
|
||||
char *rbname = NULL;
|
||||
char *openFileName = NULL;
|
||||
char *inputDirBuf = NULL;
|
||||
LocalUCHARBUFPointer ucbuf;
|
||||
CharString openFileName;
|
||||
CharString inputDirBuf;
|
||||
|
||||
char outputFileName[256];
|
||||
char outputFileName[256];
|
||||
|
||||
int32_t dirlen = 0;
|
||||
int32_t filelen = 0;
|
||||
|
@ -584,8 +601,6 @@ processFile(const char *filename, const char *cp,
|
|||
|
||||
if(inputDir == NULL) {
|
||||
const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
|
||||
openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
|
||||
openFileName[0] = '\0';
|
||||
if (filenameBegin != NULL) {
|
||||
/*
|
||||
* When a filename ../../../data/root.txt is specified,
|
||||
|
@ -594,31 +609,15 @@ processFile(const char *filename, const char *cp,
|
|||
* another file, like UCARules.txt or thaidict.brk.
|
||||
*/
|
||||
int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
|
||||
inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
|
||||
inputDirBuf.append(filename, filenameSize, status);
|
||||
|
||||
/* test for NULL */
|
||||
if(inputDirBuf == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto finish;
|
||||
}
|
||||
|
||||
inputDirBuf[filenameSize - 1] = 0;
|
||||
inputDir = inputDirBuf;
|
||||
dirlen = (int32_t)uprv_strlen(inputDir);
|
||||
inputDir = inputDirBuf.data();
|
||||
dirlen = inputDirBuf.length();
|
||||
}
|
||||
}else{
|
||||
dirlen = (int32_t)uprv_strlen(inputDir);
|
||||
|
||||
if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
|
||||
openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
|
||||
|
||||
/* test for NULL */
|
||||
if(openFileName == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto finish;
|
||||
}
|
||||
|
||||
openFileName[0] = '\0';
|
||||
/*
|
||||
* append the input dir to openFileName if the first char in
|
||||
* filename is not file seperation char and the last char input directory is not '.'.
|
||||
|
@ -631,49 +630,76 @@ processFile(const char *filename, const char *cp,
|
|||
* genrb -s. icu/data --- start from CWD and look in icu/data dir
|
||||
*/
|
||||
if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
|
||||
uprv_strcpy(openFileName, inputDir);
|
||||
openFileName[dirlen] = U_FILE_SEP_CHAR;
|
||||
openFileName.append(inputDir, status);
|
||||
}
|
||||
openFileName[dirlen + 1] = '\0';
|
||||
} else {
|
||||
openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
|
||||
|
||||
/* test for NULL */
|
||||
if(openFileName == NULL) {
|
||||
status = U_MEMORY_ALLOCATION_ERROR;
|
||||
goto finish;
|
||||
}
|
||||
|
||||
uprv_strcpy(openFileName, inputDir);
|
||||
|
||||
openFileName.append(inputDir, status);
|
||||
}
|
||||
}
|
||||
openFileName.appendPathPart(filename, status);
|
||||
|
||||
uprv_strcat(openFileName, filename);
|
||||
// Test for CharString failure
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &status);
|
||||
ucbuf.adoptInstead(ucbuf_open(openFileName.data(), &cp,getShowWarning(),TRUE, &status));
|
||||
if(status == U_FILE_ACCESS_ERROR) {
|
||||
|
||||
fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
|
||||
goto finish;
|
||||
fprintf(stderr, "couldn't open file %s\n", openFileName.data());
|
||||
return;
|
||||
}
|
||||
if (ucbuf == NULL || U_FAILURE(status)) {
|
||||
if (ucbuf.isNull() || U_FAILURE(status)) {
|
||||
fprintf(stderr, "An error occurred processing file %s. Error: %s\n",
|
||||
openFileName == NULL ? filename : openFileName, u_errorName(status));
|
||||
goto finish;
|
||||
openFileName.data(), u_errorName(status));
|
||||
return;
|
||||
}
|
||||
/* auto detected popular encodings? */
|
||||
if (cp!=NULL && isVerbose()) {
|
||||
printf("autodetected encoding %s\n", cp);
|
||||
}
|
||||
/* Parse the data into an SRBRoot */
|
||||
data.adoptInstead(parse(ucbuf, inputDir, outputDir, filename,
|
||||
data.adoptInstead(parse(ucbuf.getAlias(), inputDir, outputDir, filename,
|
||||
!omitBinaryCollation, options[NO_COLLATION_RULES].doesOccur, &status));
|
||||
|
||||
if (data.isNull() || U_FAILURE(status)) {
|
||||
fprintf(stderr, "couldn't parse the file %s. Error:%s\n", filename, u_errorName(status));
|
||||
goto finish;
|
||||
return;
|
||||
}
|
||||
|
||||
// Run filtering before writing pool bundle
|
||||
if (filterDir != nullptr) {
|
||||
CharString filterFileName(filterDir, status);
|
||||
filterFileName.appendPathPart(filename, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Open the file and read it into filter
|
||||
SimpleRuleBasedPathFilter filter;
|
||||
std::ifstream f(filterFileName.data());
|
||||
if (f.fail()) {
|
||||
std::cerr << "genrb error: unable to open " << filterFileName.data() << std::endl;
|
||||
status = U_FILE_ACCESS_ERROR;
|
||||
return;
|
||||
}
|
||||
std::string currentLine;
|
||||
while (std::getline(f, currentLine)) {
|
||||
// Ignore # comments and empty lines
|
||||
if (currentLine.empty() || currentLine[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
filter.addRule(currentLine, status);
|
||||
if (U_FAILURE(status)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Apply the filter to the data
|
||||
ResKeyPath path;
|
||||
data->fRoot->applyFilter(filter, path, data.getAlias());
|
||||
}
|
||||
|
||||
if(options[WRITE_POOL_BUNDLE].doesOccur) {
|
||||
data->fWritePoolBundle = newPoolBundle;
|
||||
data->compactKeys(status);
|
||||
|
@ -683,7 +709,7 @@ processFile(const char *filename, const char *cp,
|
|||
if(U_FAILURE(status)) {
|
||||
fprintf(stderr, "bundle_compactKeys(%s) or bundle_getKeyBytes() failed: %s\n",
|
||||
filename, u_errorName(status));
|
||||
goto finish;
|
||||
return;
|
||||
}
|
||||
/* count the number of just-added key strings */
|
||||
for(const char *newKeysLimit = newKeys + newKeysLength; newKeys < newKeysLimit; ++newKeys) {
|
||||
|
@ -698,11 +724,11 @@ processFile(const char *filename, const char *cp,
|
|||
}
|
||||
|
||||
/* Determine the target rb filename */
|
||||
rbname = make_res_filename(filename, outputDir, packageName, status);
|
||||
uprv_free(make_res_filename(filename, outputDir, packageName, status));
|
||||
if(U_FAILURE(status)) {
|
||||
fprintf(stderr, "couldn't make the res fileName for bundle %s. Error:%s\n",
|
||||
filename, u_errorName(status));
|
||||
goto finish;
|
||||
return;
|
||||
}
|
||||
if(write_java== TRUE){
|
||||
bundle_write_java(data.getAlias(), outputDir, outputEnc,
|
||||
|
@ -719,24 +745,6 @@ processFile(const char *filename, const char *cp,
|
|||
if (U_FAILURE(status)) {
|
||||
fprintf(stderr, "couldn't write bundle %s. Error:%s\n", outputFileName, u_errorName(status));
|
||||
}
|
||||
|
||||
finish:
|
||||
|
||||
if (inputDirBuf != NULL) {
|
||||
uprv_free(inputDirBuf);
|
||||
}
|
||||
|
||||
if (openFileName != NULL) {
|
||||
uprv_free(openFileName);
|
||||
}
|
||||
|
||||
if(ucbuf) {
|
||||
ucbuf_close(ucbuf);
|
||||
}
|
||||
|
||||
if (rbname) {
|
||||
uprv_free(rbname);
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate the target .res file name from the input file name */
|
||||
|
|
|
@ -226,6 +226,7 @@
|
|||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="errmsg.c" />
|
||||
<ClCompile Include="filterrb.cpp" />
|
||||
<ClCompile Include="genrb.cpp" />
|
||||
<ClCompile Include="parse.cpp">
|
||||
<DisableLanguageExtensions>false</DisableLanguageExtensions>
|
||||
|
@ -250,6 +251,7 @@
|
|||
<ItemGroup>
|
||||
<ClInclude Include="errmsg.h" />
|
||||
<ClInclude Include="genrb.h" />
|
||||
<ClInclude Include="filterrb.h" />
|
||||
<ClInclude Include="parse.h" />
|
||||
<ClInclude Include="prscmnts.h" />
|
||||
<ClInclude Include="rbutil.h" />
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
<ClCompile Include="errmsg.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="filterrb.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="genrb.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
@ -53,6 +56,9 @@
|
|||
<ClInclude Include="errmsg.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="filterrb.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="genrb.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
|
|
|
@ -28,13 +28,17 @@
|
|||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "unicode/localpointer.h"
|
||||
#include "reslist.h"
|
||||
#include "unewdata.h"
|
||||
#include "unicode/ures.h"
|
||||
#include "unicode/putil.h"
|
||||
#include "errmsg.h"
|
||||
#include "filterrb.h"
|
||||
|
||||
#include "uarrsort.h"
|
||||
#include "uelement.h"
|
||||
|
@ -42,6 +46,8 @@
|
|||
#include "uinvchar.h"
|
||||
#include "ustr_imp.h"
|
||||
#include "unicode/utf16.h"
|
||||
#include "uassert.h"
|
||||
|
||||
/*
|
||||
* Align binary data at a 16-byte offset from the start of the resource bundle,
|
||||
* to be safe for any data type it may contain.
|
||||
|
@ -921,9 +927,6 @@ void SRBRoot::write(const char *outputDir, const char *outputPkg,
|
|||
if (f16BitUnits.length() & 1) {
|
||||
f16BitUnits.append((UChar)0xaaaa); /* pad to multiple of 4 bytes */
|
||||
}
|
||||
/* all keys have been mapped */
|
||||
uprv_free(fKeyMap);
|
||||
fKeyMap = NULL;
|
||||
|
||||
byteOffset = fKeysTop + f16BitUnits.length() * 2;
|
||||
fRoot->preWrite(&byteOffset);
|
||||
|
@ -1128,7 +1131,8 @@ SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCo
|
|||
: fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE),
|
||||
fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle),
|
||||
fKeys(NULL), fKeyMap(NULL),
|
||||
fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), fKeysCount(0), fLocalKeyLimit(0),
|
||||
fKeysBottom(0), fKeysTop(0), fKeysCapacity(0),
|
||||
fKeysCount(0), fLocalKeyLimit(0),
|
||||
f16BitUnits(), f16BitStringsLength(0),
|
||||
fUsePoolBundle(&kNoPoolBundle),
|
||||
fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0),
|
||||
|
@ -1233,6 +1237,9 @@ int32_t
|
|||
SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) {
|
||||
int32_t keypos;
|
||||
|
||||
// It is not legal to add new key bytes after compactKeys is run!
|
||||
U_ASSERT(fKeyMap == nullptr);
|
||||
|
||||
if (U_FAILURE(errorCode)) {
|
||||
return -1;
|
||||
}
|
||||
|
@ -1334,11 +1341,35 @@ compareKeyOldpos(const void * /*context*/, const void *l, const void *r) {
|
|||
return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos);
|
||||
}
|
||||
|
||||
void SResource::collectKeys(std::function<void(int32_t)> collector) const {
|
||||
collector(fKey);
|
||||
}
|
||||
|
||||
void ContainerResource::collectKeys(std::function<void(int32_t)> collector) const {
|
||||
collector(fKey);
|
||||
for (SResource* curr = fFirst; curr != NULL; curr = curr->fNext) {
|
||||
curr->collectKeys(collector);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SRBRoot::compactKeys(UErrorCode &errorCode) {
|
||||
KeyMapEntry *map;
|
||||
char *keys;
|
||||
int32_t i;
|
||||
|
||||
// Except for pool bundles, keys might not be used.
|
||||
// Do not add unused keys to the final bundle.
|
||||
std::set<int32_t> keysInUse;
|
||||
if (!fIsPoolBundle) {
|
||||
fRoot->collectKeys([&keysInUse](int32_t key) {
|
||||
if (key >= 0) {
|
||||
keysInUse.insert(key);
|
||||
}
|
||||
});
|
||||
fKeysCount = keysInUse.size();
|
||||
}
|
||||
|
||||
int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount;
|
||||
if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) {
|
||||
return;
|
||||
|
@ -1357,11 +1388,23 @@ SRBRoot::compactKeys(UErrorCode &errorCode) {
|
|||
++keys; /* skip the NUL */
|
||||
}
|
||||
keys = fKeys + fKeysBottom;
|
||||
for (; i < keysCount; ++i) {
|
||||
map[i].oldpos = (int32_t)(keys - fKeys);
|
||||
map[i].newpos = 0;
|
||||
while (*keys != 0) { ++keys; } /* skip the key */
|
||||
++keys; /* skip the NUL */
|
||||
while (i < keysCount) {
|
||||
int32_t keyOffset = static_cast<int32_t>(keys - fKeys);
|
||||
if (!fIsPoolBundle && keysInUse.count(keyOffset) == 0) {
|
||||
// Mark the unused key as deleted
|
||||
while (*keys != 0) { *keys++ = 1; }
|
||||
*keys++ = 1;
|
||||
} else {
|
||||
map[i].oldpos = keyOffset;
|
||||
map[i].newpos = 0;
|
||||
while (*keys != 0) { ++keys; } /* skip the key */
|
||||
++keys; /* skip the NUL */
|
||||
i++;
|
||||
}
|
||||
}
|
||||
if (keys != fKeys + fKeysTop) {
|
||||
// Throw away any unused keys from the end
|
||||
fKeysTop = static_cast<int32_t>(keys - fKeys);
|
||||
}
|
||||
/* Sort the keys so that each one is immediately followed by all of its suffixes. */
|
||||
uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
|
||||
|
@ -1404,7 +1447,7 @@ SRBRoot::compactKeys(UErrorCode &errorCode) {
|
|||
for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {}
|
||||
if (suffix == suffixLimit && *k == *suffixLimit) {
|
||||
map[j].newpos = map[i].oldpos + offset; /* yes, point to the earlier key */
|
||||
/* mark the suffix as deleted */
|
||||
// Mark the suffix as deleted
|
||||
while (*suffix != 0) { *suffix++ = 1; }
|
||||
*suffix = 1;
|
||||
} else {
|
||||
|
@ -1438,7 +1481,7 @@ SRBRoot::compactKeys(UErrorCode &errorCode) {
|
|||
keys[newpos++] = keys[oldpos++];
|
||||
}
|
||||
}
|
||||
assert(i == keysCount);
|
||||
U_ASSERT(i == keysCount);
|
||||
}
|
||||
fKeysTop = newpos;
|
||||
/* Re-sort once more, by old offsets for binary searching. */
|
||||
|
@ -1692,3 +1735,52 @@ SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) {
|
|||
// +1 to account for the initial zero in f16BitUnits
|
||||
assert(f16BitUnits.length() <= (f16BitStringsLength + 1));
|
||||
}
|
||||
|
||||
void SResource::applyFilter(
|
||||
const PathFilter& /*filter*/,
|
||||
ResKeyPath& /*path*/,
|
||||
const SRBRoot* /*bundle*/) {
|
||||
// Only a few resource types (tables) are capable of being filtered.
|
||||
}
|
||||
|
||||
void TableResource::applyFilter(
|
||||
const PathFilter& filter,
|
||||
ResKeyPath& path,
|
||||
const SRBRoot* bundle) {
|
||||
SResource* prev = nullptr;
|
||||
SResource* curr = fFirst;
|
||||
for (; curr != nullptr;) {
|
||||
path.push(curr->getKeyString(bundle));
|
||||
auto inclusion = filter.match(path);
|
||||
if (inclusion == PathFilter::EInclusion::INCLUDE) {
|
||||
// Include whole subtree
|
||||
// no-op
|
||||
} else if (inclusion == PathFilter::EInclusion::EXCLUDE) {
|
||||
// Reject the whole subtree
|
||||
// Remove it from the linked list
|
||||
if (isVerbose()) {
|
||||
std::cout << "genrb removing subtree: " << bundle->fLocale << ": " << path << std::endl;
|
||||
}
|
||||
if (prev == nullptr) {
|
||||
fFirst = curr->fNext;
|
||||
} else {
|
||||
prev->fNext = curr->fNext;
|
||||
}
|
||||
fCount--;
|
||||
delete curr;
|
||||
curr = prev;
|
||||
} else {
|
||||
U_ASSERT(inclusion == PathFilter::EInclusion::PARTIAL);
|
||||
// Recurse into the child
|
||||
curr->applyFilter(filter, path, bundle);
|
||||
}
|
||||
path.pop();
|
||||
|
||||
prev = curr;
|
||||
if (curr == nullptr) {
|
||||
curr = fFirst;
|
||||
} else {
|
||||
curr = curr->fNext;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
#define KEY_SPACE_SIZE 65536
|
||||
#define RESLIST_MAX_INT_VECTOR 2048
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "unicode/utypes.h"
|
||||
#include "unicode/unistr.h"
|
||||
#include "unicode/ures.h"
|
||||
|
@ -36,7 +38,9 @@
|
|||
|
||||
U_CDECL_BEGIN
|
||||
|
||||
class PathFilter;
|
||||
class PseudoListResource;
|
||||
class ResKeyPath;
|
||||
|
||||
struct ResFile {
|
||||
ResFile()
|
||||
|
@ -212,6 +216,19 @@ struct SResource {
|
|||
void write(UNewDataMemory *mem, uint32_t *byteOffset);
|
||||
virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset);
|
||||
|
||||
/**
|
||||
* Applies the given filter with the given base path to this resource.
|
||||
* Removes child resources rejected by the filter recursively.
|
||||
*
|
||||
* @param bundle Needed in order to access the key for this and child resources.
|
||||
*/
|
||||
virtual void applyFilter(const PathFilter& filter, ResKeyPath& path, const SRBRoot* bundle);
|
||||
|
||||
/**
|
||||
* Calls the given function for every key ID present in this tree.
|
||||
*/
|
||||
virtual void collectKeys(std::function<void(int32_t)> collector) const;
|
||||
|
||||
int8_t fType; /* nominal type: fRes (when != 0xffffffff) may use subtype */
|
||||
UBool fWritten; /* res_write() can exit early */
|
||||
uint32_t fRes; /* resource item word; RES_BOGUS=0xffffffff if not known yet */
|
||||
|
@ -231,7 +248,10 @@ public:
|
|||
fCount(0), fFirst(NULL) {}
|
||||
virtual ~ContainerResource();
|
||||
|
||||
virtual void handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode);
|
||||
void handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) override;
|
||||
|
||||
void collectKeys(std::function<void(int32_t)> collector) const override;
|
||||
|
||||
protected:
|
||||
void writeAllRes16(SRBRoot *bundle);
|
||||
void preWriteAllRes(uint32_t *byteOffset);
|
||||
|
@ -254,9 +274,11 @@ public:
|
|||
|
||||
void add(SResource *res, int linenumber, UErrorCode &errorCode);
|
||||
|
||||
virtual void handleWrite16(SRBRoot *bundle);
|
||||
virtual void handlePreWrite(uint32_t *byteOffset);
|
||||
virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset);
|
||||
void handleWrite16(SRBRoot *bundle) override;
|
||||
void handlePreWrite(uint32_t *byteOffset) override;
|
||||
void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) override;
|
||||
|
||||
void applyFilter(const PathFilter& filter, ResKeyPath& path, const SRBRoot* bundle) override;
|
||||
|
||||
int8_t fTableType; // determined by table_write16() for table_preWrite() & table_write()
|
||||
SRBRoot *fRoot;
|
||||
|
|
Loading…
Add table
Reference in a new issue