ICU-10923 Adding --filterDir option to genrb.

- Reads, parses, and applies the filter file syntax.
- Removes unused keys from the resource bundle.
- Adds sample filter txt file with test in intltest.
This commit is contained in:
Shane Carr 2018-12-11 17:14:58 -08:00 committed by Shane F. Carr
parent 1b5c721ef2
commit 7ec3357d70
13 changed files with 620 additions and 89 deletions

View file

@ -195,6 +195,8 @@ void NewResourceBundleTest::runIndexedTest( int32_t index, UBool exec, const cha
#endif
case 5: name = "TestGetByFallback"; if(exec) TestGetByFallback(); break;
case 6: name = "TestFilter"; if(exec) TestFilter(); break;
default: name = ""; break; //needed to end loop
}
}
@ -1198,5 +1200,74 @@ NewResourceBundleTest::TestGetByFallback() {
status = U_ZERO_ERROR;
}
// Leaves the enclosing void function early when
// status.errIfFailureAndReset(...) returns true; the current source line is
// passed along as message context.
// The body is wrapped in do { } while (false) so that `REQUIRE_SUCCESS(s);`
// expands to exactly one statement and stays valid inside an unbraced if/else.
#define REQUIRE_SUCCESS(status) do { \
    if (status.errIfFailureAndReset("line %d", __LINE__)) { \
        return; \
    } \
} while (false)
// Leaves the enclosing void function early when
// status.expectErrorAndReset(expected, ...) returns false; the current source
// line is passed along as message context.
// The body is wrapped in do { } while (false) so that `REQUIRE_ERROR(e, s);`
// expands to exactly one statement and stays valid inside an unbraced if/else.
#define REQUIRE_ERROR(expected, status) do { \
    if (!status.expectErrorAndReset(expected, "line %d", __LINE__)) { \
        return; \
    } \
} while (false)
/**
* Tests the --filterDir option in genrb.
*
* Input resource text file: test/testdata/filtertest.txt
* Input filter rule file: test/testdata/filters/filtertest.txt
*
* The filter rules are:
*   -/alabama
*   +/alabama/alaska/arizona
*   -/fornia/illinois
*
* The resource bundle should contain no keys matched by the filter
* and should contain all other keys.
*
* Each lookup is followed by REQUIRE_SUCCESS or REQUIRE_ERROR, which return
* from this function early when the status is not the expected one, so the
* order of the statements below matters.
*/
void NewResourceBundleTest::TestFilter() {
IcuTestErrorCode status(*this, "TestFilter");
// Open the filtered bundle from the test data package.
ResourceBundle rb(loadTestData(status), "filtertest", status);
REQUIRE_SUCCESS(status);
assertEquals("rb", rb.getType(), URES_TABLE);
// /alabama is excluded by rule, but it must survive as a table because the
// more specific rule re-includes /alabama/alaska/arizona beneath it.
ResourceBundle alabama = rb.get("alabama", status);
REQUIRE_SUCCESS(status);
assertEquals("alabama", alabama.getType(), URES_TABLE);
ResourceBundle alaska = alabama.get("alaska", status);
REQUIRE_SUCCESS(status);
assertEquals("alaska", alaska.getType(), URES_TABLE);
// The explicitly re-included leaf keeps its original string value.
ResourceBundle arizona = alaska.get("arizona", status);
REQUIRE_SUCCESS(status);
assertEquals("arizona", arizona.getType(), URES_STRING);
assertEquals("arizona", u"arkansas", arizona.getString(status));
REQUIRE_SUCCESS(status);
// Filter: california should not be included
ResourceBundle california = alaska.get("california", status);
REQUIRE_ERROR(U_MISSING_RESOURCE_ERROR, status);
// Filter: connecticut should not be included
ResourceBundle connecticut = alabama.get("connecticut", status);
REQUIRE_ERROR(U_MISSING_RESOURCE_ERROR, status);
// /fornia has no rule of its own; only its child illinois is excluded.
ResourceBundle fornia = rb.get("fornia", status);
REQUIRE_SUCCESS(status);
assertEquals("fornia", fornia.getType(), URES_TABLE);
ResourceBundle hawaii = fornia.get("hawaii", status);
REQUIRE_SUCCESS(status);
assertEquals("hawaii", hawaii.getType(), URES_STRING);
assertEquals("hawaii", u"idaho", hawaii.getString(status));
REQUIRE_SUCCESS(status);
// Filter: illinois should not be included
ResourceBundle illinois = fornia.get("illinois", status);
REQUIRE_ERROR(U_MISSING_RESOURCE_ERROR, status);
}
//eof

View file

@ -38,6 +38,8 @@ public:
void TestGetByFallback(void);
void TestFilter(void);
private:
/**
* The assignment operator has no real implementation.

View file

@ -98,6 +98,17 @@ def generate_rb(config, glob, common_vars):
tool = IcuTool("genrb"),
args = "-s {IN_DIR} -d {TMP_DIR} {INPUT_FILES[0]}",
format_with = {}
),
SingleExecutionRequest(
name = "filtertest",
category = "tests",
dep_files = [],
input_files = [InFile("filtertest.txt")],
output_files = [OutFile("filtertest.res")],
tool = IcuTool("genrb"),
args = "-s {IN_DIR} -d {OUT_DIR} -i {OUT_DIR} "
"--filterDir {IN_DIR}/filters filtertest.txt",
format_with = {}
)
]

View file

@ -0,0 +1,6 @@
# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
-/alabama
+/alabama/alaska/arizona
-/fornia/illinois

View file

@ -0,0 +1,20 @@
// Copyright (C) 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
filtertest {
alabama {
alaska {
arizona {"arkansas"}
california {"colorado"}
}
connecticut {
arizona {"delaware"}
california {"florida"}
}
}
// test suffixes
fornia {
hawaii {"idaho"}
illinois {"indiana"}
}
}

View file

@ -38,7 +38,7 @@ CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT=explicit -DUNISTR_FROM_STRING_EXPLICIT=e
LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
OBJECTS = errmsg.o genrb.o parse.o read.o reslist.o ustr.o rbutil.o \
wrtjava.o rle.o wrtxml.o prscmnts.o
wrtjava.o rle.o wrtxml.o prscmnts.o filterrb.o
DERB_OBJ = derb.o
DEPS = $(OBJECTS:.o=.d)

View file

@ -0,0 +1,167 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <iostream>
#include <stack>
#include "filterrb.h"
#include "errmsg.h"
// Display names for PathFilter::EInclusion, indexed by the enumerator's value
// (Tree::print looks up kEInclusionNames[fIncluded]). The order of the entries
// must therefore stay in sync with the enum declaration:
// INCLUDE, PARTIAL, EXCLUDE.
const char* PathFilter::kEInclusionNames[] = {
"INCLUDE",
"PARTIAL",
"EXCLUDE"
};
// An empty path refers to the top of the resource tree; there is nothing to set up.
ResKeyPath::ResKeyPath() = default;
/**
 * Parses an absolute, slash-separated path such as "/units/length/meter"
 * into its individual keys.
 *
 * The single-character path "/" denotes the top of the tree and yields an
 * empty path. This is what the root rules "+/" and "-/" handled by
 * SimpleRuleBasedPathFilter::match rely on, and it mirrors how operator<<
 * prints an empty path.
 *
 * @param path   Path text; must begin with '/'.
 * @param status Set to U_PARSE_ERROR (with a message on stderr) when the path
 *               does not start with '/', contains an empty key ("//"), or has
 *               a trailing slash. Keys parsed before the error remain pushed.
 */
ResKeyPath::ResKeyPath(const std::string& path, UErrorCode& status) {
    if (path.empty() || path[0] != '/') {
        std::cerr << "genrb error: path must start with /: " << path << std::endl;
        status = U_PARSE_ERROR;
        return;
    }
    if (path.length() == 1) {
        // "/" alone is the root: no keys to collect.
        return;
    }
    std::string::size_type keyStart = 1;
    while (true) {
        const std::string::size_type slash = path.find('/', keyStart);
        // When no further slash exists, the key runs to the end of the string.
        const std::string key = (slash == std::string::npos)
            ? path.substr(keyStart)
            : path.substr(keyStart, slash - keyStart);
        if (key.empty()) {
            std::cerr << "genrb error: empty subpaths and trailing slashes are not allowed: " << path << std::endl;
            status = U_PARSE_ERROR;
            return;
        }
        push(key);
        if (slash == std::string::npos) {
            break;
        }
        keyStart = slash + 1;
    }
}
// Descends one level: appends a copy of `key` as the new last path component.
void ResKeyPath::push(const std::string& key) {
    fPath.emplace_back(key);
}
// Ascends one level by removing the last path component.
// Precondition: the path is not empty (std::list::pop_back on an empty list
// is undefined behavior); callers are expected to pair each pop with a push.
void ResKeyPath::pop() {
fPath.pop_back();
}
// Read-only access to the path components, ordered from the top of the tree
// downwards. The reference is to the internal list, so it reflects later
// push()/pop() calls.
const std::list<std::string>& ResKeyPath::pieces() const {
return fPath;
}
// Streams the path in its textual form: "/" for the empty (root) path,
// otherwise every key preceded by a slash, e.g. "/units/length".
std::ostream& operator<<(std::ostream& out, const ResKeyPath& value) {
    const std::list<std::string>& keys = value.pieces();
    if (keys.empty()) {
        out << "/";
        return out;
    }
    for (const std::string& key : keys) {
        out << "/" << key;
    }
    return out;
}
/**
 * Parses one textual rule and adds it to the filter.
 *
 * A rule is a marker character followed by an absolute path:
 * '+' for an inclusion rule, '-' for an exclusion rule.
 *
 * @param ruleLine The rule text, e.g. "-/alabama".
 * @param status   Set to U_PARSE_ERROR (with a message on stderr) for an empty
 *                 line or an unknown marker; path parse errors are reported
 *                 through the same status by ResKeyPath.
 */
void SimpleRuleBasedPathFilter::addRule(const std::string& ruleLine, UErrorCode& status) {
    if (ruleLine.empty()) {
        std::cerr << "genrb error: empty filter rules are not allowed" << std::endl;
        status = U_PARSE_ERROR;
        return;
    }
    const char marker = ruleLine[0];
    if (marker != '+' && marker != '-') {
        std::cerr << "genrb error: rules must start with + or -: " << ruleLine << std::endl;
        status = U_PARSE_ERROR;
        return;
    }
    // Everything after the marker is the path; the overload below ignores the
    // rule if parsing left a failure in status.
    ResKeyPath path(ruleLine.substr(1), status);
    addRule(path, marker == '+', status);
}
void SimpleRuleBasedPathFilter::addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status) {
if (U_FAILURE(status)) {
return;
}
Tree* node = &fRoot;
for (auto& key : path.pieces()) {
// note: operator[] auto-constructs default values
node = &node->fChildren[key];
}
if (isVerbose() && (node->fIncluded != PARTIAL || !node->fChildren.empty())) {
std::cout << "genrb info: rule on path " << path
<< " overrides previous rules" << std::endl;
}
node->fIncluded = inclusionRule ? INCLUDE : EXCLUDE;
node->fChildren.clear();
}
/**
 * Decides how the resource at `path` should be treated.
 *
 * The rule tree is walked along the path while remembering the most recent
 * node that carries a definite (non-PARTIAL) rule. The walk ends either when
 * the path is exhausted or when it leaves the tree.
 *
 * @return PARTIAL when the tree still has rules below the matched node, so the
 *         caller has to look at the children individually; otherwise the
 *         definite rule of the final node, or, if that node has none, the
 *         nearest definite rule above it (INCLUDE when there is none at all).
 */
PathFilter::EInclusion SimpleRuleBasedPathFilter::match(const ResKeyPath& path) const {
    const Tree* node = &fRoot;

    // Nearest definite rule seen so far; the root covers the rules "+/" and "-/".
    EInclusion nearestRule = (node->fIncluded != PARTIAL) ? node->fIncluded : INCLUDE;

    // Set when the path contains a key that the rule tree knows nothing about.
    bool leftTree = false;
    for (const std::string& key : path.pieces()) {
        const auto found = node->fChildren.find(key);
        if (found == node->fChildren.end()) {
            leftTree = true;
            break;
        }
        node = &found->second;
        if (node->fIncluded != PARTIAL) {
            nearestRule = node->fIncluded;
        }
    }

    // The tree can add no further information if we left it, or if the node
    // we stopped at has no children.
    const bool atLeaf = leftTree || node->fChildren.empty();
    if (!atLeaf) {
        return PARTIAL;
    }
    return (node->fIncluded == PARTIAL) ? nearestRule : node->fIncluded;
}
/**
 * Writes a debug dump of this node and, recursively, of all its children.
 *
 * @param out    Destination stream.
 * @param indent Number of tab characters placed in front of each line of this
 *               node; children are printed one level deeper.
 */
void SimpleRuleBasedPathFilter::Tree::print(std::ostream& out, int32_t indent) const {
    const auto writeTabs = [&out, indent]() {
        for (int32_t n = 0; n < indent; n++) {
            out << "\t";
        }
    };

    writeTabs();
    out << "included: " << kEInclusionNames[fIncluded] << std::endl;
    for (const auto& entry : fChildren) {
        writeTabs();
        out << entry.first << ": {" << std::endl;
        entry.second.print(out, indent + 1);
        writeTabs();
        out << "}" << std::endl;
    }
}
// Writes a multi-line debug dump of the whole rule tree to `out`.
// The root node is printed at indent level 1, and no newline is written after
// the closing brace.
void SimpleRuleBasedPathFilter::print(std::ostream& out) const {
out << "SimpleRuleBasedPathFilter {" << std::endl;
fRoot.print(out, 1);
out << "}";
}
// Stream insertion for debugging: forwards to SimpleRuleBasedPathFilter::print
// and returns the stream so calls can be chained.
std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value) {
value.print(out);
return out;
}

View file

@ -0,0 +1,124 @@
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#ifndef __FILTERRB_H__
#define __FILTERRB_H__
#include <list>
#include <string>
#include <map>
#include <ostream>
#include "unicode/utypes.h"
/**
* Represents an absolute path into a resource bundle.
* For example: "/units/length/meter"
*
* The path is stored as a list of keys, ordered from the top of the tree
* downwards. push() and pop() let a tree traversal keep one instance up to
* date while descending and ascending.
*/
class ResKeyPath {
public:
/** Constructs an empty path (top of tree) */
ResKeyPath();
/**
* Constructs from a string path.
* The text must start with '/'; on malformed input an error message is
* written to stderr and status is set to U_PARSE_ERROR.
*/
ResKeyPath(const std::string& path, UErrorCode& status);
/** Appends key as the new last component of the path. */
void push(const std::string& key);
/** Removes the last component of the path; the path must not be empty. */
void pop();
/** Returns the components of the path, in order from the top of the tree. */
const std::list<std::string>& pieces() const;
private:
/** Keys making up the path; empty means the top of the tree. */
std::list<std::string> fPath;
};
/** Streams the path in textual form; an empty path is printed as "/". */
std::ostream& operator<<(std::ostream& out, const ResKeyPath& value);
/**
 * Interface used to determine whether to include or reject pieces of a
 * resource bundle based on their absolute path.
 */
class PathFilter {
public:
    /** Verdict returned by match(); see match() for the meaning of each value. */
    enum EInclusion {
        INCLUDE,
        PARTIAL,
        EXCLUDE
    };

    /** Printable names of the EInclusion values, indexed by enumerator value. */
    static const char* kEInclusionNames[];

    /**
     * Virtual so that a concrete filter can be destroyed safely through a
     * pointer or reference to this interface.
     */
    virtual ~PathFilter() = default;

    /**
     * Returns an EInclusion on whether or not the given path should be included.
     *
     * INCLUDE = include the whole subtree
     * PARTIAL = recurse into the subtree
     * EXCLUDE = reject the whole subtree
     */
    virtual EInclusion match(const ResKeyPath& path) const = 0;
};
/**
* Implementation of PathFilter for a list of inclusion/exclusion rules.
*
* Rules are stored in a tree keyed by path component. A rule added later
* replaces any rules previously stored at or below the same path.
*
* For example, given this list of filter rules:
*
* -/alabama
* +/alabama/alaska/arizona
* -/fornia/hawaii
*
* You get the following structure:
*
* SimpleRuleBasedPathFilter {
* included: PARTIAL
* alabama: {
* included: EXCLUDE
* alaska: {
* included: PARTIAL
* arizona: {
* included: INCLUDE
* }
* }
* }
* fornia: {
* included: PARTIAL
* hawaii: {
* included: EXCLUDE
* }
* }
* }
*/
class SimpleRuleBasedPathFilter : public PathFilter {
public:
/**
* Adds a rule given in text form: '+' (include) or '-' (exclude) followed
* by an absolute path. Sets status to U_PARSE_ERROR on malformed input.
*/
void addRule(const std::string& ruleLine, UErrorCode& status);
/**
* Adds a rule for an already parsed path; inclusionRule is true for an
* inclusion rule and false for an exclusion rule. Does nothing when status
* already holds a failure.
*/
void addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status);
/** Evaluates the stored rules for the given path. */
EInclusion match(const ResKeyPath& path) const override;
/** Writes a debug dump of the rule tree, in the format shown above. */
void print(std::ostream& out) const;
private:
/** One node of the rule tree; the children are keyed by path component. */
struct Tree {
/**
* Information on the USER-SPECIFIED inclusion/exclusion.
*
* INCLUDE = this path exactly matches a "+" rule
* PARTIAL = this path does not match any rule, but subpaths exist
* EXCLUDE = this path exactly matches a "-" rule
*/
EInclusion fIncluded = PARTIAL;
/** Rules for the paths directly below this one. */
std::map<std::string, Tree> fChildren;
/** Dumps this node and its children, indenting each line by `indent` tabs. */
void print(std::ostream& out, int32_t indent) const;
};
/** Node for the top of the resource tree (the path "/"). */
Tree fRoot;
};
/** Stream insertion for debugging; forwards to SimpleRuleBasedPathFilter::print. */
std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value);
#endif //__FILTERRB_H__

View file

@ -18,6 +18,11 @@
*******************************************************************************
*/
#include <fstream>
#include <iostream>
#include <list>
#include <string>
#include <assert.h>
#include "genrb.h"
#include "unicode/localpointer.h"
@ -25,13 +30,15 @@
#include "unicode/utf16.h"
#include "charstr.h"
#include "cmemory.h"
#include "filterrb.h"
#include "reslist.h"
#include "ucmndata.h" /* TODO: for reading the pool bundle */
U_NAMESPACE_USE
/* Protos */
void processFile(const char *filename, const char* cp, const char *inputDir, const char *outputDir,
void processFile(const char *filename, const char* cp,
const char *inputDir, const char *outputDir, const char *filterDir,
const char *packageName,
SRBRoot *newPoolBundle, UBool omitBinaryCollation, UErrorCode &status);
static char *make_res_filename(const char *filename, const char *outputDir,
@ -76,7 +83,8 @@ enum
FORMAT_VERSION,
WRITE_POOL_BUNDLE,
USE_POOL_BUNDLE,
INCLUDE_UNIHAN_COLL
INCLUDE_UNIHAN_COLL,
FILTERDIR
};
UOption options[]={
@ -102,6 +110,7 @@ UOption options[]={
UOPTION_DEF("writePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 19 */
UOPTION_DEF("usePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 20 */
UOPTION_DEF("includeUnihanColl", '\x01', UOPT_NO_ARG),/* 21 */ /* temporary, don't display in usage info */
UOPTION_DEF("filterDir", '\x01', UOPT_OPTIONAL_ARG), /* 22 */
};
static UBool write_java = FALSE;
@ -121,6 +130,7 @@ main(int argc,
const char *arg = NULL;
const char *outputDir = NULL; /* NULL = no output directory, use current */
const char *inputDir = NULL;
const char *filterDir = NULL;
const char *encoding = "";
int i;
UBool illegalArg = FALSE;
@ -228,6 +238,9 @@ main(int argc,
"\t --usePoolBundle [directory] point to keys from the pool.res keys pool bundle if they are available there;\n"
"\t makes .res files smaller but dependent on the pool bundle\n"
"\t (--writePoolBundle and --usePoolBundle cannot be combined)\n");
fprintf(stderr,
"\t --filterDir Input directory where filter files are available.\n"
"\t For more on filter files, see Python buildtool.\n");
return illegalArg ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
@ -254,6 +267,10 @@ main(int argc,
outputDir = options[DESTDIR].value;
}
if (options[FILTERDIR].doesOccur) {
filterDir = options[FILTERDIR].value;
}
if(options[ENCODING].doesOccur) {
encoding = options[ENCODING].value;
}
@ -524,7 +541,7 @@ main(int argc,
if (isVerbose()) {
printf("Processing file \"%s\"\n", theCurrentFileName.data());
}
processFile(arg, encoding, inputDir, outputDir, NULL,
processFile(arg, encoding, inputDir, outputDir, filterDir, NULL,
newPoolBundle.getAlias(),
options[NO_BINARY_COLLATION].doesOccur, status);
}
@ -558,16 +575,16 @@ main(int argc,
/* Process a file */
void
processFile(const char *filename, const char *cp,
const char *inputDir, const char *outputDir, const char *packageName,
const char *inputDir, const char *outputDir, const char *filterDir,
const char *packageName,
SRBRoot *newPoolBundle,
UBool omitBinaryCollation, UErrorCode &status) {
LocalPointer<SRBRoot> data;
UCHARBUF *ucbuf = NULL;
char *rbname = NULL;
char *openFileName = NULL;
char *inputDirBuf = NULL;
LocalUCHARBUFPointer ucbuf;
CharString openFileName;
CharString inputDirBuf;
char outputFileName[256];
char outputFileName[256];
int32_t dirlen = 0;
int32_t filelen = 0;
@ -584,8 +601,6 @@ processFile(const char *filename, const char *cp,
if(inputDir == NULL) {
const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
openFileName[0] = '\0';
if (filenameBegin != NULL) {
/*
* When a filename ../../../data/root.txt is specified,
@ -594,31 +609,15 @@ processFile(const char *filename, const char *cp,
* another file, like UCARules.txt or thaidict.brk.
*/
int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
inputDirBuf = uprv_strncpy((char *)uprv_malloc(filenameSize), filename, filenameSize);
inputDirBuf.append(filename, filenameSize, status);
/* test for NULL */
if(inputDirBuf == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
goto finish;
}
inputDirBuf[filenameSize - 1] = 0;
inputDir = inputDirBuf;
dirlen = (int32_t)uprv_strlen(inputDir);
inputDir = inputDirBuf.data();
dirlen = inputDirBuf.length();
}
}else{
dirlen = (int32_t)uprv_strlen(inputDir);
if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
/* test for NULL */
if(openFileName == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
goto finish;
}
openFileName[0] = '\0';
/*
* append the input dir to openFileName if the first char in
* filename is not file separation char and the last char input directory is not '.'.
@ -631,49 +630,76 @@ processFile(const char *filename, const char *cp,
* genrb -s. icu/data --- start from CWD and look in icu/data dir
*/
if( (filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
uprv_strcpy(openFileName, inputDir);
openFileName[dirlen] = U_FILE_SEP_CHAR;
openFileName.append(inputDir, status);
}
openFileName[dirlen + 1] = '\0';
} else {
openFileName = (char *) uprv_malloc(dirlen + filelen + 1);
/* test for NULL */
if(openFileName == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
goto finish;
}
uprv_strcpy(openFileName, inputDir);
openFileName.append(inputDir, status);
}
}
openFileName.appendPathPart(filename, status);
uprv_strcat(openFileName, filename);
// Test for CharString failure
if (U_FAILURE(status)) {
return;
}
ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, &status);
ucbuf.adoptInstead(ucbuf_open(openFileName.data(), &cp,getShowWarning(),TRUE, &status));
if(status == U_FILE_ACCESS_ERROR) {
fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
goto finish;
fprintf(stderr, "couldn't open file %s\n", openFileName.data());
return;
}
if (ucbuf == NULL || U_FAILURE(status)) {
if (ucbuf.isNull() || U_FAILURE(status)) {
fprintf(stderr, "An error occurred processing file %s. Error: %s\n",
openFileName == NULL ? filename : openFileName, u_errorName(status));
goto finish;
openFileName.data(), u_errorName(status));
return;
}
/* auto detected popular encodings? */
if (cp!=NULL && isVerbose()) {
printf("autodetected encoding %s\n", cp);
}
/* Parse the data into an SRBRoot */
data.adoptInstead(parse(ucbuf, inputDir, outputDir, filename,
data.adoptInstead(parse(ucbuf.getAlias(), inputDir, outputDir, filename,
!omitBinaryCollation, options[NO_COLLATION_RULES].doesOccur, &status));
if (data.isNull() || U_FAILURE(status)) {
fprintf(stderr, "couldn't parse the file %s. Error:%s\n", filename, u_errorName(status));
goto finish;
return;
}
// Run filtering before writing pool bundle
if (filterDir != nullptr) {
CharString filterFileName(filterDir, status);
filterFileName.appendPathPart(filename, status);
if (U_FAILURE(status)) {
return;
}
// Open the file and read it into filter
SimpleRuleBasedPathFilter filter;
std::ifstream f(filterFileName.data());
if (f.fail()) {
std::cerr << "genrb error: unable to open " << filterFileName.data() << std::endl;
status = U_FILE_ACCESS_ERROR;
return;
}
std::string currentLine;
while (std::getline(f, currentLine)) {
// Ignore # comments and empty lines
if (currentLine.empty() || currentLine[0] == '#') {
continue;
}
filter.addRule(currentLine, status);
if (U_FAILURE(status)) {
return;
}
}
// Apply the filter to the data
ResKeyPath path;
data->fRoot->applyFilter(filter, path, data.getAlias());
}
if(options[WRITE_POOL_BUNDLE].doesOccur) {
data->fWritePoolBundle = newPoolBundle;
data->compactKeys(status);
@ -683,7 +709,7 @@ processFile(const char *filename, const char *cp,
if(U_FAILURE(status)) {
fprintf(stderr, "bundle_compactKeys(%s) or bundle_getKeyBytes() failed: %s\n",
filename, u_errorName(status));
goto finish;
return;
}
/* count the number of just-added key strings */
for(const char *newKeysLimit = newKeys + newKeysLength; newKeys < newKeysLimit; ++newKeys) {
@ -698,11 +724,11 @@ processFile(const char *filename, const char *cp,
}
/* Determine the target rb filename */
rbname = make_res_filename(filename, outputDir, packageName, status);
uprv_free(make_res_filename(filename, outputDir, packageName, status));
if(U_FAILURE(status)) {
fprintf(stderr, "couldn't make the res fileName for bundle %s. Error:%s\n",
filename, u_errorName(status));
goto finish;
return;
}
if(write_java== TRUE){
bundle_write_java(data.getAlias(), outputDir, outputEnc,
@ -719,24 +745,6 @@ processFile(const char *filename, const char *cp,
if (U_FAILURE(status)) {
fprintf(stderr, "couldn't write bundle %s. Error:%s\n", outputFileName, u_errorName(status));
}
finish:
if (inputDirBuf != NULL) {
uprv_free(inputDirBuf);
}
if (openFileName != NULL) {
uprv_free(openFileName);
}
if(ucbuf) {
ucbuf_close(ucbuf);
}
if (rbname) {
uprv_free(rbname);
}
}
/* Generate the target .res file name from the input file name */

View file

@ -226,6 +226,7 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="errmsg.c" />
<ClCompile Include="filterrb.cpp" />
<ClCompile Include="genrb.cpp" />
<ClCompile Include="parse.cpp">
<DisableLanguageExtensions>false</DisableLanguageExtensions>
@ -250,6 +251,7 @@
<ItemGroup>
<ClInclude Include="errmsg.h" />
<ClInclude Include="genrb.h" />
<ClInclude Include="filterrb.h" />
<ClInclude Include="parse.h" />
<ClInclude Include="prscmnts.h" />
<ClInclude Include="rbutil.h" />

View file

@ -18,6 +18,9 @@
<ClCompile Include="errmsg.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="filterrb.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="genrb.cpp">
<Filter>Source Files</Filter>
</ClCompile>
@ -53,6 +56,9 @@
<ClInclude Include="errmsg.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="filterrb.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="genrb.h">
<Filter>Header Files</Filter>
</ClInclude>

View file

@ -28,13 +28,17 @@
#endif
#include <assert.h>
#include <iostream>
#include <set>
#include <stdio.h>
#include "unicode/localpointer.h"
#include "reslist.h"
#include "unewdata.h"
#include "unicode/ures.h"
#include "unicode/putil.h"
#include "errmsg.h"
#include "filterrb.h"
#include "uarrsort.h"
#include "uelement.h"
@ -42,6 +46,8 @@
#include "uinvchar.h"
#include "ustr_imp.h"
#include "unicode/utf16.h"
#include "uassert.h"
/*
* Align binary data at a 16-byte offset from the start of the resource bundle,
* to be safe for any data type it may contain.
@ -921,9 +927,6 @@ void SRBRoot::write(const char *outputDir, const char *outputPkg,
if (f16BitUnits.length() & 1) {
f16BitUnits.append((UChar)0xaaaa); /* pad to multiple of 4 bytes */
}
/* all keys have been mapped */
uprv_free(fKeyMap);
fKeyMap = NULL;
byteOffset = fKeysTop + f16BitUnits.length() * 2;
fRoot->preWrite(&byteOffset);
@ -1128,7 +1131,8 @@ SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCo
: fRoot(NULL), fLocale(NULL), fIndexLength(0), fMaxTableLength(0), fNoFallback(FALSE),
fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle),
fKeys(NULL), fKeyMap(NULL),
fKeysBottom(0), fKeysTop(0), fKeysCapacity(0), fKeysCount(0), fLocalKeyLimit(0),
fKeysBottom(0), fKeysTop(0), fKeysCapacity(0),
fKeysCount(0), fLocalKeyLimit(0),
f16BitUnits(), f16BitStringsLength(0),
fUsePoolBundle(&kNoPoolBundle),
fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0),
@ -1233,6 +1237,9 @@ int32_t
SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) {
int32_t keypos;
// It is not legal to add new key bytes after compactKeys is run!
U_ASSERT(fKeyMap == nullptr);
if (U_FAILURE(errorCode)) {
return -1;
}
@ -1334,11 +1341,35 @@ compareKeyOldpos(const void * /*context*/, const void *l, const void *r) {
return compareInt32(((const KeyMapEntry *)l)->oldpos, ((const KeyMapEntry *)r)->oldpos);
}
// Default implementation for resources without children: passes only this
// resource's own key ID (fKey) to the collector.
void SResource::collectKeys(std::function<void(int32_t)> collector) const {
collector(fKey);
}
void ContainerResource::collectKeys(std::function<void(int32_t)> collector) const {
collector(fKey);
for (SResource* curr = fFirst; curr != NULL; curr = curr->fNext) {
curr->collectKeys(collector);
}
}
void
SRBRoot::compactKeys(UErrorCode &errorCode) {
KeyMapEntry *map;
char *keys;
int32_t i;
// Except for pool bundles, keys might not be used.
// Do not add unused keys to the final bundle.
std::set<int32_t> keysInUse;
if (!fIsPoolBundle) {
fRoot->collectKeys([&keysInUse](int32_t key) {
if (key >= 0) {
keysInUse.insert(key);
}
});
fKeysCount = keysInUse.size();
}
int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount;
if (U_FAILURE(errorCode) || fKeysCount == 0 || fKeyMap != NULL) {
return;
@ -1357,11 +1388,23 @@ SRBRoot::compactKeys(UErrorCode &errorCode) {
++keys; /* skip the NUL */
}
keys = fKeys + fKeysBottom;
for (; i < keysCount; ++i) {
map[i].oldpos = (int32_t)(keys - fKeys);
map[i].newpos = 0;
while (*keys != 0) { ++keys; } /* skip the key */
++keys; /* skip the NUL */
while (i < keysCount) {
int32_t keyOffset = static_cast<int32_t>(keys - fKeys);
if (!fIsPoolBundle && keysInUse.count(keyOffset) == 0) {
// Mark the unused key as deleted
while (*keys != 0) { *keys++ = 1; }
*keys++ = 1;
} else {
map[i].oldpos = keyOffset;
map[i].newpos = 0;
while (*keys != 0) { ++keys; } /* skip the key */
++keys; /* skip the NUL */
i++;
}
}
if (keys != fKeys + fKeysTop) {
// Throw away any unused keys from the end
fKeysTop = static_cast<int32_t>(keys - fKeys);
}
/* Sort the keys so that each one is immediately followed by all of its suffixes. */
uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
@ -1404,7 +1447,7 @@ SRBRoot::compactKeys(UErrorCode &errorCode) {
for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {}
if (suffix == suffixLimit && *k == *suffixLimit) {
map[j].newpos = map[i].oldpos + offset; /* yes, point to the earlier key */
/* mark the suffix as deleted */
// Mark the suffix as deleted
while (*suffix != 0) { *suffix++ = 1; }
*suffix = 1;
} else {
@ -1438,7 +1481,7 @@ SRBRoot::compactKeys(UErrorCode &errorCode) {
keys[newpos++] = keys[oldpos++];
}
}
assert(i == keysCount);
U_ASSERT(i == keysCount);
}
fKeysTop = newpos;
/* Re-sort once more, by old offsets for binary searching. */
@ -1692,3 +1735,52 @@ SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) {
// +1 to account for the initial zero in f16BitUnits
assert(f16BitUnits.length() <= (f16BitStringsLength + 1));
}
// Default implementation: intentionally does nothing, so resource types that
// do not override this method are left untouched by filtering.
void SResource::applyFilter(
const PathFilter& /*filter*/,
ResKeyPath& /*path*/,
const SRBRoot* /*bundle*/) {
// Only a few resource types (tables) are capable of being filtered.
}
void TableResource::applyFilter(
const PathFilter& filter,
ResKeyPath& path,
const SRBRoot* bundle) {
SResource* prev = nullptr;
SResource* curr = fFirst;
for (; curr != nullptr;) {
path.push(curr->getKeyString(bundle));
auto inclusion = filter.match(path);
if (inclusion == PathFilter::EInclusion::INCLUDE) {
// Include whole subtree
// no-op
} else if (inclusion == PathFilter::EInclusion::EXCLUDE) {
// Reject the whole subtree
// Remove it from the linked list
if (isVerbose()) {
std::cout << "genrb removing subtree: " << bundle->fLocale << ": " << path << std::endl;
}
if (prev == nullptr) {
fFirst = curr->fNext;
} else {
prev->fNext = curr->fNext;
}
fCount--;
delete curr;
curr = prev;
} else {
U_ASSERT(inclusion == PathFilter::EInclusion::PARTIAL);
// Recurse into the child
curr->applyFilter(filter, path, bundle);
}
path.pop();
prev = curr;
if (curr == nullptr) {
curr = fFirst;
} else {
curr = curr->fNext;
}
}
}

View file

@ -23,6 +23,8 @@
#define KEY_SPACE_SIZE 65536
#define RESLIST_MAX_INT_VECTOR 2048
#include <functional>
#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/ures.h"
@ -36,7 +38,9 @@
U_CDECL_BEGIN
class PathFilter;
class PseudoListResource;
class ResKeyPath;
struct ResFile {
ResFile()
@ -212,6 +216,19 @@ struct SResource {
void write(UNewDataMemory *mem, uint32_t *byteOffset);
virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset);
/**
* Applies the given filter with the given base path to this resource.
* Removes child resources rejected by the filter recursively.
*
* @param bundle Needed in order to access the key for this and child resources.
*/
virtual void applyFilter(const PathFilter& filter, ResKeyPath& path, const SRBRoot* bundle);
/**
* Calls the given function for every key ID present in this tree.
*/
virtual void collectKeys(std::function<void(int32_t)> collector) const;
int8_t fType; /* nominal type: fRes (when != 0xffffffff) may use subtype */
UBool fWritten; /* res_write() can exit early */
uint32_t fRes; /* resource item word; RES_BOGUS=0xffffffff if not known yet */
@ -231,7 +248,10 @@ public:
fCount(0), fFirst(NULL) {}
virtual ~ContainerResource();
virtual void handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode);
void handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) override;
void collectKeys(std::function<void(int32_t)> collector) const override;
protected:
void writeAllRes16(SRBRoot *bundle);
void preWriteAllRes(uint32_t *byteOffset);
@ -254,9 +274,11 @@ public:
void add(SResource *res, int linenumber, UErrorCode &errorCode);
virtual void handleWrite16(SRBRoot *bundle);
virtual void handlePreWrite(uint32_t *byteOffset);
virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset);
void handleWrite16(SRBRoot *bundle) override;
void handlePreWrite(uint32_t *byteOffset) override;
void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) override;
void applyFilter(const PathFilter& filter, ResKeyPath& path, const SRBRoot* bundle) override;
int8_t fTableType; // determined by table_write16() for table_preWrite() & table_write()
SRBRoot *fRoot;