[categories] Add category groups

This commit is contained in:
Ilya Zverev 2016-04-27 15:41:28 +03:00 committed by Alex Zolotarev
parent 6d508e0d28
commit 261b6619e8
2 changed files with 85 additions and 11 deletions

View file

@ -84,6 +84,8 @@ void CategoriesHolder::LoadFromStream(istream & s)
Category cat;
vector<uint32_t> types;
vector<string> currentGroups;
multimap<string, Category::Name> groupTranslations;
Classificator const & c = classif();
@ -92,6 +94,10 @@ void CategoriesHolder::LoadFromStream(istream & s)
{
++lineNumber;
getline(s, line);
strings::Trim(line);
// Allow for comments starting with '#' character.
if (!line.empty() && line[0] == '#')
continue;
strings::SimpleTokenizer iter(line, state == EParseTypes ? "|" : ":|");
switch (state)
@ -99,24 +105,34 @@ void CategoriesHolder::LoadFromStream(istream & s)
case EParseTypes:
{
AddCategory(cat, types);
currentGroups.clear();
while (iter)
{
// split category to sub categories for classificator
vector<string> v;
strings::Tokenize(*iter, "-", MakeBackInsertFunctor(v));
// get classificator type
uint32_t const type = c.GetTypeByPathSafe(v);
if (type != 0)
types.push_back(type);
// Check if category is a group reference.
if ((*iter)[0] == '@')
{
CHECK((currentGroups.empty() || !types.empty()), ("Two groups in a group definition at line", lineNumber));
currentGroups.push_back(*iter);
}
else
LOG(LWARNING, ("Invalid type:", v, "at line:", lineNumber));
{
// Split category to subcategories for classificator.
vector<string> v;
strings::Tokenize(*iter, "-", MakeBackInsertFunctor(v));
// Get classificator type.
uint32_t const type = c.GetTypeByPathSafe(v);
if (type != 0)
types.push_back(type);
else
LOG(LWARNING, ("Invalid type:", v, "at line:", lineNumber));
}
++iter;
}
if (!types.empty())
if (!types.empty() || currentGroups.size() == 1)
state = EParseLanguages;
}
break;
@ -173,7 +189,24 @@ void CategoriesHolder::LoadFromStream(istream & s)
}
}
cat.m_synonyms.push_back(name);
if (currentGroups.size() == 1 && types.empty())
{
// Not a translation, but a category group definition
groupTranslations.emplace(currentGroups[0], name);
}
else
cat.m_synonyms.push_back(name);
}
if (!types.empty())
{
// If a category group is specified, add translations from it.
for (string const & group : currentGroups)
{
auto trans = groupTranslations.equal_range(group);
for (auto it = trans.first; it != trans.second; ++it)
cat.m_synonyms.push_back(it->second);
}
}
}
break;

View file

@ -187,6 +187,47 @@ UNIT_TEST(CategoriesIndex_MultipleTokens)
testTypes("shop meat", {type2});
}
// Checks that names declared for category groups ("@shop", "@meat") can be
// used to find every category that references those groups, in addition to
// the category's own names.
UNIT_TEST(CategoriesIndex_Groups)
{
  // Two group definitions followed by two categories that reference them:
  // shop-bakery uses @shop, shop-butcher uses both @shop and @meat.
  char const kCategories[] =
      "@shop\n"
      "en:shop\n"
      "ru:магазин\n"
      "\n"
      "@meat\n"
      "en:meat\n"
      "\n"
      "shop-bakery|@shop\n"
      "en:buns\n"
      "\n"
      "shop-butcher|@shop|@meat\n"
      "en:butcher";

  classificator::Load();
  // sizeof - 1 excludes the terminating NUL of the string literal.
  CategoriesHolder holder(make_unique<MemReader>(kCategories, sizeof(kCategories) - 1));
  CategoriesIndex index(holder);

  index.AddAllCategoriesInAllLangs();
  auto testTypes = [&](string const & query, vector<uint32_t> const & expected)
  {
    vector<uint32_t> result;
    index.GetAssociatedTypes(query, result);
    TEST_EQUAL(result, expected, (query));
  };

  // Keep each type under a fixed name. Swapping the two variables to order
  // them (as was done before) silently exchanged their meaning and made the
  // single-type expectations below wrong whenever bakery > butcher.
  uint32_t const bakery = classif().GetTypeByPath({"shop", "bakery"});
  uint32_t const butcher = classif().GetTypeByPath({"shop", "butcher"});

  // Expectation for queries that match both categories, listed in ascending
  // order of type id.
  // NOTE(review): this presumes GetAssociatedTypes yields ascending ids - confirm.
  vector<uint32_t> const both = bakery < butcher ? vector<uint32_t>{bakery, butcher}
                                                 : vector<uint32_t>{butcher, bakery};

  testTypes("buns", {bakery});
  testTypes("butcher", {butcher});
  testTypes("meat", {butcher});
  testTypes("shop", both);
  testTypes("магазин", both);
  testTypes("http", {});
}
#ifdef DEBUG
// A check that this data structure is not too heavy.
UNIT_TEST(CategoriesIndex_AllCategories)