From 261b6619e813d06203ab0737e469eecabe52b745 Mon Sep 17 00:00:00 2001
From: Ilya Zverev
Date: Wed, 27 Apr 2016 15:41:28 +0300
Subject: [PATCH] [categories] Add category groups

Teach CategoriesHolder::LoadFromStream about category groups:

* Every input line is trimmed, and a line whose first character is '#'
  is skipped as a comment.
* A types line that yields exactly one "@name" token and no
  classificator types is treated as a group definition: the names parsed
  from the following translation lines are stored for that group instead
  of being added to a category.
* A types line may list group references next to classificator types
  (e.g. "shop-bakery|@shop"); the stored translations of every
  referenced group are appended to the category's synonyms.

Add the CategoriesIndex_Groups unit test, which defines two groups and
checks that their translations resolve to the categories referencing
them.

---
NOTE(review): the block that appends group translations sits inside the
EParseLanguages case, after the token loop. It therefore looks like a
category with several translation lines receives the group translations
once per line - please confirm against the part of the function that is
not shown in this patch.

 indexer/categories_holder.cpp             | 55 ++++++++++++++++++-----
 indexer/indexer_tests/categories_test.cpp | 41 +++++++++++++++++
 2 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/indexer/categories_holder.cpp b/indexer/categories_holder.cpp
index cf96e2a223..e080cbb0d9 100644
--- a/indexer/categories_holder.cpp
+++ b/indexer/categories_holder.cpp
@@ -84,6 +84,8 @@ void CategoriesHolder::LoadFromStream(istream & s)
 
   Category cat;
   vector<uint32_t> types;
+  vector<string> currentGroups;
+  multimap<string, Category::Name> groupTranslations;
 
   Classificator const & c = classif();
 
@@ -92,6 +94,10 @@ void CategoriesHolder::LoadFromStream(istream & s)
   {
     ++lineNumber;
     getline(s, line);
+    strings::Trim(line);
+    // Allow for comments starting with '#' character.
+    if (!line.empty() && line[0] == '#')
+      continue;
     strings::SimpleTokenizer iter(line, state == EParseTypes ? "|" : ":|");
 
     switch (state)
@@ -99,24 +105,34 @@ void CategoriesHolder::LoadFromStream(istream & s)
     case EParseTypes:
       {
         AddCategory(cat, types);
+        currentGroups.clear();
 
         while (iter)
         {
-          // split category to sub categories for classificator
-          vector<string> v;
-          strings::Tokenize(*iter, "-", MakeBackInsertFunctor(v));
-
-          // get classificator type
-          uint32_t const type = c.GetTypeByPathSafe(v);
-          if (type != 0)
-            types.push_back(type);
+          // Check if category is a group reference.
+          if ((*iter)[0] == '@')
+          {
+            CHECK((currentGroups.empty() || !types.empty()), ("Two groups in a group definition at line", lineNumber));
+            currentGroups.push_back(*iter);
+          }
           else
-            LOG(LWARNING, ("Invalid type:", v, "at line:", lineNumber));
+          {
+            // Split category to subcategories for classificator.
+            vector<string> v;
+            strings::Tokenize(*iter, "-", MakeBackInsertFunctor(v));
+
+            // Get classificator type.
+            uint32_t const type = c.GetTypeByPathSafe(v);
+            if (type != 0)
+              types.push_back(type);
+            else
+              LOG(LWARNING, ("Invalid type:", v, "at line:", lineNumber));
+          }
 
           ++iter;
         }
 
-        if (!types.empty())
+        if (!types.empty() || currentGroups.size() == 1)
           state = EParseLanguages;
       }
       break;
@@ -173,7 +189,24 @@ void CategoriesHolder::LoadFromStream(istream & s)
             }
           }
 
-          cat.m_synonyms.push_back(name);
+          if (currentGroups.size() == 1 && types.empty())
+          {
+            // Not a translation, but a category group definition
+            groupTranslations.emplace(currentGroups[0], name);
+          }
+          else
+            cat.m_synonyms.push_back(name);
+        }
+
+        if (!types.empty())
+        {
+          // If a category group is specified, add translations from it.
+          for (string const & group : currentGroups)
+          {
+            auto trans = groupTranslations.equal_range(group);
+            for (auto it = trans.first; it != trans.second; ++it)
+              cat.m_synonyms.push_back(it->second);
+          }
         }
       }
       break;
diff --git a/indexer/indexer_tests/categories_test.cpp b/indexer/indexer_tests/categories_test.cpp
index d7b340d305..e1aac7f594 100644
--- a/indexer/indexer_tests/categories_test.cpp
+++ b/indexer/indexer_tests/categories_test.cpp
@@ -187,6 +187,47 @@ UNIT_TEST(CategoriesIndex_MultipleTokens)
   testTypes("shop meat", {type2});
 }
 
+UNIT_TEST(CategoriesIndex_Groups)
+{
+  char const kCategories[] =
+      "@shop\n"
+      "en:shop\n"
+      "ru:магазин\n"
+      "\n"
+      "@meat\n"
+      "en:meat\n"
+      "\n"
+      "shop-bakery|@shop\n"
+      "en:buns\n"
+      "\n"
+      "shop-butcher|@shop|@meat\n"
+      "en:butcher";
+
+  classificator::Load();
+  CategoriesHolder holder(make_unique<MemReader>(kCategories, sizeof(kCategories) - 1));
+  CategoriesIndex index(holder);
+
+  index.AddAllCategoriesInAllLangs();
+  auto testTypes = [&](string const & query, vector<uint32_t> const & expected)
+  {
+    vector<uint32_t> result;
+    index.GetAssociatedTypes(query, result);
+    TEST_EQUAL(result, expected, (query));
+  };
+
+  uint32_t type1 = classif().GetTypeByPath({"shop", "bakery"});
+  uint32_t type2 = classif().GetTypeByPath({"shop", "butcher"});
+  if (type1 > type2)
+    swap(type1, type2);
+
+  testTypes("buns", {type1});
+  testTypes("butcher", {type2});
+  testTypes("meat", {type2});
+  testTypes("shop", {type1, type2});
+  testTypes("магазин", {type1, type2});
+  testTypes("http", {});
+}
+
 #ifdef DEBUG
 // A check that this data structure is not too heavy.
 UNIT_TEST(CategoriesIndex_AllCategories)