[booking] Save all translations we've got

This commit is contained in:
Ilya Zverev 2016-06-21 18:48:36 +03:00
parent fb53332984
commit 3fac043578
6 changed files with 55 additions and 27 deletions

View file

@ -729,3 +729,17 @@ UNIT_TEST(NormalizeDigits_UniString)
TEST_EQUAL(nd("a9 "), "a0192 ", ());
TEST_EQUAL(nd(""), "3456789", ());
}
// Checks strings::Split(): empty parts are kept (leading, trailing and between adjacent
// delimiters), the target vector is overwritten on every call, and an empty input yields
// an empty vector.
UNIT_TEST(Split)
{
  vector<string> target;

  // Leading and trailing delimiters produce empty parts; a backslash does not escape
  // the delimiter.
  strings::Split(";Test\\;проверка;0;", ';', target);
  vector<string> expected({"", "Test\\", "проверка", "0", ""});
  TEST_EQUAL(target, expected, ());

  // Two adjacent delimiters (note the double space) produce an empty part between them.
  strings::Split("and there  was none", ' ', target);
  vector<string> expected2({"and", "there", "", "was", "none"});
  TEST_EQUAL(target, expected2, ());

  // An empty string results in an empty vector, not in {""}.
  strings::Split("", '!', target);
  vector<string> expected3;
  TEST_EQUAL(target, expected3, ());
}

View file

@ -328,4 +328,17 @@ bool AlmostEqual(string const & str1, string const & str2, size_t mismatchedCoun
return false;
}
// Fills |target| with the parts of |s| separated by |delimiter|.
// Any previous content of |target| is discarded.
void Split(string const & s, char delimiter, vector<string> & target)
{
  target.clear();

  // An empty input yields an empty vector rather than a vector holding one empty string.
  if (s.empty())
    return;

  using TokenIt = TokenizeIterator<SimpleDelimiter, string::const_iterator, true>;

  TokenIt token(s, SimpleDelimiter(delimiter));
  while (token)
  {
    target.push_back(*token);
    ++token;
  }
}
} // namespace strings

View file

@ -306,6 +306,9 @@ void Tokenize(string const & str, char const * delims, TFunctor && f)
}
}
/// Splits the string @p s by the @p delimiter and stores the parts in @p target.
/// Previous content of @p target is discarded.
/// Empty parts are kept, but an empty string produces an empty vector (not a vector
/// with a single empty string).
void Split(string const & s, char delimiter, vector<string> & target);
/// @return code of last symbol in string or 0 if s is empty
UniChar LastUniChar(string const & s);

View file

@ -51,12 +51,7 @@ BookingDataset::Hotel::Hotel(string const & src)
strings::to_uint(rec[Index(Fields::Type)], type);
langCode = rec[Index(Fields::Language)];
if (!langCode.empty())
{
nameLoc = rec[Index(Fields::NameLoc)];
addressLoc = rec[Index(Fields::AddressLoc)];
}
translations = rec[Index(Fields::Translations)];
}
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h)
@ -178,10 +173,15 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
e.AddTag("price_rate", strings::to_string(hotel.priceCategory));
e.AddTag("addr:full", hotel.address);
if (!hotel.langCode.empty())
if (!hotel.translations.empty())
{
e.AddTag("name:" + hotel.langCode, hotel.nameLoc);
e.AddTag("addr:full:" + hotel.langCode, hotel.addressLoc);
vector<string> parts;
strings::Split(hotel.translations, '|', parts);
// Translations are packed as consecutive (language, name, address) triples.
// Only complete triples are consumed, so a malformed record with a truncated tail
// can not cause out-of-range reads of |parts|.
for (size_t i = 0; i + 2 < parts.size(); i += 3)
{
  e.AddTag("name:" + parts[i], parts[i + 1]);
  e.AddTag("addr:full:" + parts[i], parts[i + 2]);
}
}
switch (hotel.type)
@ -278,7 +278,8 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const
return false;
// Find |kMaxSelectedElements| nearest values to a point.
auto const bookingIndexes = GetNearestHotels(e.lat, e.lon, kMaxSelectedElements, kDistanceLimitInMeters);
auto const bookingIndexes =
GetNearestHotels(e.lat, e.lon, kMaxSelectedElements, kDistanceLimitInMeters);
bool matched = false;

View file

@ -36,9 +36,7 @@ public:
RatingUsers = 8,
DescUrl = 9,
Type = 10,
Language = 11,
NameLoc = 12,
AddressLoc = 13,
Translations = 11,
Counter
};
@ -54,9 +52,7 @@ public:
double ratingUser = 0.0;
string descUrl;
uint32_t type = 0;
string langCode;
string nameLoc;
string addressLoc;
string translations;
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }

View file

@ -17,7 +17,7 @@ import urllib2
logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s')
# Names starting with '.' are calculated in get_hotel_field() below.
HOTEL_FIELDS = ('hotel_id', '.lat', '.lon', 'name', 'address', 'class', '.rate', 'ranking', 'review_score', 'url', 'hoteltype_id')
HOTEL_FIELDS = ('hotel_id', '.lat', '.lon', 'name', 'address', 'class', '.rate', 'ranking', 'review_score', 'url', 'hoteltype_id', '.trans')
class BookingApi:
@ -160,6 +160,16 @@ def translate(source, output):
return hotel['location']['longitude']
elif field == '.rate':
return rate
elif field == '.trans':
# Translations are packed into a single column: lang1|name1|address1|lang2|name2|address2|...
if 'translations' in hotel:
tr_list = []
for tr_lang, tr_values in hotel['translations'].items():
tr_list.append(tr_lang)
tr_list.extend([tr_values[e] for e in ('name', 'address')])
return '|'.join([s.replace('|', ';') for s in tr_list])
else:
return ''
elif field in hotel:
return hotel[field]
raise ValueError('Unknown hotel field: {0}'.format(field))
@ -175,16 +185,7 @@ def translate(source, output):
while rate <= len(rates) and price > avg * rates[rate - 1]:
rate += 1
l = [get_hotel_field(hotel, e, rate) for e in HOTEL_FIELDS]
# Add translations for hotel name and address if present.
if 'translations' in hotel:
tr_lang = hotel['languagecode']
if tr_lang not in hotel['translations']:
tr_lang = hotel['translations'].keys()[0]
l.append(tr_lang)
l.extend([hotel['translations'][tr_lang][e] for e in ('name', 'address')])
else:
l.extend([''] * 3)
print('\t'.join([unicode(f).encode('utf8').replace('\t', ' ') for f in l]), file=fd)
print('\t'.join([unicode(f).encode('utf8').replace('\t', ' ').replace('\n', ' ').replace('\r', '') for f in l]), file=fd)
def process_options():