[booking] Add translated name and address

This commit is contained in:
Ilya Zverev 2016-06-10 16:21:55 +03:00 committed by Vladimir Byko-Ianko
parent 3ad9aad3f1
commit a011a5a5fa
3 changed files with 55 additions and 8 deletions

View file

@ -50,6 +50,13 @@ BookingDataset::Hotel::Hotel(string const & src)
descUrl = rec[Index(Fields::DescUrl)];
strings::to_uint(rec[Index(Fields::Type)], type);
langCode = rec[Index(Fields::Language)];
if (!langCode.empty())
{
nameLoc = rec[Index(Fields::NameLoc)];
addressLoc = rec[Index(Fields::AddressLoc)];
}
}
ostream & operator<<(ostream & s, BookingDataset::Hotel const & h)
@ -171,6 +178,12 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
e.AddTag("price_rate", strings::to_string(hotel.priceCategory));
e.AddTag("addr:full", hotel.address);
if (!hotel.langCode.empty())
{
e.AddTag("name:" + hotel.langCode, hotel.nameLoc);
e.AddTag("addr:full:" + hotel.langCode, hotel.addressLoc);
}
switch (hotel.type)
{
case 19:

View file

@ -36,6 +36,9 @@ public:
RatingUsers = 8,
DescUrl = 9,
Type = 10,
Language = 11,
NameLoc = 12,
AddressLoc = 13,
Counter
};
@ -51,6 +54,9 @@ public:
double ratingUser = 0.0;
string descUrl;
uint32_t type = 0;
string langCode;
string nameLoc;
string addressLoc;
static constexpr size_t Index(Fields field) { return static_cast<size_t>(field); }
static constexpr size_t FieldsCount() { return static_cast<size_t>(Fields::Counter); }

View file

@ -81,26 +81,45 @@ def download(user, password, path):
countrycode = country['countrycode']
logging.info(u'Download[{0}]: {1}'.format(countrycode, country['name']))
allhotels = []
allhotels = {}
while True:
hotels = api.call('getHotels',
dict(new_hotel_type=1, offset=len(allhotels), rows=maxrows, countrycodes=countrycode))
# Check for error.
if not hotels:
if hotels is None:
exit(1)
allhotels.extend(hotels)
for h in hotels:
allhotels[h['hotel_id']] = h
# If the response contains fewer hotels than maxrows, we have reached the end of the data.
if len(hotels) < maxrows:
break
logging.info('Num of hotels: {0}'.format(len(allhotels)))
# Now the same for hotel translations
offset = 0
while True:
hotels = api.call('getHotelTranslations', dict(offset=offset, rows=maxrows, countrycodes=countrycode))
if hotels is None:
exit(1)
# Add translations for each hotel
for h in hotels:
if h['hotel_id'] in allhotels:
if 'translations' not in allhotels[h['hotel_id']]:
allhotels[h['hotel_id']]['translations'] = {}
allhotels[h['hotel_id']]['translations'][h['languagecode']] = {'name': h['name'], 'address': h['address']}
offset += len(hotels)
if len(hotels) < maxrows:
break
logging.info('Num of hotels: {0}, translations: {1}'.format(len(allhotels), offset))
filename = os.path.join(path,
'{0} - {1}.pkl'.format(country['area'].encode('utf8'), country['name'].encode('utf8')))
with open(filename, 'wb') as fd:
pickle.dump(allhotels, fd, pickle.HIGHEST_PROTOCOL)
pickle.dump(allhotels.values(), fd, pickle.HIGHEST_PROTOCOL)
def translate(source, output):
@ -110,7 +129,7 @@ def translate(source, output):
files = [filename for filename in os.listdir(source) if filename.endswith('.pkl')]
data = []
for filename in files:
for filename in sorted(files):
logging.info('Processing {0}'.format(filename))
with open(filename, 'rb') as fd:
data += pickle.load(fd)
@ -155,8 +174,17 @@ def translate(source, output):
# Find a range that contains the price
while rate <= len(rates) and price > avg * rates[rate - 1]:
rate += 1
l = [unicode(get_hotel_field(hotel, e, rate)).encode('utf8').replace('\t', ' ') for e in HOTEL_FIELDS]
print('\t'.join(l), file=fd)
l = [get_hotel_field(hotel, e, rate) for e in HOTEL_FIELDS]
# Add translations for hotel name and address if present.
if 'translations' in hotel:
tr_lang = hotel['languagecode']
if tr_lang not in hotel['translations']:
tr_lang = hotel['translations'].keys()[0]
l.append(tr_lang)
l.extend([hotel['translations'][tr_lang][e] for e in ('name', 'address')])
else:
l.extend([''] * 3)
print('\t'.join([unicode(f).encode('utf8').replace('\t', ' ') for f in l]), file=fd)
def process_options():