diff --git a/base/string_utils.cpp b/base/string_utils.cpp index 22dfad3030..de986d77a3 100644 --- a/base/string_utils.cpp +++ b/base/string_utils.cpp @@ -11,7 +11,6 @@ namespace strings { - bool UniString::IsEqualAscii(char const * s) const { return (size() == strlen(s) && equal(begin(), end(), s)); @@ -71,7 +70,6 @@ bool to_uint(char const * start, unsigned int & i, int base /*= 10*/) return IntegerCheck(start, stop, v, i); } - bool to_uint64(char const * s, uint64_t & i) { char * stop; @@ -183,33 +181,21 @@ void NormalizeDigits(UniString & us) namespace { - char ascii_to_lower(char in) - { - char const diff = 'z' - 'Z'; - static_assert(diff == 'a' - 'A', ""); - static_assert(diff > 0, ""); - - if (in >= 'A' && in <= 'Z') - return (in + diff); - return in; - } -} - -void AsciiToLower(string & s) +char ascii_to_lower(char in) { - transform(s.begin(), s.end(), s.begin(), &ascii_to_lower); -} - -void Trim(string & s) -{ - boost::trim(s); -} - -void Trim(string & s, char const * anyOf) -{ - boost::trim_if(s, boost::is_any_of(anyOf)); + char const diff = 'z' - 'Z'; + static_assert(diff == 'a' - 'A', ""); + static_assert(diff > 0, ""); + + if (in >= 'A' && in <= 'Z') + return (in + diff); + return in; +} } +void AsciiToLower(string & s) { transform(s.begin(), s.end(), s.begin(), &ascii_to_lower); } +void Trim(string & s) { boost::trim(s); } +void Trim(string & s, char const * anyOf) { boost::trim_if(s, boost::is_any_of(anyOf)); } bool EqualNoCase(string const & s1, string const & s2) { return MakeLowerCase(s1) == MakeLowerCase(s2); @@ -238,9 +224,7 @@ bool IsASCIIString(string const & str) } bool IsASCIIDigit(UniChar c) { return c >= '0' && c <= '9'; } - bool IsASCIILatin(UniChar c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } - bool StartsWith(UniString const & s, UniString const & p) { if (p.size() > s.size()) @@ -253,11 +237,7 @@ bool StartsWith(UniString const & s, UniString const & p) return true; } -bool StartsWith(string const & s1, char const * s2) -{ - return (s1.compare(0, strlen(s2), s2) == 0); -} - +bool StartsWith(string const & s1, char const * s2) { return (s1.compare(0, strlen(s2), s2) == 0); } bool EndsWith(string const & s1, char const * s2) { size_t const n = s1.size(); @@ -344,4 +324,4 @@ bool AlmostEqual(string const & str1, string const & str2, size_t mismatchedCoun return false; } -} // namespace strings +} // namespace strings diff --git a/base/string_utils.hpp b/base/string_utils.hpp index ca5449ae2b..85396b7f19 100644 --- a/base/string_utils.hpp +++ b/base/string_utils.hpp @@ -15,18 +15,22 @@ /// All methods work with strings in utf-8 format namespace strings { - typedef uint32_t UniChar; -//typedef buffer_vector UniString; +// typedef buffer_vector UniString; /// Make new type, not typedef. Need to specialize DebugPrint. class UniString : public buffer_vector { typedef buffer_vector BaseT; + public: UniString() {} explicit UniString(size_t n, UniChar c = UniChar()) : BaseT(n, c) {} - template UniString(IterT b, IterT e) : BaseT(b, e) {} + template + UniString(IterT b, IterT e) + : BaseT(b, e) + { + } bool IsEqualAscii(char const * s) const; @@ -83,10 +87,7 @@ bool IsASCIIString(string const & str); bool IsASCIIDigit(UniChar c); bool IsASCIILatin(UniChar c); -inline string DebugPrint(UniString const & s) -{ - return ToUtf8(s); -} +inline string DebugPrint(UniString const & s) { return ToUtf8(s); } template class TokenizeIterator @@ -117,14 +118,14 @@ class TokenizeIterator public: /// @warning string S must be not temporary! TokenizeIterator(string const & s, DelimFuncT const & delimFunc) - : m_beg(s.begin()), m_end(s.begin()), m_finish(s.end()), m_delimFunc(delimFunc) + : m_beg(s.begin()), m_end(s.begin()), m_finish(s.end()), m_delimFunc(delimFunc) { move(); } /// @warning unistring S must be not temporary! TokenizeIterator(UniString const & s, DelimFuncT const & delimFunc) - : m_beg(s.begin()), m_end(s.begin()), m_finish(s.end()), m_delimFunc(delimFunc) + : m_beg(s.begin()), m_end(s.begin()), m_finish(s.end()), m_delimFunc(delimFunc) { move(); } @@ -137,12 +138,12 @@ public: string operator*() const { - ASSERT( m_beg != m_finish, ("dereferencing of empty iterator") ); + ASSERT(m_beg != m_finish, ("dereferencing of empty iterator")); return string(m_beg.base(), m_end.base()); } operator bool() const { return m_beg != m_finish; } - + TokenizeIterator & operator++() { move(); @@ -159,11 +160,7 @@ public: return !copy; } - UniString GetUniString() const - { - return UniString(m_beg, m_end); - } - + UniString GetUniString() const { return UniString(m_beg, m_end); } /// Same as operator bool() in expression it == end(...) bool operator==(TokenizeIterator const &) { return !(*this); } /// Same as operator bool() in expression it != end(...) @@ -173,6 +170,7 @@ public: class SimpleDelimiter { UniString m_delims; + public: SimpleDelimiter(char const * delimChars); // Used in TokenizeIterator to allow past the end iterator construction. @@ -181,8 +179,8 @@ public: bool operator()(UniChar c) const; }; -typedef TokenizeIterator > SimpleTokenizer; +typedef TokenizeIterator> + SimpleTokenizer; template void Tokenize(string const & str, char const * delims, TFunctor && f) @@ -198,7 +196,8 @@ void Tokenize(string const & str, char const * delims, TFunctor && f) /// @return code of last symbol in string or 0 if s is empty UniChar LastUniChar(string const & s); -template bool IsInArray(T (&arr) [N], TT const & t) +template +bool IsInArray(T(&arr)[N], TT const & t) { for (size_t i = 0; i < N; ++i) if (arr[i] == t) @@ -214,10 +213,17 @@ bool to_uint64(char const * s, uint64_t & i); bool to_int64(char const * s, int64_t & i); bool to_double(char const * s, double & d); -inline bool is_number(string const & s) { int64_t dummy; return to_int64(s.c_str(), dummy); } +inline bool is_number(string const & s) +{ + int64_t dummy; + return to_int64(s.c_str(), dummy); +} inline bool to_int(string const & s, int & i, int base = 10) { return to_int(s.c_str(), i, base); } -inline bool to_uint(string const & s, unsigned int & i, int base = 10) { return to_uint(s.c_str(), i, base); } +inline bool to_uint(string const & s, unsigned int & i, int base = 10) +{ + return to_uint(s.c_str(), i, base); +} inline bool to_uint64(string const & s, uint64_t & i) { return to_uint64(s.c_str(), i); } inline bool to_int64(string const & s, int64_t & i) { return to_int64(s.c_str(), i); } inline bool to_double(string const & s, double & d) { return to_double(s.c_str(), d); } @@ -225,17 +231,10 @@ inline bool to_double(string const & s, double & d) { return to_double(s.c_str() /// @name From numeric to string. //@{ -inline string to_string(string const & s) -{ - return s; -} - -inline string to_string(char const * s) -{ - return s; -} - -template string to_string(T t) +inline string to_string(string const & s) { return s; } +inline string to_string(char const * s) { return s; } +template +string to_string(T t) { ostringstream ss; ss << t; @@ -261,7 +260,8 @@ int UpperBoundOnChars() return numeric_limits::digits10 + is_signed::value + 1; } -template char * to_string_digits(char * buf, T i) +template +char * to_string_digits(char * buf, T i) { do { @@ -272,7 +272,8 @@ template char * to_string_digits(char * buf, T i) return buf; } -template string to_string_signed(T i) +template +string to_string_signed(T i) { bool const negative = i < 0; int const sz = UpperBoundOnChars(); @@ -287,7 +288,8 @@ template string to_string_signed(T i) return string(beg, end - beg); } -template string to_string_unsigned(T i) +template +string to_string_unsigned(T i) { int const sz = UpperBoundOnChars(); char buf[sz]; @@ -295,19 +297,10 @@ template string to_string_unsigned(T i) char * beg = to_string_digits(end, i); return string(beg, end - beg); } - -} - -inline string to_string(int64_t i) -{ - return impl::to_string_signed(i); -} - -inline string to_string(uint64_t i) -{ - return impl::to_string_unsigned(i); } +inline string to_string(int64_t i) { return impl::to_string_signed(i); } +inline string to_string(uint64_t i) { return impl::to_string_unsigned(i); } /// Use this function to get string with fixed count of /// "Digits after comma". string to_string_dac(double d, int dac); @@ -399,7 +392,7 @@ size_t EditDistance(TIter const & b1, TIter const & e1, TIter const & b2, TIter namespace std { -template +template struct iterator_traits> { using difference_type = std::ptrdiff_t; @@ -408,4 +401,4 @@ struct iterator_traits> using reference = string; using iterator_category = std::input_iterator_tag; }; -} // namespace std +} // namespace std diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp index f62f52d4aa..d2ac8ca57e 100644 --- a/generator/booking_dataset.cpp +++ b/generator/booking_dataset.cpp @@ -14,7 +14,6 @@ namespace generator { - namespace { bool CheckForValues(string const & value) @@ -28,7 +27,7 @@ bool CheckForValues(string const & value) return false; } } // namespace - + BookingDataset::Hotel::Hotel(string const & src) { vector rec(FieldsCount()); @@ -39,7 +38,7 @@ BookingDataset::Hotel::Hotel(string const & src) strings::to_uint(rec[Index(Fields::Id)], id); strings::to_double(rec[Index(Fields::Latitude)], lat); strings::to_double(rec[Index(Fields::Longtitude)], lon); - + name = rec[Index(Fields::Name)]; address = rec[Index(Fields::Address)]; @@ -55,13 +54,14 @@ BookingDataset::Hotel::Hotel(string const & src) ostream & operator<<(ostream & s, BookingDataset::Hotel const & h) { - return s << "Name: " << h.name << "\t Address: " << h.address << "\t lat: " << h.lat << " lon: " << h.lon; + return s << "Name: " << h.name << "\t Address: " << h.address << "\t lat: " << h.lat + << " lon: " << h.lon; } BookingDataset::BookingDataset(string const & dataPath) { LoadHotels(dataPath); - + size_t counter = 0; for (auto const & hotel : m_hotels) { @@ -73,12 +73,18 @@ BookingDataset::BookingDataset(string const & dataPath) bool BookingDataset::BookingFilter(OsmElement const & e) const { - return Filter(e, [&](OsmElement const & e){ return MatchWithBooking(e); }); + return Filter(e, [&](OsmElement const & e) + { + return MatchWithBooking(e); + }); } bool BookingDataset::TourismFilter(OsmElement const & e) const { - return Filter(e, [&](OsmElement const & e){ return true; }); + return Filter(e, [&](OsmElement const & e) + { + return true; + }); } BookingDataset::Hotel const & BookingDataset::GetHotel(size_t index) const @@ -91,7 +97,7 @@ vector BookingDataset::GetNearestHotels(double lat, double lon, size_t l double maxDistance /* = 0.0 */) const { namespace bgi = boost::geometry::index; - + vector indexes; for_each(bgi::qbegin(m_rtree, bgi::nearest(TPoint(lat, lon), limit)), bgi::qend(m_rtree), [&](TValue const & v) @@ -100,47 +106,49 @@ vector BookingDataset::GetNearestHotels(double lat, double lon, size_t l double const dist = ms::DistanceOnEarth(lat, lon, hotel.lat, hotel.lon); if (maxDistance != 0.0 && dist > maxDistance /* max distance in meters */) return; - + indexes.emplace_back(v.second); }); return indexes; } -bool BookingDataset::MatchByName(string const & osmName, vector const & bookingIndexes) const +bool BookingDataset::MatchByName(string const & osmName, + vector const & bookingIndexes) const { return false; - + // Match name. -// vector osmTokens; -// NormalizeAndTokenizeString(name, osmTokens, search::Delimiters()); -// -// cout << "\n------------- " << name << endl; -// -// bool matched = false; -// for (auto const & index : indexes) -// { -// vector bookingTokens; -// NormalizeAndTokenizeString(m_hotels[index].name, bookingTokens, search::Delimiters()); -// -// map>> weightPair; -// -// for (size_t j = 0; j < osmTokens.size(); ++j) -// { -// for (size_t i = 0; i < bookingTokens.size(); ++i) -// { -// size_t distance = strings::EditDistance(osmTokens[j].begin(), osmTokens[j].end(), -// bookingTokens[i].begin(), bookingTokens[i].end()); -// if (distance < 3) -// weightPair[distance].emplace_back(i, j); -// } -// } -// -// if (!weightPair.empty()) -// { -// cout << m_hotels[e.second] << endl; -// matched = true; -// } -// } + // vector osmTokens; + // NormalizeAndTokenizeString(name, osmTokens, search::Delimiters()); + // + // cout << "\n------------- " << name << endl; + // + // bool matched = false; + // for (auto const & index : indexes) + // { + // vector bookingTokens; + // NormalizeAndTokenizeString(m_hotels[index].name, bookingTokens, search::Delimiters()); + // + // map>> weightPair; + // + // for (size_t j = 0; j < osmTokens.size(); ++j) + // { + // for (size_t i = 0; i < bookingTokens.size(); ++i) + // { + // size_t distance = strings::EditDistance(osmTokens[j].begin(), osmTokens[j].end(), + // bookingTokens[i].begin(), + // bookingTokens[i].end()); + // if (distance < 3) + // weightPair[distance].emplace_back(i, j); + // } + // } + // + // if (!weightPair.empty()) + // { + // cout << m_hotels[e.second] << endl; + // matched = true; + // } + // } } void BookingDataset::BuildFeatures(function const & fn) const @@ -150,10 +158,10 @@ void BookingDataset::BuildFeatures(function const & fn) cons OsmElement e; e.type = OsmElement::EntityType::Node; e.id = 1; - + e.lat = hotel.lat; e.lon = hotel.lon; - + e.AddTag("name", hotel.name); e.AddTag("ref:sponsored", strings::to_string(hotel.id)); e.AddTag("website", hotel.descUrl); @@ -161,56 +169,56 @@ void BookingDataset::BuildFeatures(function const & fn) cons e.AddTag("stars", strings::to_string(hotel.stars)); e.AddTag("price_rate", strings::to_string(hotel.priceCategory)); e.AddTag("addr:full", hotel.address); - + switch (hotel.type) { - case 19: - case 205: e.AddTag("tourism", "motel"); break; - - case 21: - case 206: - case 212: e.AddTag("tourism", "resort"); break; - - case 3: - case 23: - case 24: - case 25: - case 202: - case 207: - case 208: - case 209: - case 210: - case 216: - case 220: - case 223: e.AddTag("tourism", "guest_house"); break; - - case 14: - case 204: - case 213: - case 218: - case 219: - case 226: - case 222: e.AddTag("tourism", "hotel"); break; - - case 211: - case 224: - case 228: e.AddTag("tourism", "chalet"); break; - - case 13: - case 225: - case 203: e.AddTag("tourism", "hostel"); break; - - case 215: - case 221: - case 227: - case 2: - case 201: e.AddTag("tourism", "apartment"); break; - - case 214: e.AddTag("tourism", "camp_site"); break; - - default: e.AddTag("tourism", "hotel"); break; + case 19: + case 205: e.AddTag("tourism", "motel"); break; + + case 21: + case 206: + case 212: e.AddTag("tourism", "resort"); break; + + case 3: + case 23: + case 24: + case 25: + case 202: + case 207: + case 208: + case 209: + case 210: + case 216: + case 220: + case 223: e.AddTag("tourism", "guest_house"); break; + + case 14: + case 204: + case 213: + case 218: + case 219: + case 226: + case 222: e.AddTag("tourism", "hotel"); break; + + case 211: + case 224: + case 228: e.AddTag("tourism", "chalet"); break; + + case 13: + case 225: + case 203: e.AddTag("tourism", "hostel"); break; + + case 215: + case 221: + case 227: + case 2: + case 201: e.AddTag("tourism", "apartment"); break; + + case 214: e.AddTag("tourism", "camp_site"); break; + + default: e.AddTag("tourism", "hotel"); break; } - + fn(&e); } } @@ -218,17 +226,17 @@ void BookingDataset::BuildFeatures(function const & fn) cons void BookingDataset::LoadHotels(string const & path) { m_hotels.clear(); - + if (path.empty()) return; - + ifstream src(path); if (!src.is_open()) { LOG(LERROR, ("Error while opening", path, ":", strerror(errno))); return; } - + for (string line; getline(src, line);) m_hotels.emplace_back(line); } @@ -244,20 +252,21 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const break; } } - + if (name.empty()) return false; - + // Find 3 nearest values to a point. auto const indexes = GetNearestHotels(e.lat, e.lon, 3, 150 /* max distance in meters */); if (indexes.empty()) return false; - + bool matched = MatchByName(name, indexes); return matched; } -bool BookingDataset::Filter(OsmElement const & e, function const & fn) const +bool BookingDataset::Filter(OsmElement const & e, + function const & fn) const { if (e.type != OsmElement::EntityType::Node) return false; diff --git a/generator/booking_dataset.hpp b/generator/booking_dataset.hpp index d0810e0590..6ce74c7118 100644 --- a/generator/booking_dataset.hpp +++ b/generator/booking_dataset.hpp @@ -57,10 +57,12 @@ public: bool TourismFilter(OsmElement const & e) const; Hotel const & GetHotel(size_t index) const; - vector GetNearestHotels(double lat, double lon, size_t limit, double maxDistance = 0.0) const; + vector GetNearestHotels(double lat, double lon, size_t limit, + double maxDistance = 0.0) const; bool MatchByName(string const & osmName, vector const & bookingIndexes) const; void BuildFeatures(function const & fn) const; + protected: vector m_hotels; @@ -77,5 +79,5 @@ protected: }; ostream & operator<<(ostream & s, BookingDataset::Hotel const & h); - + } // namespace generator