forked from organicmaps/organicmaps-tmp
[strings] Add search category consistency tool
- Review fixes
This commit is contained in:
parent
799cf4e476
commit
514423cabd
2 changed files with 164 additions and 0 deletions
65
tools/ruby/category_consistency/check_consistency.rb
Executable file
65
tools/ruby/category_consistency/check_consistency.rb
Executable file
|
@ -0,0 +1,65 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
require_relative './omim_parsers'
|
||||
|
||||
# Directory that contains this script.
ROOT = File.expand_path(File.dirname(__FILE__)).freeze
# Repository root, three directory levels above this script.
OMIM_ROOT = File.join(ROOT, '..', '..', '..').freeze
# C++ source file that holds the list of displayed category keys.
CPP_CATEGORIES_FILENAME = File.join(OMIM_ROOT, 'search', 'displayed_categories.cpp').freeze
# Search categories data file.
CATEGORIES_FILENAME = File.join(OMIM_ROOT, 'data', 'categories.txt').freeze
# Localized strings file.
STRINGS_FILENAME = File.join(OMIM_ROOT, 'strings.txt').freeze
# Captures everything between the braces of the `m_keys = {...};` initializer.
# The /m flag lets `.` match newlines, so the list may span several lines.
CATEGORIES_MATCHER = /m_keys = \{(.*)\};/m.freeze
|
||||
|
||||
# Extracts the category keys listed in the `m_keys = {...};` initializer of a
# C++ source file.
#
# filename - path of the C++ file to read.
#
# Returns an Array of category names with surrounding whitespace and double
# quotes removed, or nil when the file contains no `m_keys` initializer.
def load_categories_from_cpp(filename)
  # Read the file the caller asked for rather than a hard-coded global path.
  raw_categories = File.read(filename)
  match = CATEGORIES_MATCHER.match(raw_categories)
  return unless match

  # Split on bare commas and trim each entry, so the result does not depend on
  # how the list is wrapped or spaced in the source file.
  match[1]
    .split(',')
    .map { |entry| entry.strip.gsub(/\A"|"\z/, '') }
    .reject(&:empty?)
end
|
||||
|
||||
# Checks every translation from the strings file against the synonym lists
# from the categories file and prints a report to standard output.
#
# string_cats - Hash of category name => { lang => translation }.
# search_cats - Hash of category name => { lang => list of synonyms }.
#
# A category missing from search_cats is only reported; a language missing
# from a known category is skipped silently. Neither affects the result.
#
# Returns true when no translation mismatch was recorded, false otherwise.
def compare_categories(string_cats, search_cats)
  mismatches = {}

  string_cats.each do |category_name, translations|
    unless search_cats.include?(category_name)
      puts "Category '#{category_name}' not found in categories.txt"
      next
    end

    known_languages = search_cats[category_name]
    translations.each do |lang, translation|
      next unless known_languages.include?(lang)

      synonyms = known_languages[lang]
      next if synonyms.include?(translation)

      # Keep both sides of the mismatch for the report printed below.
      (mismatches[category_name] ||= {})[lang] = [translation, synonyms]
    end
  end

  mismatches.each do |name, languages|
    puts "\nInconsistent category \"#{name}\""
    languages.each do |lang, (string_value, category_value)|
      puts "\t#{lang} : \"#{string_value}\" is not matched by #{category_value}"
    end
  end

  mismatches.empty?
end
|
||||
|
||||
# Loads the displayed category keys from the C++ source, parses the categories
# and strings files for those keys and compares the two.
#
# Returns 0 when compare_categories reports no mismatch, 1 otherwise (also
# when the category list cannot be found in the C++ file). The value is meant
# to be used as the process exit status.
def check_search_categories_consistent
  cpp_categories = load_categories_from_cpp(CPP_CATEGORIES_FILENAME)
  # Without this guard a nil key list reaches the parsers and only fails later
  # with an unrelated-looking NoMethodError.
  if cpp_categories.nil?
    warn "Could not find the m_keys category list in #{CPP_CATEGORIES_FILENAME}"
    return 1
  end

  categories_txt_parser = OmimParsers::CategoriesParser.new(cpp_categories)
  strings_txt_parser = OmimParsers::StringsParser.new(cpp_categories)

  search_categories = categories_txt_parser.parse_file(CATEGORIES_FILENAME)
  string_categories = strings_txt_parser.parse_file(STRINGS_FILENAME)

  compare_categories(string_categories, search_categories) ? 0 : 1
end
|
||||
|
||||
|
||||
# Run the check only when this file is executed directly; the returned status
# becomes the process exit code.
exit(check_search_categories_consistent) if __FILE__ == $PROGRAM_NAME
|
99
tools/ruby/category_consistency/omim_parsers.rb
Normal file
99
tools/ruby/category_consistency/omim_parsers.rb
Normal file
|
@ -0,0 +1,99 @@
|
|||
# Line-oriented parsers for the text files compared by the category
# consistency check.
module OmimParsers
  # Language codes whose translations are collected; lines keyed by anything
  # else are ignored by the concrete parsers.
  LANGUAGES = %w(en ru ar cs da nl fi fr de hu id it ja ko nb pl
                 pt ro es sv th tr uk vi zh-Hans zh-Hant he sk).freeze

  # Shared block-by-block file walker. Subclasses supply `category` (a Regexp
  # whose first capture group is the category name) and `parse_line`.
  class AbstractParser
    # keys - collection of category names to collect; other categories are
    #        ignored.
    def initialize(keys)
      @keys = keys
    end

    # Parses one translation line. Subclasses return [lang, translation] or
    # nil when the line is not a translation of interest.
    def parse_line(line)
      raise NotImplementedError, 'You must implement parse_line.'
    end

    # Checks whether `line` starts a category block.
    #
    # Returns the translations Hash stored in `result` for that category
    # (created on first use), or nil when the line is not a category header
    # or names a category that is not in the requested keys.
    def match_category(line, result)
      category_match = category.match(line)
      return unless category_match

      name = category_match[1]
      result[name] ||= {} if @keys.include?(name)
    end

    # Reads `filename` as UTF-8 and collects translations per category.
    #
    # Returns a Hash of category name => { lang => parsed translation }.
    def parse_file(filename)
      current_string = nil
      result = {}
      # Block form closes the file handle even if parsing raises.
      File.open(filename, 'r:UTF-8') do |file|
        file.each_line do |line|
          line.strip!
          next if should_exclude_line?(line)

          # If line is empty -> next category block started
          if line.empty?
            current_string = nil
            next
          end

          # Until a wanted category header is seen, keep probing each line.
          current_string ||= match_category(line, result)

          parsed = parse_line(line)
          next if parsed.nil? || current_string.nil?

          lang, translation = parsed
          current_string[lang] = translation
        end
      end
      result
    end

    # Regexp recognising a category header; must be provided by subclasses.
    def category
      raise NotImplementedError, 'You must implement category.'
    end

    # Hook for skipping lines (after stripping); nothing is skipped by default.
    def should_exclude_line?(line)
      false
    end
  end

  # Parser for the categories file: `@name` headers followed by
  # `lang:synonym|synonym` lines.
  class CategoriesParser < AbstractParser
    # Returns [lang, synonyms] for a `lang:a|b|c` line in a known language,
    # nil otherwise.
    def parse_line(line)
      # NOTE(review): `\S+` means a line whose translation part contains
      # whitespace does not match and is skipped - confirm this is intended.
      line_match = /^([^:]+):(\S+)$/u.match(line)
      return unless line_match

      lang = line_match[1].strip
      return unless LANGUAGES.include?(lang)

      # Drop the optional leading digit and optional `^` marker of each synonym.
      synonyms = line_match[2].strip.split('|').map { |token| token.sub(/\A\d?\^?/, '') }
      [lang, synonyms]
    end

    # Lines starting with `#` are skipped.
    def should_exclude_line?(line)
      line.start_with?('#')
    end

    def category
      # We match only global categories ('food', 'bank'...)
      /^@([A-Za-z0-9]+)$/
    end
  end

  # Parser for the strings file: `[name]` headers followed by
  # `lang = translation` lines.
  class StringsParser < AbstractParser
    # Returns [lang, translation] for a `lang = text` line in a known
    # language, nil otherwise.
    def parse_line(line)
      line_match = /^([^=]+)=(.*)$/.match(line)
      return unless line_match

      lang = line_match[1].strip
      [lang, line_match[2].strip] if LANGUAGES.include?(lang)
    end

    def category
      /^\[(.+)\]/
    end
  end
end
|
Loading…
Add table
Reference in a new issue