[strings] Add search category consistency tool

- Review fixes
This commit is contained in:
greshilov 2018-04-27 16:19:14 +03:00 committed by mpimenov
parent 799cf4e476
commit 514423cabd
2 changed files with 164 additions and 0 deletions

View file

@ -0,0 +1,65 @@
#!/usr/bin/env ruby
require_relative './omim_parsers'
# Directory that contains this script.
ROOT = File.expand_path(File.dirname(__FILE__))
# Repository root, located three directory levels above this script.
OMIM_ROOT = File.join(ROOT, '..', '..', '..')
# C++ source file from which the displayed category keys are extracted.
CPP_CATEGORIES_FILENAME = File.join(OMIM_ROOT, 'search', 'displayed_categories.cpp')
# Search categories file, handed to OmimParsers::CategoriesParser.
CATEGORIES_FILENAME = File.join(OMIM_ROOT, 'data', 'categories.txt')
# Strings file, handed to OmimParsers::StringsParser.
STRINGS_FILENAME = File.join(OMIM_ROOT, 'strings.txt')
# Captures everything between "m_keys = {" and the last "};" in the input.
# The /m flag lets "." match newlines, so the list may span several lines.
CATEGORIES_MATCHER = /m_keys = \{(.*)\};/m
# Extracts the list of category keys assigned to `m_keys` in a C++ source file.
#
# The text captured by CATEGORIES_MATCHER is split on commas; every item is
# stripped of surrounding whitespace and of its enclosing double quotes.
#
# @param filename [String] path of the C++ file to read
# @return [Array<String>, nil] the category keys, or nil when the file contains
#   no `m_keys = {...};` assignment
def load_categories_from_cpp(filename)
  # Read the file that was actually requested rather than a fixed constant.
  match = CATEGORIES_MATCHER.match(File.read(filename))
  return unless match

  match[1]
    .split(',')
    .map(&:strip)          # tolerate newlines/indentation around each item
    .reject(&:empty?)      # tolerate a trailing comma
    .map { |item| item.gsub(/\A"|"\z/, '') }
end
# Checks every translation in +string_cats+ against the synonym lists in
# +search_cats+ and prints a report of the mismatches to standard output.
#
# Categories absent from +search_cats+ are reported and skipped; languages
# absent from a category in +search_cats+ are skipped silently.
#
# @param string_cats [Hash] category name => { language => translation }
# @param search_cats [Hash] category name => { language => list of synonyms }
# @return [Boolean] true when no mismatching translation was found
def compare_categories(string_cats, search_cats)
  mismatches = {}

  string_cats.each do |category_name, translations|
    unless search_cats.include?(category_name)
      puts "Category '#{category_name}' not found in categories.txt"
      next
    end

    known = search_cats[category_name]
    translations.each do |lang, translation|
      next unless known.include?(lang)

      synonyms = known[lang]
      next if synonyms.include?(translation)

      mismatches[category_name] ||= {}
      mismatches[category_name][lang] = [translation, synonyms]
    end
  end

  # Report all mismatches, grouped per category, after the scan is complete.
  mismatches.each do |name, languages|
    puts "\nInconsistent category \"#{name}\""
    languages.each do |lang, (string_value, category_value)|
      puts "\t#{lang} : \"#{string_value}\" is not matched by #{category_value}"
    end
  end

  mismatches.empty?
end
# Runs the whole consistency check: loads the category keys from the C++
# source, parses the categories and strings files for those keys, and compares
# the two results.
#
# @return [Integer] 0 when compare_categories reports no mismatch, 1 otherwise
def check_search_categories_consistent
  keys = load_categories_from_cpp(CPP_CATEGORIES_FILENAME)

  search_categories = OmimParsers::CategoriesParser.new(keys).parse_file(CATEGORIES_FILENAME)
  string_categories = OmimParsers::StringsParser.new(keys).parse_file(STRINGS_FILENAME)

  return 0 if compare_categories(string_categories, search_categories)

  1
end
# Run the check only when this file is executed directly; the check's result
# becomes the process exit status.
exit(check_search_categories_consistent) if $PROGRAM_NAME == __FILE__

View file

@ -0,0 +1,99 @@
# Line-oriented parsers that collect per-language translations for a given
# set of category keys.
module OmimParsers
  # Language codes accepted by the parsers; lines for any other code are ignored.
  LANGUAGES = %w(en ru ar cs da nl fi fr de hu id it ja ko nb pl
                 pt ro es sv th tr uk vi zh-Hans zh-Hant he sk).freeze

  # Shared parsing loop. Subclasses supply #category (a Regexp whose first
  # capture group is the category name) and #parse_line, and may override
  # #should_exclude_line?.
  class AbstractParser
    # @param keys [Array<String>] category names to collect; others are skipped
    def initialize(keys)
      @keys = keys
    end

    # Parses one translation line.
    #
    # @param line [String] stripped input line
    # @return [Array, nil] a [language, translation] pair, or nil to skip the line
    # @raise [NotImplementedError] always, unless overridden by a subclass
    def parse_line(line)
      raise NotImplementedError, 'You must implement parse_line.'
    end

    # Tests whether +line+ is a category header for one of the wanted keys.
    #
    # @param line [String] stripped input line
    # @param result [Hash] accumulator; gains an empty hash for a new category
    # @return [Hash, nil] the translations hash of the matched category, or nil
    #   when the line is not a header or names a category outside the keys
    def match_category(line, result)
      header = category.match(line)
      return if header.nil?

      name = header[1]
      result[name] ||= {} if @keys.include?(name)
    end

    # Parses a whole file, read as UTF-8.
    #
    # An empty line closes the current category block. While no wanted category
    # is open, every line is tested as a possible category header.
    #
    # @param filename [String] path of the file to parse
    # @return [Hash] category name => { language => translation }
    def parse_file(filename)
      result = {}
      current_string = nil

      # Block form guarantees the file handle is closed, even on errors.
      File.open(filename, 'r:UTF-8') do |file|
        file.each_line do |raw_line|
          line = raw_line.strip
          next if should_exclude_line?(line)

          # If line is empty -> next category block started
          if line.empty?
            current_string = nil
            next
          end

          current_string ||= match_category(line, result)
          parsed = parse_line(line)
          next if parsed.nil? || current_string.nil?

          lang, translation = parsed
          current_string[lang] = translation
        end
      end

      result
    end

    # @return [Regexp] pattern for a category header; group 1 is the name
    # @raise [NotImplementedError] always, unless overridden by a subclass
    def category
      raise NotImplementedError, 'You must implement category.'
    end

    # @param line [String] stripped input line
    # @return [Boolean] true when the line must be ignored entirely
    def should_exclude_line?(line)
      false
    end
  end

  # Parser for "lang:synonym|synonym" lines grouped under "@name" headers.
  class CategoriesParser < AbstractParser
    # @param line [String] stripped input line
    # @return [Array(String, Array<String>), nil] the language and its list of
    #   synonyms, or nil when the line does not match or the language is unknown
    def parse_line(line)
      # NOTE(review): the value part is \S+, so a line whose value contains
      # whitespace does not match and is skipped -- confirm this is intended.
      line_match = /^([^:]+):(\S+)$/u.match(line)
      return unless line_match

      lang = line_match[1].strip
      return unless LANGUAGES.include?(lang)

      synonyms = []
      line_match[2].strip.split('|').each do |token|
        # Drop an optional leading digit and an optional '^' from the token.
        token_match = /\d?\^?(.*)$/.match(token)
        synonyms.push(token_match[1]) if token_match
      end
      [lang, synonyms]
    end

    # Lines starting with '#' are ignored.
    def should_exclude_line?(line)
      line.start_with?('#')
    end

    def category
      # We match only global categories ('food', 'bank'...)
      /^@([A-Za-z0-9]+)$/
    end
  end

  # Parser for "lang = translation" lines grouped under "[name]" headers.
  class StringsParser < AbstractParser
    # @param line [String] stripped input line
    # @return [Array(String, String), nil] the language and its translation, or
    #   nil when the line does not match or the language is unknown
    def parse_line(line)
      line_match = /^([^=]+)=(.*)$/.match(line)
      return unless line_match

      lang = line_match[1].strip
      return unless LANGUAGES.include?(lang)

      [lang, line_match[2].strip]
    end

    def category
      /^\[(.+)\]/
    end
  end
end