forked from organicmaps/organicmaps
[tools] Rewrite Ruby scripts in Python for consistency
Signed-off-by: Meenbeese <meenbeese@tutanota.com>
This commit is contained in:
parent
3c0fbeaa9c
commit
d8dafb7973
5 changed files with 224 additions and 171 deletions
|
@ -58,9 +58,10 @@ By default, it searches `strings.txt`, to check `types_strings.txt` add a `-t` o
|
|||
There are many other options, e.g. to print various translation statistics or to validate and re-format translation files.
|
||||
Check `tools/python/strings_utils.py -h` to see all of them.
|
||||
|
||||
To check consistency of types_strings.txt with categories.txt run:
|
||||
To ensure that the category definitions in `categories.txt` are consistent with their
|
||||
associated translations in `strings.txt`, you can use the category consistency script.
|
||||
```
|
||||
ruby tools/ruby/category_consistency/check_consistency.rb
|
||||
python tools/python/category/check_consistency.py
|
||||
```
|
||||
|
||||
## Automatic translations
|
||||
|
|
123
tools/python/category/check_consistency.py
Normal file
123
tools/python/category/check_consistency.py
Normal file
|
@ -0,0 +1,123 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
This script checks for consistency between categories defined in:
|
||||
1. `displayed_categories.cpp`: A C++ source file containing category keys.
|
||||
2. `categories.txt`: A data file defining category details.
|
||||
3. `strings.txt`: A data file containing translations of category strings.
|
||||
|
||||
The script:
|
||||
- Parses category keys from the C++ file.
|
||||
- Loads corresponding data from `categories.txt` and `strings.txt`.
|
||||
- Compares the parsed data for inconsistencies in definitions and translations.
|
||||
- Prints detailed messages if inconsistencies are found.
|
||||
|
||||
It exits with:
|
||||
- `0` on success (all categories are consistent),
|
||||
- `1` on failure (inconsistencies detected).
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from omim_parsers import CategoriesParser, StringsParser, LANGUAGES
|
||||
|
||||
# Directory that contains this script.
ROOT = os.path.dirname(os.path.abspath(__file__))
# Repository root, three directory levels above this script.
OMIM_ROOT = os.path.join(ROOT, '..', '..', '..')
CPP_CATEGORIES_FILENAME = os.path.join(OMIM_ROOT, 'search', 'displayed_categories.cpp')
CATEGORIES_FILENAME = os.path.join(OMIM_ROOT, 'data', 'categories.txt')
STRINGS_FILENAME = os.path.join(OMIM_ROOT, 'data', 'strings', 'strings.txt')
# Captures everything between the braces of the first `m_keys = {...};`
# initializer; DOTALL lets the initializer span several lines.
CATEGORIES_MATCHER = re.compile(r"m_keys = \{(.*?)};", re.DOTALL)


def extract_cpp_categories(filename):
    """Return the category keys listed in the ``m_keys`` initializer of a C++ file.

    Args:
        filename: Path of the C++ source file to scan.

    Returns:
        The keys in file order, with surrounding whitespace and double quotes
        removed. An empty list is returned (after printing an error message)
        when the file does not exist or contains no ``m_keys = {...};``
        initializer.
    """
    try:
        with open(filename, "r", encoding="utf-8") as cpp_file:
            content = cpp_file.read()
    except FileNotFoundError:
        print(f"Error: {filename} not found.")
        return []

    match = CATEGORIES_MATCHER.search(content)
    if not match:
        print(f"Error: No categories found in {filename}.")
        return []

    cleaned = (item.strip().strip('"') for item in match.group(1).split(","))
    # A trailing comma (or an empty initializer) produces an empty item that
    # must not be reported as a category.
    return [name for name in cleaned if name]
|
||||
|
||||
|
||||
def compare_categories(string_cats, search_cats, cpp_cats):
    """Compare parsed category data, print a report and tell whether it is consistent.

    Args:
        string_cats: Mapping of category name to ``{language: translation}`` as
            parsed from ``strings.txt``.
        search_cats: Mapping of category name to ``{language: translation}`` as
            parsed from ``categories.txt``.
        cpp_cats: Iterable of category names read from
            ``displayed_categories.cpp``.

    Returns:
        ``True`` when no problem was found, ``False`` otherwise. Three kinds of
        problems are reported on stdout: categories of ``search_cats`` absent
        from ``string_cats``, names of ``cpp_cats`` absent from ``search_cats``,
        and per-language translations that are missing or differ.
    """
    missing_categories = []
    inconsistent_strings = {}

    # One pass over categories.txt data collects both the categories missing
    # from strings.txt and the per-language mismatches.
    for category_name, translations in search_cats.items():
        if category_name not in string_cats:
            missing_categories.append(category_name)
            continue

        string_translations = string_cats[category_name]
        for lang, search_translation in translations.items():
            if lang not in string_translations:
                strings_value = "Missing translation"
            elif string_translations[lang] != search_translation:
                strings_value = string_translations[lang]
            else:
                continue
            inconsistent_strings.setdefault(category_name, {})[lang] = (
                strings_value,
                search_translation,
            )

    extra_categories = [cpp_cat for cpp_cat in cpp_cats if cpp_cat not in search_cats]

    if missing_categories:
        print("\nMissing translations for categories in strings.txt:")
        for category_name in missing_categories:
            print(f" - {category_name}")

    if extra_categories:
        print("\nExtra categories found in displayed_categories.cpp but not in categories.txt:")
        for cpp_cat in extra_categories:
            print(f" - {cpp_cat}")

    if inconsistent_strings:
        print("\nInconsistent category translations:")
        for category_name, langs in inconsistent_strings.items():
            print(f"Category \"{category_name}\":")
            for lang, (strings_value, search_value) in langs.items():
                print(f" {lang}: strings.txt=\"{strings_value}\" vs categories.txt=\"{search_value}\"")

    return not (missing_categories or extra_categories or inconsistent_strings)
|
||||
|
||||
|
||||
def check_search_categories_consistent():
    """Run the complete consistency check and return a process exit code.

    Parses ``categories.txt`` and ``strings.txt``, reads the category keys from
    ``displayed_categories.cpp`` and compares the three.

    Returns:
        ``0`` when everything is consistent; ``1`` when an input file is
        missing, no category keys could be read from the C++ file, or
        inconsistencies were found.
    """
    for required_file in (CATEGORIES_FILENAME, STRINGS_FILENAME):
        if not os.path.exists(required_file):
            print(f"Error: {required_file} not found.")
            return 1

    search_categories = CategoriesParser(LANGUAGES).parse_file(CATEGORIES_FILENAME)
    string_categories = StringsParser(LANGUAGES).parse_file(STRINGS_FILENAME)
    cpp_categories = extract_cpp_categories(CPP_CATEGORIES_FILENAME)

    # An empty key list means the C++ file was missing or could not be parsed;
    # reporting success in that situation would hide a broken check.
    if not cpp_categories:
        print(f"Failure: No category keys could be read from {CPP_CATEGORIES_FILENAME}.")
        return 1

    if compare_categories(string_categories, search_categories, cpp_categories):
        print("Success: All categories are consistent.")
        return 0

    print("Failure: Inconsistencies found in category definitions.")
    return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The `exit` builtin is added by the `site` module for interactive use and
    # is absent under `python -S`; raising SystemExit needs no import and
    # always works.
    raise SystemExit(check_search_categories_consistent())
|
98
tools/python/category/omim_parsers.py
Normal file
98
tools/python/category/omim_parsers.py
Normal file
|
@ -0,0 +1,98 @@
|
|||
import re
|
||||
from typing import Optional, Tuple, Dict, List
|
||||
|
||||
# Language codes exported by this module, written as one whitespace-separated
# token per code and split into a list at import time.
LANGUAGES = (
    "af ar be bg ca cs da de el en en-GB es es-MX et "
    "eu fa fi fr fr-CA he hi hu id it ja ko lt mr nb "
    "nl pl pt pt-BR ro ru sk sv sw th tr uk vi zh-Hans zh-Hant"
).split()
|
||||
|
||||
|
||||
class AbstractParser:
    """Base class for line-oriented parsers that group translations by category.

    Subclasses provide :meth:`category` (a pattern whose first group is the
    category name on a header line) and :meth:`parse_line` (splitting a
    translation line into ``(language, translation)``). They may override
    :meth:`should_exclude_line` to skip comments or blank lines.
    """

    def __init__(self, keys: List[str]):
        # Names accepted by match_category().
        self.keys = keys

    def parse_line(self, line: str) -> Optional[Tuple[str, str]]:
        """Return ``(language, translation)`` for *line*, or ``None`` if it is not one."""
        raise NotImplementedError("You must implement parse_line.")

    def match_category(self, line: str, result: Dict[str, Dict]) -> Optional[Dict]:
        """Register the category found in *line*, if it is one of ``self.keys``.

        Returns:
            The (possibly newly created) entry of *result* for that category,
            or ``None`` when *line* holds no category or the category is not
            in ``self.keys``.
        """
        category_match = self.category().search(line)
        if not category_match:
            return None

        category = category_match.group(1)
        if category not in self.keys:
            return None
        return result.setdefault(category, {})

    def parse_file(self, filename: str) -> Dict[str, Dict]:
        """Parse *filename* into ``{category: {language: [translation, ...]}}``.

        Lines are stripped first. A line matching :meth:`category` starts a
        new category; following lines accepted by :meth:`parse_line` are
        appended to that category. Lines seen before the first category
        header are ignored.

        Raises:
            OSError: If *filename* cannot be opened.
        """
        result: Dict[str, Dict] = {}
        current_category = None
        # Build the header pattern once instead of once per line.
        category_pattern = self.category()

        with open(filename, "r", encoding="utf-8") as file:
            for raw_line in file:
                line = raw_line.strip()

                # Skip comments and empty lines
                if self.should_exclude_line(line):
                    continue

                # Match a new category
                category_match = category_pattern.match(line)
                if category_match:
                    current_category = category_match.group(1)
                    result.setdefault(current_category, {})
                    continue

                # Parse translations for the current category
                if not current_category:
                    continue
                parsed = self.parse_line(line)
                if parsed:
                    lang, translation = parsed
                    result[current_category].setdefault(lang, []).append(translation)

        return result

    def category(self) -> re.Pattern:
        """Return the compiled pattern that recognises a category header line."""
        raise NotImplementedError("You must implement category.")

    def is_new_category(self, line: str) -> bool:
        """Tell whether *line* is a category header."""
        return bool(self.category().match(line))

    def extract_category(self, line: str) -> Optional[str]:
        """Return the category name in *line*, or ``None`` if it is not a header."""
        match = self.category().match(line)
        return match.group(1) if match else None

    def should_exclude_line(self, line: str) -> bool:
        """Tell whether *line* must be skipped entirely; the default keeps every line."""
        return False
|
||||
|
||||
|
||||
class CategoriesParser(AbstractParser):
    """Parser for ``categories.txt``-style files.

    NOTE(review): every ``key:value`` line is accepted as a translation; the
    language code is not validated here. Confirm that this is intended.
    """

    # Compiled once at class creation instead of on every call.
    # Header line: a token of letters, digits, `_` or `-`, then `|@` and the
    # rest of the line. Only the leading token (group 1) is used as the name.
    _CATEGORY_PATTERN = re.compile(r"^([a-zA-Z0-9_-]+)\|@(.+)$")
    # Translation line: `<language>:<non-empty translation>`.
    _TRANSLATION_PATTERN = re.compile(r"^([^:]+):(.+)$")

    def parse_line(self, line: str) -> Optional[Tuple[str, str]]:
        """Split ``language:translation`` into a stripped pair, or return ``None``."""
        line_match = self._TRANSLATION_PATTERN.match(line)
        if line_match is None:
            return None
        lang, translation = line_match.groups()
        return lang.strip(), translation.strip()

    def category(self) -> re.Pattern:
        """Return the pattern that recognises a category header line."""
        return self._CATEGORY_PATTERN

    def should_exclude_line(self, line: str) -> bool:
        """Skip empty lines and lines starting with ``#``."""
        return not line or line.startswith("#")
|
||||
|
||||
|
||||
class StringsParser(AbstractParser):
    """Parser for ``strings.txt``-style files.

    NOTE(review): every ``key = value`` line is accepted as a translation; the
    language code is not validated here. Confirm that this is intended.
    """

    # Compiled once at class creation instead of on every call.
    # Header line: `[name]` where name consists of letters, digits and `_`.
    _CATEGORY_PATTERN = re.compile(r"^\[([a-zA-Z0-9_]+)]$")
    # Translation line: `<language>=<translation>`; the translation may be empty.
    _TRANSLATION_PATTERN = re.compile(r"^([^=]+)=(.*)$")

    def parse_line(self, line: str) -> Optional[Tuple[str, str]]:
        """Split ``language = translation`` into a stripped pair, or return ``None``."""
        line_match = self._TRANSLATION_PATTERN.match(line)
        if line_match is None:
            return None
        lang, translation = line_match.groups()
        return lang.strip(), translation.strip()

    def category(self) -> re.Pattern:
        """Return the pattern that recognises a ``[name]`` header line."""
        return self._CATEGORY_PATTERN

    def should_exclude_line(self, line: str) -> bool:
        """Skip empty lines and any line whose text starts with ``tags``."""
        return not line or line.startswith("tags")
|
|
@ -1,65 +0,0 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
require_relative './omim_parsers'
|
||||
|
||||
# Directory that contains this script.
ROOT = File.expand_path(File.dirname(__FILE__))
# Repository root, three directory levels above this script.
OMIM_ROOT = File.join(ROOT, '..', '..', '..')
CPP_CATEGORIES_FILENAME = File.join(OMIM_ROOT, 'search', 'displayed_categories.cpp')
CATEGORIES_FILENAME = File.join(OMIM_ROOT, 'data', 'categories.txt')
STRINGS_FILENAME = File.join(OMIM_ROOT, 'data', 'strings', 'strings.txt')
# Captures everything between the braces of the `m_keys = {...};` initializer
# (the /m flag lets `.` match newlines).
CATEGORIES_MATCHER = /m_keys = \{(.*)\};/m

# Reads the category keys listed in the `m_keys` initializer of +filename+.
#
# Returns an Array of key names without surrounding whitespace or double
# quotes; the Array is empty when the file holds no `m_keys = {...};` block.
def load_categories_from_cpp(filename)
  # Read the file that was asked for, not always the default one.
  match = CATEGORIES_MATCHER.match(File.read(filename))
  return [] unless match

  match[1].split(',')
          .map { |cat| cat.strip.gsub(/\A"|"\z/, '') }
          .reject(&:empty?)
end
|
||||
|
||||
# Checks every translation of +string_cats+ (strings.txt data) against the
# synonym lists of +search_cats+ (categories.txt data) and prints a report.
#
# A category absent from +search_cats+ is reported but does not affect the
# result; languages absent from a category of +search_cats+ are ignored.
# Returns true when no translation is missing from its synonym list.
def compare_categories(string_cats, search_cats)
  mismatches = {}

  string_cats.each do |category_name, category|
    unless search_cats.include?(category_name)
      puts "Category '#{category_name}' not found in categories.txt"
      next
    end

    category.each do |lang, translation|
      next unless search_cats[category_name].include?(lang)

      synonyms = search_cats[category_name][lang]
      next if synonyms.include?(translation)

      mismatches[category_name] ||= {}
      mismatches[category_name][lang] = [translation, synonyms]
    end
  end

  mismatches.each do |name, languages|
    puts "\nInconsistent category \"#{name}\""
    languages.each do |lang, (string_value, category_value)|
      puts "\t#{lang} : \"#{string_value}\" is not matched by #{category_value}"
    end
  end

  mismatches.empty?
end
|
||||
|
||||
# Runs the whole check: reads the category keys from the C++ source, parses
# categories.txt and strings.txt for those keys and compares the results.
#
# Returns 0 when the files are consistent and 1 otherwise.
def check_search_categories_consistent
  keys = load_categories_from_cpp(CPP_CATEGORIES_FILENAME)

  search_categories = OmimParsers::CategoriesParser.new(keys).parse_file(CATEGORIES_FILENAME)
  string_categories = OmimParsers::StringsParser.new(keys).parse_file(STRINGS_FILENAME)

  return 0 if compare_categories(string_categories, search_categories)

  1
end
|
||||
|
||||
|
||||
# Run the check only when this file is executed directly, not when required.
exit check_search_categories_consistent if $PROGRAM_NAME == __FILE__
|
|
@ -1,104 +0,0 @@
|
|||
# Parsers for the category and translation data files.
module OmimParsers

  # To update the list, run in root directory:
  # sed -nEe "s/ +([a-zA-Z]{2}(-[a-zA-Z]{2,})?) = .*$/\1/p" "data/strings/strings.txt" | sort -u | tr '\n' ' ' | sed -e 's/,$//' | fold -s -w48; echo
  LANGUAGES = %w(af ar be bg ca cs da de el en en-GB es es-MX et
                 eu fa fi fr fr-CA he hi hu id it ja ko lt mr nb
                 nl pl pt pt-BR ro ru sk sv sw th tr uk vi
                 zh-Hans zh-Hant).freeze

  # Base class: walks a file line by line, opens a block when a category
  # header listed in +keys+ is found and collects the translations parsed
  # from the following lines until the next empty line.
  class AbstractParser
    # keys - category names this parser collects.
    def initialize(keys)
      @keys = keys
    end

    # Returns [lang, translation] for a translation line, nil otherwise.
    def parse_line(line)
      raise NotImplementedError.new("You must implement parse_line.")
    end

    # Returns the entry of +result+ for the category named in +line+ (creating
    # it if needed) when that category is one of the keys; nil otherwise.
    def match_category(line, result)
      category_match = category.match(line)
      return if category_match.nil?

      # Named `name` so the local does not shadow the #category method.
      name = category_match[1]
      result[name] ||= {} if @keys.include?(name)
    end

    # Parses +filename+ and returns { category => { lang => translation } }.
    def parse_file(filename)
      current_string = nil
      result = {}
      # File.foreach closes the file when the block finishes.
      File.foreach(filename, encoding: 'UTF-8') do |raw_line|
        line = raw_line.strip
        next if should_exclude_line?(line)

        # If line is empty -> next category block started
        if line.empty?
          current_string = nil
          next
        end

        current_string ||= match_category(line, result)

        parsed = parse_line(line)
        next if parsed.nil? || current_string.nil?

        lang, translation = parsed
        current_string[lang] = translation
      end
      result
    end

    # Returns the Regexp whose first group is the category name.
    def category
      raise NotImplementedError.new("You must implement category.")
    end

    # Tells whether +line+ must be skipped entirely; nothing is by default.
    def should_exclude_line?(line)
      false
    end
  end

  # Parser for categories.txt: `lang:synonym|synonym|...` lines below a
  # `@category` header.
  class CategoriesParser < AbstractParser
    # Returns [lang, synonyms] for a known language, nil otherwise. Each
    # synonym has an optional leading digit and `^` removed.
    def parse_line(line)
      line_match = /^([^:]+):(\S+)$/u.match(line)
      return unless line_match

      lang = line_match[1].strip
      return unless LANGUAGES.include?(lang)

      synonyms = []
      line_match[2].strip.split('|').each do |token|
        token_match = /\d?\^?(.*)$/.match(token)
        synonyms.push(token_match[1]) if token_match
      end
      [lang, synonyms]
    end

    # Lines starting with `#` are comments.
    def should_exclude_line?(line)
      line.start_with?('#')
    end

    def category
      # We match only global categories ('food', 'bank'...)
      /^@([A-Za-z0-9]+)$/
    end
  end

  # Parser for strings.txt: `lang = translation` lines below a `[name]` header.
  class StringsParser < AbstractParser
    # Returns [lang, translation] for a known language, nil otherwise.
    def parse_line(line)
      line_match = /^([^=]+)=(.*)$/.match(line)
      return unless line_match

      lang = line_match[1].strip
      [lang, line_match[2].strip] if LANGUAGES.include?(lang)
    end

    def category
      /^\[(.+)\]/
    end
  end
end
|
Loading…
Add table
Reference in a new issue