[tools] Update find_untranslated_strings.py

- introduced CLI
- added languages stats
- filter output by selected languages
- plurals support
- more thorough file format check
- added validation mode
- preliminary "ref=" support
- general refactoring
- beautified by autopep8

Needed-for: #1703

Signed-off-by: Konstantin Pastbin <konstantin.pastbin@gmail.com>
This commit is contained in:
Konstantin Pastbin 2021-12-29 00:45:39 +03:00 committed by Alexander Borsuk
parent a107eb9084
commit 51ac4c26a5
2 changed files with 433 additions and 171 deletions

View file

@ -128,11 +128,11 @@ def parenthesize(strings):
def write_filtered_strings_txt(filtered, filepath, languages=None):
logging.info("Writing strings to file {0}".format(filepath))
strings_txt = StringsTxt()
strings_txt = StringsTxt("{0}/{1}".format(OMIM_ROOT, StringsTxt.STRINGS_TXT_PATH))
strings_dict = {key : dict(strings_txt.translations[key]) for key in filtered}
strings_txt.translations = strings_dict
strings_txt.comments_and_tags = {}
strings_txt.write_formatted(filepath, languages=languages)
strings_txt.write_formatted(target_file=filepath, langs=languages)
def get_args():
@ -201,7 +201,7 @@ def get_args():
parser.add_argument(
"-ct", "--categories",
dest="hardcoded_cagegories",
dest="hardcoded_categories",
default="{0}/data/hardcoded_categories.txt".format(find_omim()),
help="""Path to the list of the categories that are displayed in the
interface, but are not taken from strings.txt"""
@ -261,13 +261,13 @@ def do_single(args):
filtered.update(android)
filtered.update(core)
strings_txt = StringsTxt()
strings_txt = StringsTxt("{0}/{1}".format(OMIM_ROOT, StringsTxt.STRINGS_TXT_PATH))
strings_txt.translations = {key: dict(strings_txt.translations[key]) for key in filtered}
strings_txt.comments_and_tags = new_comments_and_tags(strings_txt, filtered, new_tags)
path = args.output if isabs(args.output) else "{0}/{1}".format(OMIM_ROOT, args.output)
strings_txt.write_formatted(languages=args.langs, target_file=path)
strings_txt.write_formatted(target_file=path, langs=args.langs)
if args.generate:
exec_shell(
@ -325,7 +325,7 @@ if __name__ == "__main__":
OMIM_ROOT=args.omim_root
HARDCODED_CATEGORIES = read_hardcoded_categories(
args.hardcoded_cagegories
args.hardcoded_categories
)
args.langs = set(args.langs) if args.langs else None

View file

@ -1,39 +1,47 @@
#!/usr/bin/env python3
from argparse import ArgumentParser
from collections import namedtuple, defaultdict
from itertools import combinations
from os.path import join, dirname
from os.path import join, dirname, abspath, isabs
import re
from sys import argv
TransAndKey = namedtuple("TransAndKey", "translation, key")
TRANSLATION = re.compile(r"(.*)\s*=\s*.*$", re.S | re.MULTILINE)
MANY_DOTS = re.compile(r"\.{4,}")
SPACE_PUNCTUATION = re.compile(r"\s[.,?!:;]")
PLACEHOLDERS = re.compile(r"(%\d*\$@|%[@dqus]|\^)")
SIMILARITY_THRESHOLD = 20.0 #%
class StringsTxt:
def __init__(self, strings_path=None):
if not strings_path:
self.strings_path = join(dirname(argv[0]), "..", "..", "data", "strings", "strings.txt")
else:
self.strings_path = strings_path
STRINGS_TXT_PATH = "data/strings/strings.txt"
TYPES_STRINGS_TXT_PATH = "data/strings/types_strings.txt"
self.translations = defaultdict(lambda: defaultdict(str)) # dict<key, dict<lang, translation>>
self.translations_by_language = defaultdict(dict) # dict<lang, dict<key, translation>>
self.comments_and_tags = defaultdict(dict)
self.with_english = []
self.all_langs = set()
self.duplicates = {} # dict<lang, TransAndKey>
SECTION = re.compile(r"\[\[\w+.*\]\]")
DEFINITION = re.compile(r"\[[\w.]+\]")
LANG_KEY = re.compile(r"^[a-z]{2}(-[a-zA-Z]{2,4})?(:[a-z]+)?$")
TRANSLATION = re.compile(r"^\s*\S+\s*=\s*\S+.*$", re.S | re.MULTILINE)
MANY_DOTS = re.compile(r"\.{4,}")
SPACE_PUNCTUATION = re.compile(r"\s[.,?!:;]")
PLACEHOLDERS = re.compile(r"(%\d*\$@|%[@dqus]|\^)")
PLURAL_KEYS = frozenset(("zero", "one", "two", "few", "many", "other"))
SIMILARITY_THRESHOLD = 20.0 # %
TransAndKey = namedtuple("TransAndKey", "translation, key")
def __init__(self, strings_path):
self.strings_path = strings_path
# dict<key, dict<lang, translation>>
self.translations = defaultdict(lambda: defaultdict(str))
self.translations_by_language = defaultdict(
dict) # dict<lang, dict<key, translation>>
self.comments_and_tags = defaultdict(
dict) # dict<lang, dict<key, value>>
self.all_langs = set() # including plural keys, e.g. en:few
self.langs = set() # without plural keys
self.duplicates = {} # dict<lang, TransAndKey>
self.keys_in_order = []
self._read_file()
self.validation_errors = False
self._read_file()
def process_file(self):
self._populate_translations_by_langs()
@ -43,63 +51,154 @@ class StringsTxt:
self.similarity_indices = []
self._find_most_similar()
def add_translation(self, translation, key, lang):
    """Store ``translation`` for definition ``key`` under ``lang``.

    The key is appended to ``keys_in_order`` the first time it is seen.
    ``lang`` is recorded verbatim in ``all_langs`` (so plural forms such
    as ``en:few`` are kept), while only its base part, as returned by
    ``_parse_lang``, is recorded in ``langs``.
    """
    ordered_keys = self.keys_in_order
    if key not in ordered_keys:
        ordered_keys.append(key)

    self.translations[key][lang] = translation
    self.all_langs.add(lang)

    # Only the language part matters here; the plural suffix is ignored.
    base_lang, _plural = self._parse_lang(lang)
    self.langs.add(base_lang)
def append_to_translation(self, key, lang, tail):
    """Append ``tail`` to the translation stored for ``key`` and ``lang``."""
    per_lang = self.translations[key]
    extended = per_lang[lang] + tail
    per_lang[lang] = extended
def _read_file(self):
with open(self.strings_path, encoding='utf-8') as strings:
for line in strings:
line = line.strip()
if not line:
continue
if line.startswith("[["):
if self.SECTION.match(line):
self.keys_in_order.append(line)
continue
if line.startswith("["):
# if line in self.translations:
# print("Duplicate key {}".format(line))
# continue
if self.DEFINITION.match(line):
if line in self.translations:
self._print_validation_issue(
"Duplicate definition: {0}".format(line))
self.translations[line] = {}
current_key = line
if current_key not in self.keys_in_order:
self.keys_in_order.append(current_key)
continue
if TRANSLATION.match(line):
if self.TRANSLATION.match(line):
lang, tran = self._parse_lang_and_translation(line)
if lang == "comment" or lang == "tags":
if lang == "comment" or lang == "tags" or lang == "ref":
self.comments_and_tags[current_key][lang] = tran
continue
self.translations[current_key][lang] = tran
self.all_langs.add(lang)
if line.startswith("en = "):
self.with_english.append(current_key)
continue
lang, plural_key = self._parse_lang(lang)
self.langs.add(lang)
else:
self._print_validation_issue(
"Could't parse line: {0}".format(line))
def print_statistics(self):
stats = [(x, len(self.translations[x])) for x in list(self.translations.keys())]
stats.sort(key=lambda x: x[1], reverse=True)
def print_languages_stats(self, langs=None):
self._print_header("Languages statistics")
print("All languages in the file ({0} total):\n{1}\n".format(
len(self.langs), ",".join(sorted(self.langs)))
)
print("Regional languages:\n{0}\n".format(
",".join([lang for lang in sorted(self.langs) if len(lang) > 2]))
)
print("Languages using plurals:\n{0}\n".format(
",".join([lang for lang in sorted(self.all_langs) if lang.find(":") > -1]))
)
for k, v in stats:
print("{0}\t{1}".format(k, v))
self.print_invalid_languages()
print_plurals = True
if not langs:
print_plurals = False
langs = self.langs
def print_duplicates(self):
print(self._header("Duplicates:"))
for lang, trans_and_keys in list(self.duplicates.items()):
print("{0}\n {1}\n".format("=" * (len(lang) + 2), lang))
langs_stats = []
plurals_stats = defaultdict(dict) # dict<lang, dict<plural, int>>
for lang in langs:
lang_defs = set()
if lang in self.translations_by_language:
lang_defs = set(self.translations_by_language[lang].keys())
plurals_stats[lang][lang] = len(lang_defs)
for plural_key in self.PLURAL_KEYS:
lang_plural = "{0}:{1}".format(lang, plural_key)
if lang_plural in self.translations_by_language:
plural_defs = set(
self.translations_by_language[lang_plural].keys())
plurals_stats[lang][lang_plural] = len(plural_defs)
lang_defs = lang_defs.union(plural_defs)
langs_stats.append((lang, len(lang_defs)))
print("\nNumber of translations out of total:\n")
langs_stats.sort(key=lambda x: x[1], reverse=True)
n_trans = len(self.translations)
for lang, lang_stat in langs_stats:
print("{0:7} : {1} / {2} ({3}%)".format(
lang, lang_stat, n_trans, round(100 * lang_stat / n_trans)
))
if print_plurals and not (len(plurals_stats[lang]) == 1 and lang in plurals_stats[lang]):
for lang_plural, plural_stat in plurals_stats[lang].items():
print(" {0:13} : {1}".format(lang_plural, plural_stat))
def print_invalid_languages(self):
    """Report language codes and plural suffixes that fail validation.

    Every entry of ``all_langs`` is matched against ``LANG_KEY``, and its
    plural part (if it has one) must belong to ``PLURAL_KEYS``.  Offending
    entries are reported, sorted and comma-joined, through
    ``_print_validation_issue``; nothing is reported when all are valid.
    """
    def has_unknown_plural(code):
        _lang, plural = self._parse_lang(code)
        return bool(plural) and plural not in self.PLURAL_KEYS

    bad_langs = sorted(
        code for code in self.all_langs if not self.LANG_KEY.match(code))
    bad_plurals = sorted(
        code for code in self.all_langs if has_unknown_plural(code))

    if bad_langs:
        self._print_validation_issue(
            "Invalid languages: {0}".format(",".join(bad_langs)))
    if bad_plurals:
        self._print_validation_issue(
            "Invalid plurals: {0}".format(",".join(bad_plurals)))
def print_definitions_stats(self, langs=None):
    """Print, per definition, how many of the selected languages it covers.

    Args:
        langs: iterable of base language codes to count against; when
            empty or None, every language in ``self.langs`` is used.

    Plural forms (e.g. ``ru:few``) count once towards their base language.
    Definitions are listed from the most to the least translated.
    """
    self._print_header("Definitions stats")
    print("Number of translations out of total:\n")

    # A set removes duplicates (which would inflate the denominator) and
    # makes the membership test below O(1).
    wanted = set(langs) if langs else set(self.langs)
    n_langs = len(wanted)

    def_stats = []
    for definition, per_lang in self.translations.items():
        covered = set()
        for lang_code in per_lang:
            base_lang, _plural = self._parse_lang(lang_code)
            if base_lang in wanted:
                covered.add(base_lang)
        def_stats.append((definition, len(covered)))
    def_stats.sort(key=lambda x: x[1], reverse=True)

    for definition, n_trans in def_stats:
        # Guard against a file with no languages at all.
        percent = round(100 * n_trans / n_langs) if n_langs else 0
        print("{0}\t{1} / {2} ({3}%)".format(
            definition, n_trans, n_langs, percent
        ))
def print_duplicates(self, langs=None):
self._print_header("Duplicate translations")
print("Same translations used in several definitions:")
langs = self._expand_plurals(langs) if langs else self.all_langs
dups = list(self.duplicates.items())
dups.sort(key=lambda x: x[0])
for lang, trans_and_keys in dups:
if lang not in langs:
continue
print("\nLanguage: {0}".format(lang))
last_one = ""
keys = []
for tr in trans_and_keys:
@ -110,41 +209,47 @@ class StringsTxt:
keys.append(tr.key)
self._print_keys_for_duplicates(keys, last_one)
def _print_keys_for_duplicates(self, keys, last_one):
if last_one:
print("{0}: {1}\n".format(", ".join(keys), last_one))
print("\t{0}: {1}".format(",".join(keys), last_one))
def _expand_plurals(self, langs):
expanded_langs = set()
for lang_plural in self.all_langs:
lang, plural_key = self._parse_lang(lang_plural)
if lang in langs:
expanded_langs.add(lang_plural)
return expanded_langs
def _process_string(self, string):
    """Return ``string`` stripped of outer whitespace and of every "...".

    A warning is printed first when the raw string matches ``MANY_DOTS``.
    """
    found_dot_run = MANY_DOTS.search(string)
    if found_dot_run:
        print("WARNING: 4 or more dots in the string: {0}".format(string))
    stripped = str.strip(string)
    return stripped.replace("...", "")
def _parse_lang(self, lang):
plural_key = None
sep_pos = lang.find(":")
if sep_pos > -1:
lang, plural_key = lang.split(":")
return lang, plural_key
def _parse_lang_and_translation(self, line):
ret = tuple(map(self._process_string, line.split("=", 1)))
if len(ret) < 2:
print("ERROR: Couldn't parse the line: {0}".format(line))
assert len(ret) == 2
return ret
lang, trans = line.split("=", 1)
if self.MANY_DOTS.search(trans):
self._print_validation_issue(
"4 or more dots in the string: {0}".format(line), warning=True)
return (lang.strip(), trans.strip())
def _populate_translations_by_langs(self):
for lang in self.all_langs:
trans_for_lang = {}
for key, tran in list(self.translations.items()): # (tran = dict<lang, translation>)
for key, tran in self.translations.items(): # (tran = dict<lang, translation>)
if lang not in tran:
continue
trans_for_lang[key] = tran[lang]
self.translations_by_language[lang] = trans_for_lang
def _find_duplicates(self):
for lang, tran in list(self.translations_by_language.items()):
trans_for_lang = [TransAndKey(x[1], x[0]) for x in list(tran.items())]
for lang, tran in self.translations_by_language.items():
trans_for_lang = [self.TransAndKey(
x[1], x[0]) for x in tran.items()]
trans_for_lang.sort(key=lambda x: x.translation)
prev_tran = TransAndKey("", "")
prev_tran = self.TransAndKey("", "")
possible_duplicates = set()
for curr_tran in trans_for_lang:
if curr_tran.translation == prev_tran.translation:
@ -157,35 +262,53 @@ class StringsTxt:
def _find_most_duplicated(self):
most_duplicated = defaultdict(int)
for trans_and_keys in list(self.duplicates.values()):
for trans_and_keys in self.duplicates.values():
for trans_and_key in trans_and_keys:
most_duplicated[trans_and_key.key] += 1
self.most_duplicated = sorted(list(most_duplicated.items()), key=lambda x: x[1], reverse=True)
self.most_duplicated = sorted(
most_duplicated.items(), key=lambda x: x[1], reverse=True)
def print_most_duplicated(self):
print(self._header("Most duplicated"))
self._print_header("Most duplicated")
print("Definitions with the most translations shared with other definitions:\n")
for pair in self.most_duplicated:
print("{}\t{}".format(pair[0], pair[1]))
def print_missing_translations(self):
print(self._header("Missing translations for languages:"))
print(self.all_langs)
def print_missing_translations(self, langs=None):
self._print_header("Untranslated definitions")
if not langs:
langs = sorted(self.langs)
all_translation_keys = set(self.translations.keys())
for lang in self.all_langs:
for lang in langs:
keys_for_lang = set(self.translations_by_language[lang].keys())
missing_keys = sorted(list(all_translation_keys - keys_for_lang))
print("{0}:\n{1}\n".format(lang, "\n".join(missing_keys)))
missing_keys = all_translation_keys - keys_for_lang
for plural_key in self.PLURAL_KEYS:
lang_plural = "{0}:{1}".format(lang, plural_key)
if lang_plural in self.translations_by_language:
missing_keys -= set(
self.translations_by_language[lang_plural].keys())
missing_keys = sorted(missing_keys)
print("Language: {0} ({1} missing)\n\t{2}\n".format(
lang, len(missing_keys), "\n\t".join(missing_keys)))
def write_formatted(self, target_file=None, languages=None):
def write_formatted(self, target_file=None, langs=None):
before_block = ""
langs = self._expand_plurals(langs) if langs else self.all_langs
en_langs = []
other_langs = []
for lang in langs:
if lang.startswith("en"):
en_langs.append(lang)
else:
other_langs.append(lang)
sorted_langs = sorted(en_langs) + sorted(other_langs)
if target_file is None:
target_file = self.strings_path
with open(target_file, "w") as outfile:
for key in self.keys_in_order:
# TODO: sort definitions and sections too?
if not key:
continue
if key in self.translations:
@ -200,30 +323,17 @@ class StringsTxt:
before_block = "\n"
if key in self.comments_and_tags:
for k, v in list(self.comments_and_tags[key].items()):
for k, v in self.comments_and_tags[key].items():
outfile.write(" {0} = {1}\n".format(k, v))
en_langs = []
for lang in self.all_langs:
if lang.startswith('en'):
en_langs.append(lang)
sorted_langs = sorted(en_langs) + sorted(self.all_langs - set(en_langs))
self._write_translations_for_langs(sorted_langs, tran, outfile, only_langs=languages)
self._write_translations_for_langs(sorted_langs, tran, outfile)
def _write_translations_for_langs(self, langs, tran, outfile, only_langs=None):
langs_to_write = []
if only_langs:
for lang in only_langs:
if lang in langs:
langs_to_write.append(lang)
else:
langs_to_write = langs
for lang in langs_to_write:
def _write_translations_for_langs(self, langs, tran, outfile):
for lang in langs:
if lang in tran:
outfile.write(" {0} = {1}\n".format(lang, tran[lang]))
outfile.write(" {0} = {1}\n".format(
lang, tran[lang].replace("...", "")
))
def _compare_blocks(self, key_1, key_2):
block_1 = self.translations[key_1]
@ -236,104 +346,256 @@ class StringsTxt:
if block_1[key] == block_2[key]:
common_elements += 1
return [x for x in [
(self._similarity_string(key_1, key_2), self._similarity_index(len(block_1), common_elements)),
(self._similarity_string(key_2, key_1), self._similarity_index(len(block_2), common_elements))
] if x[1] > SIMILARITY_THRESHOLD]
def _similarity_string(self, key_1, key_2):
return "{} -> {}".format(key_1, key_2)
def _similarity_index(self, total_number, number_from_other):
return 100.0 * number_from_other / total_number
sim_index = round(100 * 2 * common_elements /
(len(block_1) + len(block_2)))
if sim_index >= self.SIMILARITY_THRESHOLD:
return [("{} <-> {}".format(key_1, key_2), sim_index)]
return []
def _find_most_similar(self):
search_scope = [x for x in self.most_duplicated if x[1] > len(self.translations[x[0]]) / 10]
search_scope = [x for x in self.most_duplicated if x[1]
> len(self.translations[x[0]]) / 10]
for one, two in combinations(search_scope, 2):
self.similarity_indices.extend(self._compare_blocks(one[0], two[0]))
self.similarity_indices.extend(
self._compare_blocks(one[0], two[0]))
self.similarity_indices.sort(key=lambda x: x[1], reverse=True)
def print_most_similar(self):
print(self._header("Most similar blocks"))
self._print_header("Most similar definitions")
print("Definitions most similar to other definitions, i.e. with a lot of same translations:\n")
for index in self.similarity_indices:
print("{} : {}".format(index[0], index[1]))
print("{} : {}%".format(index[0], index[1]))
def _print_header(self, string):
# print headers in green colour
print("\n{line} \033[0;32m{str}\033[0m {line}\n".format(
line="=" * round((70 - len(string)) / 2),
str=string
))
def _header(self, string):
return "\n\n{line}\n{string}\n{line}\n".format(
line="=" * 80,
string=string
)
def _print_validation_issue(self, issue, warning=False):
if warning:
# print warnings in yellow colour
print("\033[0;33mWARNING: {0}\033[0m".format(issue))
return
self.validation_errors = True
# print errors in red colour
print("\033[0;31mERROR: {0}\033[0m".format(issue))
def _has_space_before_punctuation(self, lang, string):
if lang == "fr":
if lang == "fr": # make exception for French
return False
if SPACE_PUNCTUATION.search(string):
if self.SPACE_PUNCTUATION.search(string):
return True
return False
def print_strings_with_spaces_before_punctuation(self):
print(self._header("Strings with spaces before punctuation:"))
for key, lang_and_trans in list(self.translations.items()):
def print_strings_with_spaces_before_punctuation(self, langs=None):
self._print_header("Strings with spaces before punctuation")
langs = self._expand_plurals(langs) if langs else self.all_langs
for key, lang_and_trans in self.translations.items():
wrote_key = False
for lang, translation in list(lang_and_trans.items()):
if self._has_space_before_punctuation(lang, translation):
if not wrote_key:
print("\n{}".format(key))
wrote_key = True
print("{} : {}".format(lang, translation))
for lang, translation in lang_and_trans.items():
if lang in langs:
if self._has_space_before_punctuation(lang, translation):
if not wrote_key:
print("\n{}".format(key))
wrote_key = True
self._print_validation_issue(
"{0} : {1}".format(lang, translation), warning=True)
def _check_placeholders_in_block(self, block_key):
def _check_placeholders_in_block(self, block_key, langs):
wrong_placeholders_strings = []
key = self.translations[block_key].get("en")
if not key:
print("No english for key: {}".format(block_key))
print("Existing keys are: {}".format(",".join(list(self.translations[block_key].keys()))))
raise KeyError
en_lang = "en"
en_trans = self.translations[block_key].get(en_lang)
if not en_trans:
for plural_key in sorted(self.PLURAL_KEYS):
if en_trans:
break
en_lang = "en:{0}".format(plural_key)
en_trans = self.translations[block_key].get(en_lang)
if not en_trans:
self._print_validation_issue(
"No English for definition: {}".format(block_key))
return None, wrong_placeholders_strings
en_placeholders = sorted(PLACEHOLDERS.findall(key))
en_placeholders = sorted(self.PLACEHOLDERS.findall(en_trans))
for lang, translation in list(self.translations[block_key].items()):
if lang == "en":
continue
found = sorted(PLACEHOLDERS.findall(translation))
if not en_placeholders == found: #must be sorted
wrong_placeholders_strings.append("{} : {}".format(lang, translation))
for lang, translation in self.translations[block_key].items():
found = sorted(self.PLACEHOLDERS.findall(translation))
if not en_placeholders == found: # must be sorted
wrong_placeholders_strings.append(
"{} = {}".format(lang, translation))
return wrong_placeholders_strings
return en_lang, wrong_placeholders_strings
def print_strings_with_wrong_paceholders(self):
print(self._header("Strings with a wrong number of placeholders:"))
for key, lang_and_trans in list(self.translations.items()):
wrong_placeholders = self._check_placeholders_in_block(key)
def print_strings_with_wrong_placeholders(self, langs=None):
self._print_header("Strings with a wrong number of placeholders")
langs = self._expand_plurals(langs) if langs else self.all_langs
for key, lang_and_trans in self.translations.items():
en_lang, wrong_placeholders = self._check_placeholders_in_block(
key, langs)
if not wrong_placeholders:
continue
print("\n{0}".format(key))
print("English: {0}".format(lang_and_trans["en"]))
for string in wrong_placeholders:
print(string)
print("{0} = {1}".format(en_lang, lang_and_trans[en_lang]))
for wp in wrong_placeholders:
self._print_validation_issue(wp)
def validate(self, langs=None):
    """Run all validation checks and report whether the file is valid.

    Args:
        langs: optional collection of base language codes that limits the
            punctuation and placeholder checks; None checks every language.

    Returns:
        True when no validation error has been recorded, either while
        reading the file or by the checks run here; False otherwise.
    """
    self._print_header("Validating the file...")
    if self.validation_errors:
        self._print_validation_issue(
            "There were errors reading the file, check the output above")
    self._print_header("Invalid languages")
    self.print_invalid_languages()
    # Use the caller-supplied filter, not the script-level ``args``: the
    # global does not exist when this class is used as a library.
    self.print_strings_with_spaces_before_punctuation(langs=langs)
    self.print_strings_with_wrong_placeholders(langs=langs)
    return not self.validation_errors
def find_project_root():
    """Return the project root directory derived from this script's path.

    The root is the part of the script's absolute path that precedes the
    last ``tools/python`` component.  When the script is not located under
    such a directory, the directory three levels above the script file is
    returned instead (the same place the marker would point to in the
    ``<root>/tools/python/<script>`` layout).
    """
    from os import sep  # local import: the file only imports os.path names

    my_path = abspath(__file__)
    # Build the marker with the native separator so it also matches on
    # platforms where abspath() does not produce "/".
    marker = sep + join("tools", "python")
    tools_index = my_path.rfind(marker)
    if tools_index == -1:
        # Slicing with -1 would just drop the last character of the path.
        return dirname(dirname(dirname(my_path)))
    return my_path[:tools_index]
def get_args():
    """Parse the command line and return the resulting namespace.

    Attributes of the returned namespace:
        input: input file path, or None to use the default file.
        types_strings: True to default to types_strings.txt.
        output: None (do not write), True (``-o`` given without a path,
            meaning overwrite the input file) or an output path.
        langs: comma-separated language list as a single string, or None.
        print_langs, print_defs, print_dups, print_mdups, print_similar,
        print_missing, validate: booleans selecting the reports to run.
    """
    parser = ArgumentParser(
        description="""
        Validates and formats translation files (strings.txt, types_strings.txt),
        prints file's statistics, finds duplicate and missing translations, etc."""
    )

    parser.add_argument(
        "input",
        nargs="?", default=None,
        help="input file path, defaults to <organicmaps>/data/strings/strings.txt"
    )

    parser.add_argument(
        "-t", "--types-strings",
        action="store_true",
        help="use <organicmaps>/data/strings/types_strings.txt as input file by default"
    )

    # nargs="?" with const=True lets a bare "-o" mean "overwrite the input".
    parser.add_argument(
        "-o", "--output",
        default=None, nargs="?", const=True,
        help="""path to write formatted output file to with languages
        sorted in alphabetic order except English translations going first
        (overwrites the input file by default)"""
    )

    parser.add_argument(
        "-l", "--languages",
        dest="langs", default=None,
        help="a comma-separated list of languages to limit output to, if applicable"
    )

    parser.add_argument(
        "-pl", "--print-languages",
        dest="print_langs",
        action="store_true",
        help="print languages statistics"
    )

    parser.add_argument(
        "-pf", "--print-definitions",
        dest="print_defs",
        action="store_true",
        help="print definitions statistics"
    )

    parser.add_argument(
        "-pd", "--print-duplicates",
        dest="print_dups",
        action="store_true",
        help="print same translations used in several definitions"
    )

    parser.add_argument(
        "-po", "--print-most-duplicated",
        dest="print_mdups",
        action="store_true",
        help="""print definitions with the most translations shared
        with other definitions"""
    )

    parser.add_argument(
        "-ps", "--print-similar",
        dest="print_similar",
        action="store_true",
        help="""print definitions most similar to other definitions,
        i.e. with a lot of same translations"""
    )

    parser.add_argument(
        "-pm", "--missing-translations",
        dest="print_missing",
        action="store_true",
        help="print untranslated definitions"
    )

    parser.add_argument(
        "-v", "--validate",
        dest="validate",
        action="store_true",
        help="""validate file format, placeholders usage, whitespace
        before punctuation, etc; exit with error if not valid"""
    )

    return parser.parse_args()
if __name__ == "__main__":
import sys
strings = StringsTxt(sys.argv[1] if len(sys.argv) > 1 else None)
args = get_args()
if not args.input:
args.input = StringsTxt.TYPES_STRINGS_TXT_PATH if args.types_strings else StringsTxt.STRINGS_TXT_PATH
args.input = "{0}/{1}".format(find_project_root(), args.input)
args.input = abspath(args.input)
print("Input file: {0}\n".format(args.input))
strings = StringsTxt(args.input)
strings.process_file()
strings.print_statistics()
strings.print_duplicates()
strings.print_most_duplicated()
strings.print_most_similar()
strings.print_missing_translations()
strings.write_formatted()
strings.print_strings_with_spaces_before_punctuation()
strings.print_strings_with_wrong_paceholders()
if args.langs:
args.langs = args.langs.split(",")
print("Limit output to languages:\n{0}\n".format(",".join(args.langs)))
if args.print_langs:
strings.print_languages_stats(langs=args.langs)
if args.print_defs:
strings.print_definitions_stats(langs=args.langs)
if args.print_dups:
strings.print_duplicates(langs=args.langs)
if args.print_mdups:
strings.print_most_duplicated()
if args.print_similar:
strings.print_most_similar()
if args.print_missing:
strings.print_missing_translations(langs=args.langs)
if args.validate:
if not strings.validate(langs=args.langs):
# print in red color
print("\n\033[0;31mThe file is not valid, terminating\033[0m")
sys.exit(1)
if args.output:
if args.output == True:
args.output = args.input
else:
args.output = abspath(args.output)
print("\nWriting formatted output file: {0}\n".format(args.output))
strings.write_formatted(target_file=args.output, langs=args.langs)