From 68c6da563fab01c13f2e9c8b16879da4f9b1f078 Mon Sep 17 00:00:00 2001
From: Konstantin Pastbin
Date: Fri, 15 Apr 2022 10:55:38 +0300
Subject: [PATCH 1/2] [python] Updated get_version() to use version.sh

Signed-off-by: Konstantin Pastbin
---
 pyhelpers/setup.py                 | 27 +++++----------------------
 tools/python/descriptions/setup.py |  2 +-
 2 files changed, 6 insertions(+), 23 deletions(-)

diff --git a/pyhelpers/setup.py b/pyhelpers/setup.py
index 1f5d696ac7..dea8cb36fb 100644
--- a/pyhelpers/setup.py
+++ b/pyhelpers/setup.py
@@ -5,6 +5,7 @@ import linecache
 import multiprocessing
 import os
 import re
+import subprocess
 import sys
 from contextlib import contextmanager
 from distutils import log
@@ -409,12 +410,6 @@ class BuildOmimBindingCommand(build_ext, object):
         self.cmake_pybindings()
         super(BuildOmimBindingCommand, self).run()
 
-
-VERSIONS_LOCATIONS = {
-    'xcode/common.xcconfig': 'CURRENT_PROJECT_VERSION',
-    'android/gradle.properties': 'propVersionName',
-}
-
 PYBINDINGS = {
     'pygen': {
         'path': 'generator/pygen',
@@ -451,22 +446,10 @@ PYBINDINGS = {
 
 
 def get_version():
-    versions = []
-    for path, varname in VERSIONS_LOCATIONS.items():
-        with open(os.path.join(OMIM_ROOT, os.path.normpath(path))) as f:
-            for line in f:
-                match = re.search(
-                    r'^\s*{}\s*=\s*(?P<version>.*)'.format(varname),
-                    line.strip(),
-                )
-                if match:
-                    versions.append(LooseVersion(match.group('version')))
-                    break
-    code_version = max(versions)
-
-    env_version_addendum = os.environ.get('OMIM_SCM_VERSION', '')
-
-    return "{}{}".format(code_version, env_version_addendum)
+    return subprocess.check_output(
+        [os.path.join(OMIM_ROOT, 'tools', 'unix', 'version.sh'), 'android_code'],
+        universal_newlines=True,
+    ).strip(' \n\r')
 
 
 def transform_omim_requirement(requirement, omim_package_version):
diff --git a/tools/python/descriptions/setup.py b/tools/python/descriptions/setup.py
index 196d1117fb..78c4a6d9d3 100755
--- a/tools/python/descriptions/setup.py
+++ b/tools/python/descriptions/setup.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 import os
 import sys
-- 
2.45.3

From a540b379792c47771a8517ed73793676f57be206 Mon Sep 17 00:00:00 2001
From: Konstantin Pastbin
Date: Sat, 16 Apr 2022 22:39:37 +0300
Subject: [PATCH 2/2] [generator] Download Wikipedia articles' summaries only

Signed-off-by: Konstantin Pastbin
---
 tools/python/descriptions/__main__.py                | 12 ++++++++++--
 tools/python/descriptions/descriptions_downloader.py |  7 ++++---
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/tools/python/descriptions/__main__.py b/tools/python/descriptions/__main__.py
index f74a1fac72..f9d944d3f0 100644
--- a/tools/python/descriptions/__main__.py
+++ b/tools/python/descriptions/__main__.py
@@ -14,7 +14,11 @@ from descriptions.descriptions_downloader import log
 def parse_args():
     parser = argparse.ArgumentParser(description="Download wiki pages.")
     parser.add_argument(
-        "--output_dir", metavar="PATH", type=str, help="Output dir for saving pages"
+        "--output_dir",
+        metavar="PATH",
+        type=str,
+        required=True,
+        help="Output dir for saving pages",
     )
     parser.add_argument(
         "--popularity",
@@ -32,7 +36,10 @@ def parse_args():
         help="Input file with wikipedia url.",
     )
     parser.add_argument(
-        "--wikidata", metavar="PATH", type=str, help="Input file with wikidata ids."
+        "--wikidata",
+        metavar="PATH",
+        type=str,
+        help="Input file with wikidata ids.",
     )
     parser.add_argument(
         "--langs",
@@ -47,6 +54,7 @@ def parse_args():
 
 
 def main():
+    logging.basicConfig(level=logging.WARNING)
     log.setLevel(logging.WARNING)
     wikipediaapi.log.setLevel(logging.WARNING)
     args = parse_args()
diff --git a/tools/python/descriptions/descriptions_downloader.py b/tools/python/descriptions/descriptions_downloader.py
index 7ffe0a3609..3241cabb1d 100644
--- a/tools/python/descriptions/descriptions_downloader.py
+++ b/tools/python/descriptions/descriptions_downloader.py
@@ -138,7 +138,7 @@ def beautify_page(html, lang):
     for x in soup.find_all():
         if len(x.text.strip()) == 0:
             x.extract()
-    soup = remove_bad_sections(soup, lang)
+    # soup = remove_bad_sections(soup, lang)
     html = str(soup.prettify())
     html = htmlmin.minify(html, remove_empty_space=True)
     return html
@@ -181,7 +181,8 @@ def download(directory, url):
         return None
     page = get_wiki_page(lang, page_name)
     try:
-        text = try_get(page, "text")
+        # text = try_get(page, "text")
+        text = try_get(page, "summary")
     except GettingError:
         log.exception(f"Error: page is not downloaded {page_name}.")
         return None
@@ -236,7 +237,7 @@ def wikipedia_worker(output_dir, checker, langs):
             if not checker(ident):
                 return
             url = url.strip()
-        except (AttributeError, IndexError):
+        except (AttributeError, IndexError, ValueError):
             log.exception(f"{line} is incorrect.")
             return
         parsed = urllib.parse.urlparse(url)
-- 
2.45.3
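
As a quick sanity check of the version logic introduced in PATCH 1/2, the snippet below mirrors the patched get_version() outside of setup.py. It is only a sketch under assumptions: an organicmaps/omim checkout containing tools/unix/version.sh, and OMIM_ROOT taken from the environment; the fallback to the current directory and the __main__ guard are illustrative and not part of the patch.

# Minimal sketch: reproduce the patched get_version() standalone.
# Assumptions (not from the patch): OMIM_ROOT comes from the environment,
# defaulting to the current directory; the checkout provides tools/unix/version.sh.
import os
import subprocess

OMIM_ROOT = os.environ.get('OMIM_ROOT', '.')

def get_version():
    # Same call as the patched pyhelpers/setup.py: ask version.sh for the
    # 'android_code' value instead of parsing the xcode/android config files.
    return subprocess.check_output(
        [os.path.join(OMIM_ROOT, 'tools', 'unix', 'version.sh'), 'android_code'],
        universal_newlines=True,
    ).strip(' \n\r')

if __name__ == '__main__':
    print(get_version())  # prints whatever version string version.sh reports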
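
The effect of PATCH 2/2 can be illustrated without touching the downloader itself. A rough sketch, assuming a wikipediaapi page object like the one get_wiki_page() returns (page.text is the full article, page.summary only the lead section); the helper name and the returned tuple are illustrative, not part of the patch.

# Rough sketch: compare what download() fetched before and after PATCH 2/2.
# Assumes a wikipediaapi page object, e.g. from get_wiki_page(lang, page_name).
def summary_savings(page):
    full_html = getattr(page, "text")      # what download() used before the patch
    lead_html = getattr(page, "summary")   # what download() uses after the patch
    # The lead section is typically a small fraction of the full article.
    return len(lead_html), len(full_html)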