Compare commits


2 commits

a540b37979  [generator] Download Wikipedia articles' summaries only
            Signed-off-by: Konstantin Pastbin <konstantin.pastbin@gmail.com>
            2022-04-16 22:39:37 +03:00

68c6da563f  [python] Updated get_version() to use version.sh
            Signed-off-by: Konstantin Pastbin <konstantin.pastbin@gmail.com>
            2022-04-15 10:55:38 +03:00
4 changed files with 20 additions and 28 deletions

View file

@@ -5,6 +5,7 @@ import linecache
 import multiprocessing
 import os
 import re
+import subprocess
 import sys
 from contextlib import contextmanager
 from distutils import log
@@ -409,12 +410,6 @@ class BuildOmimBindingCommand(build_ext, object):
         self.cmake_pybindings()
         super(BuildOmimBindingCommand, self).run()
 
-VERSIONS_LOCATIONS = {
-    'xcode/common.xcconfig': 'CURRENT_PROJECT_VERSION',
-    'android/gradle.properties': 'propVersionName',
-}
-
-
 PYBINDINGS = {
     'pygen': {
         'path': 'generator/pygen',
@@ -451,22 +446,10 @@ PYBINDINGS = {
 
 
 def get_version():
-    versions = []
-    for path, varname in VERSIONS_LOCATIONS.items():
-        with open(os.path.join(OMIM_ROOT, os.path.normpath(path))) as f:
-            for line in f:
-                match = re.search(
-                    r'^\s*{}\s*=\s*(?P<version>.*)'.format(varname),
-                    line.strip(),
-                )
-                if match:
-                    versions.append(LooseVersion(match.group('version')))
-                    break
-
-    code_version = max(versions)
-    env_version_addendum = os.environ.get('OMIM_SCM_VERSION', '')
-
-    return "{}{}".format(code_version, env_version_addendum)
+    return subprocess.check_output(
+        [os.path.join(OMIM_ROOT, 'tools', 'unix', 'version.sh'), 'android_code'],
+        universal_newlines=True,
+    ).strip(' \n\r')
 
 
 def transform_omim_requirement(requirement, omim_package_version):
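
Note: get_version() now delegates to the version.sh script instead of parsing
version strings out of the Xcode and Gradle configs. A minimal standalone
sketch of the same subprocess pattern, assuming only that version.sh prints
the version to stdout (the OMIM_ROOT resolution here is illustrative, not how
setup.py computes it):

    import os
    import subprocess

    # Assumption for this sketch: repo root comes from the environment.
    OMIM_ROOT = os.environ.get('OMIM_ROOT', '.')

    def get_version():
        # check_output returns stdout and raises CalledProcessError on a
        # non-zero exit, so a broken version.sh fails the build loudly.
        return subprocess.check_output(
            [os.path.join(OMIM_ROOT, 'tools', 'unix', 'version.sh'), 'android_code'],
            universal_newlines=True,  # decode bytes to str
        ).strip(' \n\r')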

View file

@@ -14,7 +14,11 @@ from descriptions.descriptions_downloader import log
 def parse_args():
     parser = argparse.ArgumentParser(description="Download wiki pages.")
     parser.add_argument(
-        "--output_dir", metavar="PATH", type=str, help="Output dir for saving pages"
+        "--output_dir",
+        metavar="PATH",
+        type=str,
+        required=True,
+        help="Output dir for saving pages",
     )
     parser.add_argument(
         "--popularity",
@@ -32,7 +36,10 @@ def parse_args():
         help="Input file with wikipedia url.",
     )
     parser.add_argument(
-        "--wikidata", metavar="PATH", type=str, help="Input file with wikidata ids."
+        "--wikidata",
+        metavar="PATH",
+        type=str,
+        help="Input file with wikidata ids.",
     )
     parser.add_argument(
         "--langs",
@@ -47,6 +54,7 @@ def parse_args():
 def main():
     logging.basicConfig(level=logging.WARNING)
     log.setLevel(logging.WARNING)
+    wikipediaapi.log.setLevel(logging.WARNING)
     args = parse_args()
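
Note: the Wikipedia-API package exposes a module-level logger, so the added
line mutes the library's own per-request messages alongside the script's.
A minimal sketch, assuming the wikipediaapi package is installed:

    import logging
    import wikipediaapi

    logging.basicConfig(level=logging.WARNING)
    wikipediaapi.log.setLevel(logging.WARNING)  # silence INFO/DEBUG from the library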

View file

@@ -138,7 +138,7 @@ def beautify_page(html, lang):
     for x in soup.find_all():
         if len(x.text.strip()) == 0:
             x.extract()
-    soup = remove_bad_sections(soup, lang)
+    # soup = remove_bad_sections(soup, lang)
     html = str(soup.prettify())
     html = htmlmin.minify(html, remove_empty_space=True)
     return html
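
Note: with remove_bad_sections() commented out, beautify_page boils down to
dropping empty tags and minifying. A standalone sketch of that remaining
pipeline, assuming beautifulsoup4 and htmlmin (the parser choice here is
illustrative):

    import htmlmin
    from bs4 import BeautifulSoup

    def beautify(html):
        soup = BeautifulSoup(html, "html.parser")
        for x in soup.find_all():
            if len(x.text.strip()) == 0:
                x.extract()  # remove tags that render no visible text
        return htmlmin.minify(str(soup.prettify()), remove_empty_space=True)

    print(beautify("<div><p></p><p>kept</p></div>"))  # minified markup, empty <p> gone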
@@ -181,7 +181,8 @@ def download(directory, url):
         return None
     page = get_wiki_page(lang, page_name)
     try:
-        text = try_get(page, "text")
+        # text = try_get(page, "text")
+        text = try_get(page, "summary")
     except GettingError:
         log.exception(f"Error: page is not downloaded {page_name}.")
         return None
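
Note: this is the change the first commit message describes: try_get() now
reads the page's "summary" attribute instead of "text", so only the lead
section of each article is fetched. A hedged sketch with the Wikipedia-API
package (recent versions of the library also want a user_agent argument):

    import wikipediaapi

    wiki = wikipediaapi.Wikipedia("en")
    page = wiki.page("Wikipedia")
    print(page.summary)  # lead section only; page.text would be the full article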
@@ -236,7 +237,7 @@ def wikipedia_worker(output_dir, checker, langs):
             if not checker(ident):
                 return
             url = url.strip()
-        except (AttributeError, IndexError):
+        except (AttributeError, IndexError, ValueError):
             log.exception(f"{line} is incorrect.")
             return
         parsed = urllib.parse.urlparse(url)
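
Note: adding ValueError to the except tuple lets the worker log and skip
lines whose fields fail to split or convert, instead of crashing. An
illustrative parser in the same spirit (the tab-separated "<id>\t<url>"
line format here is an assumption, not taken from the source):

    import urllib.parse

    def parse_line(line):
        try:
            ident, url = line.split("\t")  # raises ValueError when no tab is present
            ident = int(ident)             # raises ValueError on a garbage id
            url = url.strip()
        except (AttributeError, IndexError, ValueError):
            print(f"{line} is incorrect.")
            return None
        return ident, urllib.parse.urlparse(url)

    parse_line("42\thttps://en.wikipedia.org/wiki/Wikipedia")  # parsed tuple
    parse_line("no-tab-here")                                  # logged and skipped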

View file

@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import os
 import sys