[build] remove edit-page tag from articles.

This commit is contained in:
Dmitry Kunin 2013-10-04 17:37:04 +02:00
parent 662d1f0b1b
commit 75192514b2
2 changed files with 3 additions and 1 deletions

View file

@@ -21,6 +21,7 @@ def cleanUp(soup):
[s.decompose() for s in content.findAll("a", {"id": "mw-mf-last-modified"})]
[s.decompose() for s in content.findAll("span", {"class": "mw-editsection"})]
[s.decompose() for s in content.findAll("a", {"class": "edit-page"})]
[s.decompose() for s in content.findAll("table", {"class": "articleState"})]
[s.decompose() for s in content.findAll("button", {"class": "languageSelector"})]
[s.decompose() for s in content.findAll("a", {"class": "section_anchors"})]

View file

@@ -12,11 +12,12 @@ DUMP_FILES = page.sql.gz redirect.sql.gz category.sql.gz page_props.sql.gz image
# Aggregate target: declared phony and given no recipe of its own here, so it
# only pulls in the prerequisites listed on the `all:` lines.
# NOTE(review): two `all:` lines are present. make merges the prerequisites of
# recipe-less rules for the same target, but this pair looks like the
# before/after of the change (the second adds make_obb and make_apk) --
# confirm that only the second line belongs in the actual Makefile.
.PHONY: all
all: download_images rename_articles countries.txt geocodes.txt process_html
all: download_images rename_articles countries.txt geocodes.txt process_html make_obb make_apk
# Remove the downloaded *.sql.gz dump archives and the Countries directory.
# Declared phony so a file named `clean` can never make the target look
# up to date.
# Both removals are best-effort: the first keeps its `|| true`, and the second
# uses -f so that `make clean` also succeeds when Countries does not exist
# (plain `rm -r` exits non-zero on a missing path and would abort the target).
.PHONY: clean
clean:
	rm *.sql.gz || true
	rm -rf Countries
# Download rule for every file named in DUMP_FILES: the target name ($@) is
# appended to DUMP_URL_PREFIX after a "-" separator to form the URL, and the
# same name is used as wget's output file (-O $@).
# NOTE(review): the recipe line must start with a hard tab in the real
# Makefile; leading whitespace appears to have been stripped in this view.
$(DUMP_FILES):
wget $(DUMP_URL_PREFIX)"-"$@ -O $@