Fixed stray text references left on pages by missing images (e.g. File:Image.JPG)

This commit is contained in:
Alexander Zolotarev 2013-08-12 18:55:01 +03:00
parent c8e55580de
commit f7392dc84b

View file

@@ -22,7 +22,10 @@ def cleanUp(soup):
[s.decompose() for s in content.findAll("div", {"id": "mw-mf-language-section"})]
# cut off geo coords as we process them separately in original files
[s.decompose() for s in content.findAll("div", {"id": "geoCoord"})]
# cut off missing images (looks like text File:Image.JPG on pages)
for s in content.findAll("div", {"class": "thumb"}):
if (not s.find("img")):
s.decompose();
# delete empty sections
sections = content.findAll("div", {"class": "section"})