Fixed text references for missing images like File:Image.JPG
This commit is contained in:
parent
c8e55580de
commit
f7392dc84b
1 changed file with 4 additions and 1 deletion
|
@@ -22,7 +22,10 @@ def cleanUp(soup):
|
|||
[s.decompose() for s in content.findAll("div", {"id": "mw-mf-language-section"})]
|
||||
# cut off geo coords as we process them separately in original files
|
||||
[s.decompose() for s in content.findAll("div", {"id": "geoCoord"})]
|
||||
|
||||
# cut off thumbnails of missing images (they have no <img> and show up as plain text like File:Image.JPG on pages)
|
||||
for s in content.findAll("div", {"class": "thumb"}):
|
||||
if (not s.find("img")):
|
||||
s.decompose();
|
||||
|
||||
# delete empty sections
|
||||
sections = content.findAll("div", {"class": "section"})
|
||||
|
|
Reference in a new issue