commit 0d4b1ba779
5 changed files with 254 additions and 176 deletions
builder/htmlprocessor/convert_img.sh (new file, 25 lines)
@@ -0,0 +1,25 @@
#!/bin/bash

if test "$1" == "" ; then
    echo "USAGE: $0 [thumbsInDir] [imagesInDir] [outDir]"
    exit
fi

outdir=$3

mkdir -p $3

mkdir -p $3/thumb
pushd $1
for i in *.png; do convert $i -auto-orient -quality 53 -thumbnail '256x256>' $outdir/thumb/$(basename -s.png $i).jpg; echo $i; done
for i in *.svg; do convert $i -auto-orient -quality 53 -thumbnail '256x256>' $outdir/thumb/$(basename -s.svg $i).jpg; echo $i; done
for i in *.jpg *.JPG *.jpeg; do convert -define jpeg:size=400x280 $i -auto-orient -quality 53 -thumbnail '500x280>' -strip -liquid-rescale '256x256!>' $outdir/thumb/$i; echo $i; done
popd

mkdir -p $3/images
pushd $2
for i in *.jpg; do convert $i -auto-orient -quality 53 -strip -thumbnail '1536x1536>' $outdir/images/$i; echo $i; done
for i in *.png; do convert $i -auto-orient -quality 99 -strip -thumbnail '4000x3000>' PNG8:$outdir/images/$i; echo $i; done
cp *.svg $outdir/images/
popd
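As the USAGE line shows, the script is invoked as convert_img.sh [thumbsInDir] [imagesInDir] [outDir]. It expects ImageMagick's convert on the PATH (the -liquid-rescale thumbnails additionally need an ImageMagick build with the liblqr delegate) and writes thumbnails into $3/thumb and resized article images into $3/images.
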
@@ -4,7 +4,10 @@ import os
import urllib
import shutil
import unicodedata
from bs4 import BeautifulSoup
try:
    from bs4 import BeautifulSoup
except ImportError:
    import BeautifulSoup

reload(sys)
sys.setdefaultencoding('utf-8')

@@ -13,166 +16,184 @@ from strip_function import cleanUp


def insertMapLink(soup, lat, lon, title, pageId):
    hrefLink = "mapswithme://map?v=1&ll=%s,%s&n=%s&id=%s&backurl=guideswithme&appname=Guides%%20With%%20Me"%(lat, lon, urllib.quote(title), pageId)
    mapTag = BeautifulSoup().new_tag("a", href=hrefLink)
    mapTag["class"] = "geolink";
    soup.body.insert(0, mapTag)
    hrefLink = "mapswithme://map?v=1&ll=%s,%s&n=%s&id=%s&backurl=guideswithme&appname=Guides%%20With%%20Me" % (lat, lon, urllib.quote(title), pageId)
    mapTag = BeautifulSoup().new_tag("a", href=hrefLink)
    mapTag["class"] = "geolink"
    soup.body.insert(0, mapTag)


def insertArticleTitle(soup, articleTitle):
    titleTag = BeautifulSoup().new_tag("div")
    titleTag["class"] = "articleHeader"
    titleTag.append(articleTitle)
    soup.body.insert(0, titleTag)
    titleTag = BeautifulSoup().new_tag("div")
    titleTag["class"] = "articleHeader"
    titleTag.append(articleTitle)
    soup.body.insert(0, titleTag)


def insertArticleImage(soup, imagePath):
    imgTag = BeautifulSoup().new_tag("img", src=imagePath)
    imgTag["class"] = "articleImage"
    soup.body.insert(0, imgTag)
    imgTag = BeautifulSoup().new_tag("img", style="background-image: url('%s')" % imagePath, id="articleImage")
    imgTag["class"] = "articleImage"
    soup.body.insert(0, imgTag)

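As an aside (not part of the diff), a quick sketch of the deep link that insertMapLink() builds, with made-up coordinates and page id:

import urllib

lat, lon, title, pageId = "48.8584", "2.2945", "Eiffel Tower", "123"  # made-up values
print "mapswithme://map?v=1&ll=%s,%s&n=%s&id=%s&backurl=guideswithme&appname=Guides%%20With%%20Me" % (lat, lon, urllib.quote(title), pageId)
# -> mapswithme://map?v=1&ll=48.8584,2.2945&n=Eiffel%20Tower&id=123&backurl=guideswithme&appname=Guides%20With%20Me
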
def insertBreadcrumb(soup, articleTitle, parentTitle, parentLink, grandParentTitle, grandParentLink):
    tagFactory = BeautifulSoup()
    tagFactory = BeautifulSoup()

    bcWrapper = tagFactory.new_tag("div")
    bcWrapper["class"] = "breadcrumbs_wrapper"
    if (grandParentTitle):
        grandParentTag = tagFactory.new_tag("a", href=grandParentLink)
        grandParentTag["class"] = "breadcrumb bc1"
        grandParentTag.append(grandParentTitle)
        bcWrapper.append(grandParentTag)
    if (parentTitle):
        parentTag = tagFactory.new_tag("a", href=parentLink)
        parentTag["class"] = "breadcrumb bc2"
        parentTag.append(parentTitle)
        bcWrapper.append(parentTag)
    currTag = tagFactory.new_tag("span")
    currTag["class"] = "breadcrumb bc3"
    currTag.append(articleTitle)
    bcWrapper.append(currTag)
    soup.body.insert(0, bcWrapper)
    bcWrapper = tagFactory.new_tag("div")
    bcWrapper["class"] = "breadcrumbs_wrapper"
    if (grandParentTitle):
        grandParentTag = tagFactory.new_tag("a", href=grandParentLink)
        grandParentTag["class"] = "breadcrumb bc1"
        grandParentTag.append(grandParentTitle)
        bcWrapper.append(grandParentTag)
    if (parentTitle):
        parentTag = tagFactory.new_tag("a", href=parentLink)
        parentTag["class"] = "breadcrumb bc2"
        parentTag.append(parentTitle)
        bcWrapper.append(parentTag)
    currTag = tagFactory.new_tag("span")
    currTag["class"] = "breadcrumb bc3"
    currTag.append(articleTitle)
    bcWrapper.append(currTag)
    soup.body.insert(0, bcWrapper)


def transformStringWithEncoding(str):
    return urllib.unquote(str.decode("latin-1").encode("utf-8"))
def transformStringWithEncoding(string):
    return urllib.unquote(string.decode("latin-1").encode("utf-8"))


def transformString(s):
    unquoted = urllib.unquote(str(s));
    for i in u"\"',/\\@#$%^&*()!~`«»":
        unquoted = unquoted.replace(i, "_")
    return unicode(unquoted.strip("_"))

def formatToNFKD(s):
    return unicodedata.normalize("NFKD", transformString(s))

def unicodeNormalize(s):
    return (u"".join( x for x in formatToNFKD(s) if not unicodedata.category(x).startswith("M"))).lower()
def sanitizeFileName(s):
    # unquote %20 and like that
    s = urllib.unquote(str(s))

def imageExist(fileName):
    global imageFiles
    global imageSet
    unquotedName = unicodeNormalize(fileName)
    if unquotedName in imageFiles:
        imageSet.add(unquotedName)
        return True
    # remove punctuation
    for i in u"\"',/\\@#$%^&*()!~`«»":
        s = s.replace(i, "_")
    s = s.strip("_")

    return False
    # normalize unicode to NFKD (form with modifiers separated)
    s = unicodedata.normalize("NFKD", s)

    # drop modifiers from string (no diacritics)
    s = u"".join(x for x in s if not unicodedata.category(x).startswith("M"))

    # lowercase
    s = s.lower()
    return s


def imageSanitizedPath(fileName):
    """
    return path to image file if it's in dataset
    """
    global imageFiles
    global imageSet
    unquotedName = sanitizeFileName(fileName)
    if unquotedName in imageFiles:
        imageSet.add(unquotedName)
        return 'images/' + unquotedName

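For illustration (not part of the diff), the new sanitizeFileName() steps boil down to the following standalone sketch; the filename is made up:

# -*- coding: utf-8 -*-
import urllib
import unicodedata

def sketch_sanitize(name):
    # same steps as sanitizeFileName above: unquote, punctuation -> "_", NFKD, drop combining marks, lowercase
    s = urllib.unquote(name)
    for ch in u"\"',/\\@#$%^&*()!~`«»":
        s = s.replace(ch, u"_")
    s = s.strip(u"_")
    s = unicodedata.normalize("NFKD", s)
    s = u"".join(c for c in s if not unicodedata.category(c).startswith("M"))
    return s.lower()

print sketch_sanitize(u"Caf\u00e9 de Flore.JPG")  # -> cafe de flore.jpg
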
def rewriteImages(soup):
    imgTag = soup.findAll("img");
    imgTag = soup.findAll("img")

    for imgElement in imgTag:
        del imgElement["alt"]
        #todo rewrite srcset attr if we can get callback on image loading in webview
        del imgElement["srcset"]
    for imgElement in imgTag:
        del imgElement["alt"]
        # todo rewrite srcset attr if we can get callback on image loading in webview
        del imgElement["srcset"]

        index = -1
        splitSrc = imgElement["src"].split("/")
        splitSrc.reverse()
        # checking just two last elements (preview name, real name)
        for fileName in splitSrc[:2]:
            fileName = imageSanitizedPath(fileName)
            if fileName:
                imgElement["src"] = fileName
                break
        else:
            print "Stripping image", imgElement["src"]
            [s.decompose() for s in imgElement.fetchParents("div", {"class": ["thumb tright", "thumbinner", "image"]})]

        index = -1
        srcPath = imgElement["src"]
        splitedSrc = srcPath.split("/")
        if imageExist(splitedSrc[-1]):
            imgElement['src'] = "images/" + unicodeNormalize(splitedSrc[-1])
        elif imageExist(splitedSrc[-2]):
            imgElement['src'] = "images/" + unicodeNormalize(splitedSrc[-2])
        else:
            print "Image strip = " + unicodeNormalize(splitedSrc[-2])
            [s.decompose() for s in imgElement.fetchParents("div", {"class" : ["thumb tright", "thumbinner", "image"]})]

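The splitSrc[:2] loop above checks the two last path components because MediaWiki thumbnail URLs end with the scaled preview name and, one level up, the original file name. A one-off sketch with a hypothetical URL (not part of the diff):

src = "//upload.wikimedia.org/wikipedia/commons/thumb/a/ab/Tower.jpg/300px-Tower.jpg"  # hypothetical
splitSrc = src.split("/")
splitSrc.reverse()
print splitSrc[:2]  # ['300px-Tower.jpg', 'Tower.jpg'] - preview name, then real name
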
def rewriteCrossLinks(soup):
    global idMapping
    global redirectMapping
    links = soup.findAll("a")
    global idMapping
    global redirectMapping
    links = soup.findAll("a")

    for link in links:
        destTitle = link["href"].split("/",2)[-1]
        destTitle = transformStringWithEncoding(destTitle)
        destTitle = redirectMapping.get(destTitle, destTitle);
    for link in links:
        destTitle = link["href"].split("/", 2)[-1]
        destTitle = transformStringWithEncoding(destTitle)
        destTitle = redirectMapping.get(destTitle, destTitle)

        if destTitle in idMapping:
            link["href"] = idMapping.get(destTitle, link["href"]) + ".html"
            continue
        if destTitle in idMapping:
            link["href"] = idMapping.get(destTitle, link["href"]) + ".html"
            continue

        if "/wiki/File:" in link["href"] and "http" not in link["href"] and "www" not in link["href"]:
            imgElement = link.find("img")
            if imgElement:
                link["href"] = imgElement["src"]
                continue
        if "/wiki/File:" in link["href"] and "http" not in link["href"] and "www" not in link["href"]:
            imgElement = link.find("img")
            if imgElement:
                link["href"] = imgElement["src"]
                continue

        if "/wiki/" in link["href"]:
            if link.string:
                link.replace_with(link.string)
            else:
                link.replace_with("")

        if "/wiki/" in link["href"]:
            if link.string:
                link.replace_with(link.string)
            else:
                link.replace_with("")

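A one-line sketch (hypothetical href, not part of the diff) of the destTitle extraction above; split("/", 2) keeps sub-article paths intact:

print "/wiki/France/Paris".split("/", 2)[-1]  # -> France/Paris
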
def writeHtml(content, fileName):
    global outDir
    open(os.path.join(outDir, fileName + ".html"), "w").write(content.encode('utf-8'))
    global outDir
    open(os.path.join(outDir, fileName + ".html"), "w").write(content.encode('utf-8'))


def fixTitle(title):
    return title.split('/')[-1].replace('_', ' ')
    return title.split('/')[-1].replace('_', ' ')

##############################################################################
if len(sys.argv) < 9:
    print "Usage: " + sys.argv[0] + " <directory with html articles> <images directory> <article set info file> <redirect info file> <geocoords file> <output directory> <threadIndex> <cpu core count>"
    exit(1)
    print "Usage: " + sys.argv[0] + " <directory with html articles> <images directory> <article set info file> <redirect info file> <geocoords file> <output directory> <threadIndex> <cpu core count>"
    exit(1)

inDir = sys.argv[1]

imagesSrcDir = sys.argv[2]
imageFiles = dict([(unicodeNormalize(file), file) for file in os.listdir(imagesSrcDir)])
idMapping = dict([(unicode(i.split("\t")[1]), unicode(i.split("\t")[0])) for i in open(sys.argv[3])])
imageFiles = dict([(sanitizeFileName(file), file) for file in os.listdir(imagesSrcDir)])

idMappingFile = sys.argv[3]
idMapping = dict([(unicode(i.split("\t")[1]), unicode(i.split("\t")[0])) for i in open(idMappingFile)])

articleImages = dict([(i.split("\t")[0], i.strip().split("\t")[3]) for i in open(sys.argv[3])])

# pageId => [parentId, parentTitle, grandParentId, grandParentTitle], ids and titles can be "NULL"
ancestors = dict([(i.split("\t")[0], i.strip().split("\t")[4:8]) for i in open(sys.argv[3])])

redirectMapping = dict([(unicode(line.split("\t")[1]), unicode(line.split("\t")[3].strip())) for line in open(sys.argv[4])])

coords = dict([(line.split("\t")[0], (line.split("\t")[1], line.split("\t")[2])) for line in open(sys.argv[5])])
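The article set info file (sys.argv[3]) is read three times above; judging only by the column indices used, each tab-separated row carries the page id, the title, the article image and the ancestor ids/titles. A made-up row (not part of the diff; the third column is unused here):

row = "123\tFrance/Paris\t-\tEiffel_tower.jpg\t7\tFrance\tNULL\tNULL\n"
cols = row.strip().split("\t")
print cols[0], cols[1], cols[3], cols[4:8]
# -> 123 France/Paris Eiffel_tower.jpg ['7', 'France', 'NULL', 'NULL']
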
pageIdToTitle = {}
for key, value in idMapping.iteritems():
    if value in coords:
        pageIdToTitle[value] = fixTitle(str(key))

pageIdToTitle = {v: fixTitle(str(k)) for k, v in idMapping.iteritems()}

outDir = sys.argv[6]
threadIndex = int(sys.argv[7])
coreCount = int(sys.argv[8])
files = [urllib.unquote(file) for file in idMapping.values()]
thisFiles = files[threadIndex * len(files) / coreCount : (threadIndex + 1) * len(files) / coreCount]

thisFiles = files[threadIndex * len(files) / coreCount: (threadIndex + 1) * len(files) / coreCount]
imageSet = set()

# preload coords

if not os.path.exists(outDir):
    os.makedirs(outDir)
    os.makedirs(outDir)

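The threadIndex/coreCount slice splits the article list into contiguous chunks so that each invocation of the script only processes its own share. A sketch with made-up values (not part of the diff):

files = ["a", "b", "c", "d", "e", "f", "g"]  # 7 made-up articles
coreCount = 3
for threadIndex in range(coreCount):
    print threadIndex, files[threadIndex * len(files) / coreCount:(threadIndex + 1) * len(files) / coreCount]
# 0 ['a', 'b']
# 1 ['c', 'd']
# 2 ['e', 'f', 'g']
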
for file in thisFiles:
    soup = BeautifulSoup(open(os.path.join(inDir, file)))
    soup = cleanUp(soup)
    rewriteImages(soup)
    rewriteCrossLinks(soup)
    # insert article "header" - image with breadcrumbs and map link
    if file in coords:
    soup = BeautifulSoup(open(os.path.join(inDir, file)))
    soup = cleanUp(soup)
    rewriteImages(soup)
    rewriteCrossLinks(soup)

    articleTitle = pageIdToTitle[file]

        insertMapLink(soup, coords[file][0], coords[file][1], articleTitle, file)
    if file in coords:
        insertMapLink(soup, coords[file][0], coords[file][1], articleTitle, file)

    insertArticleTitle(soup, articleTitle)

@@ -182,13 +203,25 @@ for file in thisFiles:
    grandParentLink = ancestors[file][2] + ".html" if ancestors[file][2] != "NULL" else False
    insertBreadcrumb(soup, articleTitle, parentTitle, parentLink, grandParentTitle, grandParentLink)

    insertArticleImage(soup, "header_images/" + file + ".jpg")
    articleImage = imageSanitizedPath(articleImages[file])
    if articleImage:
        insertArticleImage(soup, articleImage)
    else:
        print "article image not found:", articleImages[file]

    writeHtml(soup, file)
    writeHtml(soup, file)

imagesDstDir = os.path.join(outDir, "images")
imagesDstDir = os.path.join(outDir, "images_fullsize")
if not os.path.exists(imagesDstDir):
    os.makedirs(imagesDstDir)
    os.makedirs(imagesDstDir)

for image in imageSet:
    shutil.copy2(os.path.join(imagesSrcDir, imageFiles[image]), os.path.join(imagesDstDir, image))
    shutil.copy2(os.path.join(imagesSrcDir, imageFiles[image]), os.path.join(imagesDstDir, image))

thumbsDstDir = os.path.join(outDir, "thumb_fullsize")
if not os.path.exists(thumbsDstDir):
    os.makedirs(thumbsDstDir)

for k, v in articleImages.iteritems():
    if k in thisFiles and sanitizeFileName(v) in imageFiles:
        shutil.copy2(os.path.join(imagesSrcDir, imageFiles[sanitizeFileName(v)]), os.path.join(thumbsDstDir, k + ".jpg"))

@@ -3,76 +3,81 @@ import sys
import os
import urllib
import shutil
from bs4 import BeautifulSoup
try:
    from bs4 import BeautifulSoup
except ImportError:
    import BeautifulSoup


reload(sys)
sys.setdefaultencoding('utf-8')


def cleanUp(soup):
    content = soup.find("div", {"id": "content"})
    content = soup.find("div", {"id": "content"})

    # remove all specified tags
    [s.decompose() for s in content(['noscript'])]
    # remove all specified tags
    [s.decompose() for s in content(['noscript'])]

    [s.decompose() for s in content.findAll("a", {"id": "mw-mf-last-modified"})]
    [s.decompose() for s in content.findAll("span", {"class": "mw-editsection"})]
    [s.decompose() for s in content.findAll("table", {"class": "articleState"})]
    [s.decompose() for s in content.findAll("button", {"class": "languageSelector"})]
    [s.decompose() for s in content.findAll("a", {"class": "section_anchors"})]
    [s.decompose() for s in content.findAll("div", {"id": "mw-mf-language-section"})]
    # cut off geo coords as we process them separately in original files
    [s.decompose() for s in content.findAll("div", {"id": "geoCoord"})]
    # cut off missing images (looks like text File:Image.JPG on pages)
    for s in content.findAll("div", {"class": "thumb"}):
        if (not s.find("img")):
            s.decompose();
    [s.decompose() for s in content.findAll("a", {"id": "mw-mf-last-modified"})]
    [s.decompose() for s in content.findAll("span", {"class": "mw-editsection"})]
    [s.decompose() for s in content.findAll("table", {"class": "articleState"})]
    [s.decompose() for s in content.findAll("button", {"class": "languageSelector"})]
    [s.decompose() for s in content.findAll("a", {"class": "section_anchors"})]
    [s.decompose() for s in content.findAll("div", {"id": "mw-mf-language-section"})]
    # cut off geo coords as we process them separately in original files
    [s.decompose() for s in content.findAll("div", {"id": "geoCoord"})]
    # cut off missing images (looks like text File:Image.JPG on pages)
    for s in content.findAll("div", {"class": "thumb"}):
        if (not s.find("img")):
            s.decompose()

    # delete empty sections
    sections = content.findAll("div", {"class": "section"})
    for section in sections:
        hasText = 0
        for string in section.div.stripped_strings:
            hasText += 1
        if not hasText:
            section.decompose()
    # delete empty sections
    sections = content.findAll("div", {"class": "section"})
    for section in sections:
        hasText = 0
        for string in section.div.stripped_strings:
            hasText += 1
        if not hasText:
            section.decompose()

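A small sketch (made-up markup, not part of the diff) of the emptiness test above; a section whose inner div yields no stripped strings is dropped:

from bs4 import BeautifulSoup

section = BeautifulSoup('<div class="section"><div>  \n </div></div>').find("div", {"class": "section"})
hasText = 0
for string in section.div.stripped_strings:
    hasText += 1
print hasText  # 0 -> this section would be decomposed
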
    # Wrap content with our own header and body, and restore original div structure for css
    divContentWrapper = soup.new_tag("div", id="content_wrapper")
    divContentWrapper["class"] = "show"
    content = content.wrap(divContentWrapper)
    content = content.wrap(soup.new_tag("div", id="mw-mf-page-center"))
    content = content.wrap(soup.new_tag("div", id="mw-mf-viewport"))
    bodyTag = soup.new_tag("body")
    bodyTag["class"] = "mediawiki ltr sitedir-ltr mobile stable skin-mobile action-view"
    content = content.wrap(bodyTag)
    htmlTag = soup.new_tag("html", lang="en", dir="ltr")
    htmlTag["class"] = "client-js"
    content = content.wrap(htmlTag)
    # Here we add our own js and css into the <head>
    headTag = soup.new_tag("head")
    headTag.append(soup.new_tag("meta", charset="UTF-8"))
    headTag.append(soup.new_tag("link", rel="stylesheet", type="text/css", href="css/article.css"))
    headTag.append(soup.new_tag("script", type="text/javascript", src="js/article.js"))
    meta1 = soup.new_tag("meta", content="yes")
    # workaround as "name" is used in python
    meta1["name"] = "apple-mobile-web-app-capable"
    headTag.append(meta1)
    meta2 = soup.new_tag("meta", content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=1.6")
    meta2["name"] = "viewport"
    headTag.append(meta2)
    content.body.insert_before(headTag)
    # Wrap content with our own header and body, and restore original div structure for css
    divContentWrapper = soup.new_tag("div", id="content_wrapper")
    divContentWrapper["class"] = "show"
    content = content.wrap(divContentWrapper)
    content = content.wrap(soup.new_tag("div", id="mw-mf-page-center"))
    content = content.wrap(soup.new_tag("div", id="mw-mf-viewport"))
    bodyTag = soup.new_tag("body")
    bodyTag["class"] = "mediawiki ltr sitedir-ltr mobile stable skin-mobile action-view"
    content = content.wrap(bodyTag)
    htmlTag = soup.new_tag("html", lang="en", dir="ltr")
    htmlTag["class"] = "client-js"
    content = content.wrap(htmlTag)
    # Here we add our own js and css into the <head>
    headTag = soup.new_tag("head")
    headTag.append(soup.new_tag("meta", charset="UTF-8"))
    headTag.append(soup.new_tag("link", rel="stylesheet", type="text/css", href="css/article.css"))
    headTag.append(soup.new_tag("script", type="text/javascript", src="js/article.js"))
    meta1 = soup.new_tag("meta", content="yes")
    # workaround as "name" is used in python
    meta1["name"] = "apple-mobile-web-app-capable"
    headTag.append(meta1)
    meta2 = soup.new_tag("meta", content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=1.6")
    meta2["name"] = "viewport"
    headTag.append(meta2)
    content.body.insert_before(headTag)

    return content
    return content

if __name__ == '__main__':
    if len(sys.argv) < 2:
        print "Usage: " + sys.argv[0] + " <inFile> [outFile]"
        exit(1)
    if len(sys.argv) < 2:
        print "Usage: " + sys.argv[0] + " <inFile> [outFile]"
        exit(1)

    file = sys.argv[1]
    soup = BeautifulSoup(open(file))
    soup = cleanUp(soup)
    file = sys.stdout
    if len(sys.argv) > 2:
        file = open(sys.argv[2], 'w')
    file.write(soup.encode('utf-8'))
    file = sys.argv[1]
    soup = BeautifulSoup(open(file))
    soup = cleanUp(soup)
    file = sys.stdout
    if len(sys.argv) > 2:
        file = open(sys.argv[2], 'w')
    file.write(soup.encode('utf-8'))

@@ -22,7 +22,10 @@

.articleImage {
    width: 100%;
    height: auto;
    height: 256px;
    background-position: center center;
    background-repeat: no-repeat;
    background-size: cover;
}

.breadcrumbs_wrapper {

@@ -52,6 +52,18 @@ function onPageLoaded() {
    [].forEach.call(sections, function(section) {
        addListener(section, 'click', onSectionClick);
    });

    var img = new Image();
    img.onload = function() {
        if (document.getElementById('articleImage').offsetWidth > this.width) {
            var rad = document.getElementById('articleImage').offsetWidth / this.width;
            document.getElementById('articleImage').style.webkitFilter = "blur(" + rad + "px)";
            rad *= 2;
            document.getElementById('articleImage').style.margin = "-" + rad + "px -" + rad + "px -" + rad + "px -" + rad + "px";
        }
    };
    img.src = document.getElementById('articleImage').style.backgroundImage.replace(/url\((['"])?(.*?)\1\)/gi, '$2').split(',')[0];

}

window.onload = onPageLoaded;
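
In effect the new block reads the header image URL back out of the inline background-image style and, once the image has loaded, blurs #articleImage proportionally (hiding the blurred edges with negative margins) whenever the element is wider than the image's natural width; the intent appears to be masking the upscaling that background-size: cover would otherwise make visible on small header images.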