[crawler] Download full wikitravel images, not thumbnails.

This commit is contained in:
Yury Melnichek 2012-09-17 12:13:25 +02:00 committed by Alex Zolotarev
parent f8d90e92ce
commit f8b8a13a87
2 changed files with 7 additions and 1 deletions

View file

@ -0,0 +1,4 @@
#!/bin/bash
# Rewrites thumbnail-style image URLs into a sorted, de-duplicated URL list.
#
# Usage: <this-script> INPUT_FILE OUTPUT_FILE
#   $1 - file with one URL per line
#   $2 - file to (over)write with the rewritten URLs
#
# For each line of the form "<prefix>/thumb<path>/<digits>px-<rest>", the
# "/thumb" component and the trailing "/<digits>px-<rest>" component are
# dropped, leaving "<prefix><path>". Lines that do not match pass through
# unchanged. The result is sorted with duplicates removed.
set -e -u -x
# Without pipefail only sort's exit status counts, so an unreadable input
# file would silently produce an empty output and a zero exit code.
set -o pipefail
sed 's:/thumb\(/.*\)/[0-9][0-9]*px-.*$:\1:' < "$1" | sort -u > "$2"

View file

@ -28,6 +28,8 @@ cat wikitravel-pages.json | python $MY_PATH/wikitravel-optimize-articles.py
# Collect the image URLs into wikitravel-images.urls.
"$MY_PATH/extract-image-urls.sh" wikitravel-images.urls
# NOTE(review): this fetches the URLs exactly as extracted; if those are
# thumbnail URLs this download may be redundant with the one below — confirm.
wget --wait=1 --no-clobber -i wikitravel-images.urls
# Normalize the extracted URLs. The output name must match the list that the
# following wget reads (".urls", not ".url"), otherwise wget is handed a file
# that was never created.
"$MY_PATH/normalize-image-urls.sh" wikitravel-images.urls wikitravel-images-normalized.urls
# --no-clobber skips files that already exist locally; --wait/--random-wait
# space out the requests.
wget --wait=1 --random-wait --no-clobber -i wikitravel-images-normalized.urls
# TODO: Run publisher.