forked from organicmaps/organicmaps
[crawler] Download full wikitravel images, not thumbnails.
This commit is contained in:
parent
f8d90e92ce
commit
f8b8a13a87
2 changed files with 7 additions and 1 deletion
4
crawler/normalize-image-urls.sh
Executable file
4
crawler/normalize-image-urls.sh
Executable file
|
@ -0,0 +1,4 @@
|
|||
#!/bin/bash
|
||||
# Strict mode: exit on error (-e), on unset variables (-u), trace commands (-x),
# and fail a pipeline if any stage fails so a broken input is not masked.
set -e -u -x -o pipefail
|
||||
|
||||
# Rewrite each thumbnail URL in $1 by dropping the "/thumb" path component and
# the trailing "/<N>px-<name>" segment, then write the sorted, de-duplicated
# list to $2. Arguments are quoted so paths with spaces or globs work.
sed 's:/thumb\(/.*\)/[0-9][0-9]*px-.*$:\1:' "$1" | sort -u > "$2"
|
|
@ -28,6 +28,8 @@ cat wikitravel-pages.json | python $MY_PATH/wikitravel-optimize-articles.py
|
|||
|
||||
$MY_PATH/extract-image-urls.sh wikitravel-images.urls
|
||||
|
||||
wget --wait=1 --no-clobber -i wikitravel-images.urls
|
||||
# Output name must be the same list file that the following wget reads via -i.
"$MY_PATH"/normalize-image-urls.sh wikitravel-images.urls wikitravel-images-normalized.urls
|
||||
|
||||
wget --wait=1 --random-wait --no-clobber -i wikitravel-images-normalized.urls
|
||||
|
||||
# TODO: Run publisher.
|
||||
|
|
Loading…
Add table
Reference in a new issue