diff --git a/download.sh b/download.sh index 47ba41e..c4d4bc6 100755 --- a/download.sh +++ b/download.sh @@ -54,15 +54,12 @@ fi # shellcheck disable=SC2086 # LANGUAGES is intentionally expanded. log "Selected languages:" $LANGUAGES -TMP_DIR=$(mktemp --tmpdir -d wikiparser-download-XXXX) -trap 'rm -rf $TMP_DIR' EXIT INT HUP - log "Fetching run index" -# Call wget outside of pipeline for errors to be caught by set -e. -wget 'https://dumps.wikimedia.org/other/enterprise_html/runs/' --no-verbose -O "$TMP_DIR/runs.html" + # The date of the latest dump, YYYYMMDD. -LATEST_DUMP=$(grep -Po '(?<=href=")[^"]*' "$TMP_DIR/runs.html" | grep -P '\d{8}' | sort -r | head -n1) +LATEST_DUMP=$(wget 'https://dumps.wikimedia.org/other/enterprise_html/runs/' --no-verbose -O - \ + | grep -Po '(?<=href=")[^"]*' | grep -P '\d{8}' | sort -r | head -n1) LATEST_DUMP="${LATEST_DUMP%/}" log "Checking latest dump $LATEST_DUMP"