diff --git a/docs/MAPS.md b/docs/MAPS.md
--- a/docs/MAPS.md
+++ b/docs/MAPS.md
@@ -136,6 +136,8 @@ it can fail due to low memory.
 * `MERGE_INTERVAL`: delay in minutes between attempts to merge a coast line.
 Default is 40.
 * `REGIONS`: a list of `.poly` files for regions to be built. One for each line.
 Can be empty. Example: `$(ls ../../data/borders/{UK*,Ireland}.poly)`.
+* `DELTA_WITH`: a path to an older map directory, to compare with the freshly
+generated data in the testing step. Can be empty.
 
 ### Testing
diff --git a/tools/unix/diff_features.py b/tools/unix/diff_features.py
new file mode 100755
--- /dev/null
+++ b/tools/unix/diff_features.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+"""Compare feature counts of two `generator_tool --type_statistics` outputs.
+
+Prints every type whose count changed noticeably between the old and the new
+output, and every type with more than MIN_DIFF features that the new one lacks.
+"""
+import re
+import sys
+
+# Groups: 1 = type name, 2 = size, 3 = count, 4 = length, 5 = area.
+RE_STAT = re.compile(r'(?:\d+\. )?([\w:|-]+?)\|: size = (\d+); count = (\d+); length = ([0-9.e+-]+) m; area = ([0-9.e+-]+) m.\s*')
+# Count changes that are not larger than this are ignored.
+MIN_DIFF = 40
+
+
+def parse_and_add(data, line):
+    """Store the count from one statistics line in data, keyed by type name."""
+    m = RE_STAT.match(line)
+    if m:
+        data[m.group(1)] = int(m.group(3))
+
+
+def read_stats(path):
+    """Return a {type name: count} dict read from a type_statistics output."""
+    data = {}
+    with open(path, 'r') as f:
+        for line in f:
+            parse_and_add(data, line)
+    return data
+
+
+def differs(v1, v2, threshold, min_diff):
+    """Tell whether the change of a count from v1 to v2 should be reported."""
+    if v1 == v2:
+        # Includes 0 -> 0, which is not a change at all.
+        return False
+    if v1 == 0 or v2 == 0:
+        # A type appeared or vanished; the ratio below would divide by zero.
+        return True
+    ratio = max(v1, v2) / float(min(v1, v2))
+    return ratio > threshold and abs(v1 - v2) > min_diff
+
+
+def main(argv):
+    """Compare argv[1] (new) with argv[2] (old) and print the differences."""
+    if len(argv) < 3:
+        print('This tool compares type_statistics output for feature counts')
+        print('Usage: {0} <new_stats> <old_stats> [threshold_in_%]'.format(argv[0]))
+        return 0
+
+    new = read_stats(argv[1])
+    old = read_stats(argv[2])
+    # The percentage becomes a ratio: 50 means "more than 1.5 times".
+    threshold = (int(argv[3]) if len(argv) > 3 else 50) / 100.0 + 1
+
+    # Sorted, so that the report does not depend on dict ordering.
+    for k in sorted(old):
+        v1 = old[k]
+        if k in new:
+            if differs(v1, new[k], threshold, MIN_DIFF):
+                print('{0}: {1} to {2}'.format(k, v1, new[k]))
+        elif v1 > MIN_DIFF:
+            print('- not found: {0}, {1}'.format(k, v1))
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/tools/unix/diff_size.py b/tools/unix/diff_size.py
new file mode 100755
--- /dev/null
+++ b/tools/unix/diff_size.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+"""Report files whose size differs a lot between two directories.
+
+Each regular file of the new directory is compared with the file of the same
+name in the old directory.
+"""
+import os
+import sys
+
+# Size changes that are not larger than this (in bytes) are ignored.
+MIN_DIFF = 1024 * 1024
+
+
+def differs(new_size, old_size, threshold, min_diff):
+    """Tell whether the change from old_size to new_size should be reported."""
+    if new_size == old_size:
+        # Includes two empty files.
+        return False
+    if new_size == 0 or old_size == 0:
+        # A file became empty or stopped being empty; also avoids dividing by 0.
+        return True
+    ratio = max(new_size, old_size) / float(min(new_size, old_size))
+    return ratio > threshold and abs(new_size - old_size) > min_diff
+
+
+def main(argv):
+    """Compare directory argv[1] (new) with argv[2] (old) and print the report."""
+    if len(argv) < 3:
+        print('This tool shows very different file sizes')
+        print('Usage: {0} <new_path> <old_path> [threshold_in_%]'.format(argv[0]))
+        return 0
+
+    new_path = argv[1]
+    old_path = argv[2]
+    # The percentage becomes a ratio: 10 means "more than 1.1 times".
+    threshold = (int(argv[3]) if len(argv) > 3 else 10) / 100.0 + 1
+
+    # Sorted, because os.listdir() returns names in arbitrary order.
+    for f in sorted(os.listdir(new_path)):
+        new_file = os.path.join(new_path, f)
+        if not os.path.isfile(new_file):
+            # Subdirectories (e.g. logs) have no counterpart to look for.
+            continue
+        old_file = os.path.join(old_path, f)
+        if not os.path.isfile(old_file):
+            print('Not found a mirror for {0}'.format(f))
+            continue
+        new_size = os.path.getsize(new_file)
+        old_size = os.path.getsize(old_file)
+        if differs(new_size, old_size, threshold, MIN_DIFF):
+            # Floor division keeps whole megabytes on Python 3 as well.
+            print('{0}: {1} to {2} MB'.format(f, old_size // 1024 // 1024, new_size // 1024 // 1024))
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/tools/unix/generate_planet.sh b/tools/unix/generate_planet.sh
index 1c1d11d911..381146da21 100755
--- a/tools/unix/generate_planet.sh
+++ b/tools/unix/generate_planet.sh
@@ -424,7 +424,7 @@ fi
 if [ "$MODE" == "test" ]; then
   putmode "Step 8: Testing data"
   TEST_LOG="$LOG_PATH/test_planet.log"
-  bash "$TESTING_SCRIPT" "$TARGET" > "$TEST_LOG"
+  bash "$TESTING_SCRIPT" "$TARGET" "${DELTA_WITH-}" > "$TEST_LOG"
   # Send both log files via e-mail
   if [ -n "${MAIL-}" ]; then
     cat <(grep STATUS "$PLANET_LOG") <(echo ---------------) "$TEST_LOG" | mailx -s "Generate_planet: build completed at $(hostname)" "$MAIL"
diff --git a/tools/unix/test_planet.sh b/tools/unix/test_planet.sh
--- a/tools/unix/test_planet.sh
+++ b/tools/unix/test_planet.sh
@@ -6,7 +6,7 @@
 if [ $# -eq 0 ]; then
   echo
   echo "This script analyzes a generate_planet.sh run and prints all issues."
-  echo "Usage: $0 <target_dir>"
+  echo "Usage: $0 <target_dir> [<old_maps_dir>]"
   echo
   exit 1
 fi
@@ -17,6 +17,10 @@ OMIM_PATH="${OMIM_PATH:-$(cd "$(dirname "$0")/../.."; pwd)}"
 TARGET="${TARGET:-$1}"
 LOG_PATH="${LOG_PATH:-$TARGET/logs}"
 PLANET_LOG="$LOG_PATH/generate_planet.log"
+# Optional second argument: a directory with older maps to compare against.
+# It is ignored unless it names an existing directory.
+DELTA_WITH=
+[ $# -gt 1 ] && [ -d "${2-}" ] && DELTA_WITH="$2"
 
 source "$(dirname "$0")/find_generator_tool.sh"
 
@@ -54,6 +58,13 @@ if [ -n "$(ls "$TARGET" | grep '\.mwm\.routing')" ]; then
   done
 fi
 
+# Step 2.5: compare new file sizes with the old ones
+if [ -n "$DELTA_WITH" ]; then
+  echo
+  echo "### SIZE DIFFERENCE WITH $DELTA_WITH"
+  python "$(dirname "$0")/diff_size.py" "$TARGET" "$DELTA_WITH" 5
+fi
+
 # For generator_tool, we create a temporary directory with symlinks to all maps
 # That way, it can be easily cleaned after routing engine creates a lot of temporary directories in it
 FTARGET="$TARGET/symlinked_copy"
@@ -78,6 +89,29 @@ done
 
 [ -z "$FOUND_COASTS" ] && echo && echo 'WARNING: Did not find any coastlines in MWM files'
 
+# Step 3.5: run type_statistics for old and new files to compare
+if [ -n "$DELTA_WITH" ]; then
+  echo
+  echo '### FEATURE DIFFERENCE'
+  # A private scratch directory, so that parallel runs do not clobber each other
+  STATS_TMP="$(mktemp -d "${TMPDIR:-/tmp}/test_planet.XXXXXX")"
+  for mwm in "$FTARGET"/*.mwm; do
+    BASENAME="$(basename "$mwm" .mwm)"
+    if [ -f "$DELTA_WITH/$BASENAME.mwm" ]; then
+      "$GENERATOR_TOOL" --data_path="$FTARGET" --user_resource_path="$OMIM_PATH/data/" --output="$BASENAME" --type_statistics >"$STATS_TMP/new" 2>/dev/null
+      "$GENERATOR_TOOL" --data_path="$DELTA_WITH" --user_resource_path="$OMIM_PATH/data/" --output="$BASENAME" --type_statistics >"$STATS_TMP/old" 2>/dev/null
+      DIFFERENCE="$(python "$(dirname "$0")/diff_features.py" "$STATS_TMP/new" "$STATS_TMP/old" 50)"
+      if [ -n "$DIFFERENCE" ]; then
+        echo
+        echo "$BASENAME"
+        echo "$DIFFERENCE"
+      fi
+    fi
+  done
+  # -rf: must not fail when no map had an older counterpart
+  rm -rf "$STATS_TMP"
+fi
+
 # Step 4: run intergation tests
 echo
 echo '### INTEGRATION TESTS'