From 20efe4aa261bb997dc5f5d64e51a5db20583f960 Mon Sep 17 00:00:00 2001
From: Petri Lehtinen <petri@digip.org>
Date: Sat, 18 Jul 2009 15:59:55 +0300
Subject: [PATCH] Expand parser and Unicode test coverage

---
 test/run-test                 | 19 ++++----
 test/split-testfile.py        |  6 +--
 test/test-invalid             |  2 +-
 test/test-valid               |  2 +-
 test/testdata/invalid         | 20 ++++++++
 test/testdata/invalid-unicode | 89 +++++++++++++++++++++++++++++++++++
 test/testdata/valid           |  2 +-
 7 files changed, 123 insertions(+), 17 deletions(-)
 create mode 100644 test/testdata/invalid-unicode

diff --git a/test/run-test b/test/run-test
index c0db36a..59fd833 100644
--- a/test/run-test
+++ b/test/run-test
@@ -16,15 +16,12 @@ run_testprog() {
     esac
 }
 
-if [ ! -f $TESTFILE ]; then
-    echo "$TESTFILE cannot be found" >&2
-    exit 1
-fi
-
-mkdir -p $TMPDIR
-${srcdir}/split-testfile.py $TESTFILE $TMPDIR | \
-while read input output; do
-    run_test load_dump $input $output
-    run_test loadf_dumpf $input $output
-    run_test loads_dumps $input $output
+for testfile in $TESTFILES; do
+    mkdir -p $TMPDIR
+    ${srcdir}/split-testfile.py $testfile $TMPDIR | while read input output; do
+        run_test load_dump $input $output
+        run_test loadf_dumpf $input $output
+        run_test loads_dumps $input $output
+    done || exit 1
+    rm -rf $TMPDIR
 done
diff --git a/test/split-testfile.py b/test/split-testfile.py
index ef9cc40..308eb7f 100755
--- a/test/split-testfile.py
+++ b/test/split-testfile.py
@@ -17,8 +17,8 @@ def main():
         print 'usage: %s input-file output-directory' % sys.argv[0]
         return 2
 
-    infile = sys.argv[1]
-    outdir = sys.argv[2]
+    infile = os.path.normpath(sys.argv[1])
+    outdir = os.path.normpath(sys.argv[2])
 
     if not os.path.exists(outdir):
         print >>sys.stderr, 'output directory %r does not exist!' % outdir
@@ -40,7 +40,7 @@ def main():
             current.write(line)
 
     close_files(input, output)
-    print >>sys.stderr, "%d test cases" % (i + 1)
+    print >>sys.stderr, "%s: %d test cases" % (infile, i + 1)
 
 if __name__ == '__main__':
     sys.exit(main() or 0)
diff --git a/test/test-invalid b/test/test-invalid
index b949caf..9289fc3 100755
--- a/test/test-invalid
+++ b/test/test-invalid
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-TESTFILE="${srcdir}/testdata/invalid"
+TESTFILES="${srcdir}/testdata/invalid ${srcdir}/testdata/invalid-unicode"
 TMPDIR="tmp"
 
 run_test() {
diff --git a/test/test-valid b/test/test-valid
index 15fd92d..da22f41 100755
--- a/test/test-valid
+++ b/test/test-valid
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-TESTFILE="${srcdir}/testdata/valid"
+TESTFILES="${srcdir}/testdata/valid"
 TMPDIR="tmp"
 
 run_test() {
diff --git a/test/testdata/invalid b/test/testdata/invalid
index 1871d85..41aa410 100644
--- a/test/testdata/invalid
+++ b/test/testdata/invalid
@@ -77,6 +77,11 @@ unexpected token near end of file
 1
 unexpected newline near '"a'
 ========
+{"a":"a" 123}
+====
+1
+'}' expected near '123'
+========
 {[
 ====
 1
@@ -132,6 +137,16 @@ invalid token near '0'
 1
 invalid token near '-0'
 ========
+[troo
+====
+1
+invalid token near 'troo'
+========
+["\a <-- invalid escape"]
+====
+1
+invalid escape near '"\'
+========
 ["	 <-- tab character"]
 ====
 1
@@ -156,3 +171,8 @@ invalid Unicode '\uD888\u3210'
 ====
 1
 invalid Unicode '\uDFAA'
+========
+Ã¥
+====
+1
+'[' or '{' expected near 'Ã¥'
diff --git a/test/testdata/invalid-unicode b/test/testdata/invalid-unicode
new file mode 100644
index 0000000..fbc807d
--- /dev/null
+++ b/test/testdata/invalid-unicode
@@ -0,0 +1,89 @@
+å
+====
+-1
+unable to decode byte 0xe5 at position 0
+========
+["å <-- invalid UTF-8"]
+====
+-1
+unable to decode byte 0xe5 at position 2
+========
+[å]
+====
+-1
+unable to decode byte 0xe5 at position 1
+========
+[aå]
+====
+-1
+unable to decode byte 0xe5 at position 2
+========
+["\uå"]
+====
+-1
+unable to decode byte 0xe5 at position 4
+========
+["\å"]
+====
+-1
+unable to decode byte 0xe5 at position 3
+========
+[0å]
+====
+-1
+unable to decode byte 0xe5 at position 2
+========
+[123å]
+====
+-1
+unable to decode byte 0xe5 at position 4
+========
+[1eå]
+====
+-1
+unable to decode byte 0xe5 at position 3
+========
+[1e1å]
+====
+-1
+unable to decode byte 0xe5 at position 4
+========
+[""]
+====
+-1
+unable to decode byte 0x81 at position 2
+========
+["Á"]
+====
+-1
+unable to decode byte 0xc1 at position 2
+========
+["ý"]
+====
+-1
+unable to decode byte 0xfd at position 2
+========
+["ô¿¿¿"]
+====
+-1
+unable to decode byte 0xf4 at position 2
+========
+["à€¢ <-- overlong encoding"]
+====
+-1
+unable to decode byte 0xe0 at position 2
+========
+["ð€€¢ <-- overlong encoding"]
+====
+-1
+unable to decode byte 0xf0 at position 2
+========
+["àÿ <-- truncated UTF-8"]
+====
+-1
+unable to decode byte 0xe0 at position 2
+========
+["í¢« <-- encoded surrogate half"]
+====
+-1
+unable to decode byte 0xed at position 2
diff --git a/test/testdata/valid b/test/testdata/valid
index ebe7f95..755b393 100644
--- a/test/testdata/valid
+++ b/test/testdata/valid
@@ -4,7 +4,7 @@
 ========
 ["abcdefghijklmnopqrstuvwxyz1234567890 "]
 ========
-["â‚¬Ã¾Ä±Å“É™ÃŸÃ° some utf-8 Ä¸Ê’Ã—Å‹ÂµÃ¥Ã¤Ã¶"]
+["â‚¬Ã¾Ä±Å“É™ÃŸÃ° some utf-8 Ä¸Ê’Ã—Å‹ÂµÃ¥Ã¤Ã¶ð„ž"]
 ========
 ["\"\\\/\b\f\n\r\t"]
 ========