From be22e43d7d70b280a4dc1afeed51199e98978556 Mon Sep 17 00:00:00 2001
From: Behdad Esfahbod <behdad@behdad.org>
Date: Sun, 9 Feb 2025 14:55:44 +0000
Subject: [PATCH] [test/fuzzing] Run each fuzzer on all fonts in one process

Much, much faster, especially under valgrind, than spawning one process per font.

Fixes https://github.com/harfbuzz/harfbuzz/issues/5061
---
 test/fuzzing/meson.build                  |   6 +-
 test/fuzzing/run-draw-fuzzer-tests.py     |  99 +++++++++----------
 test/fuzzing/run-repacker-fuzzer-tests.py | 107 ++++++++++----------
 test/fuzzing/run-shape-fuzzer-tests.py    | 102 ++++++++++---------
 test/fuzzing/run-subset-fuzzer-tests.py   | 113 +++++++++++-----------
 5 files changed, 205 insertions(+), 222 deletions(-)

diff --git a/test/fuzzing/meson.build b/test/fuzzing/meson.build
index f6ebbddcf..cada41948 100644
--- a/test/fuzzing/meson.build
+++ b/test/fuzzing/meson.build
@@ -41,23 +41,21 @@ test('shape-fuzzer', find_program('run-shape-fuzzer-tests.py'),
   args: [
     hb_shape_fuzzer_exe,
   ],
-  timeout: 90,
   depends: [hb_shape_fuzzer_exe, libharfbuzz, libharfbuzz_subset],
   workdir: meson.current_build_dir() / '..' / '..',
   env: env,
   priority: 1,
-  suite: ['fuzzing', 'slow'],
+  suite: ['fuzzing'],
 )
 
 test('subset-fuzzer', find_program('run-subset-fuzzer-tests.py'),
   args: [
     hb_subset_fuzzer_exe,
   ],
-  timeout: 90,
   workdir: meson.current_build_dir() / '..' / '..',
   env: env,
   priority: 1,
-  suite: ['fuzzing', 'slow'],
+  suite: ['fuzzing'],
 )
 
 test('repacker-fuzzer', find_program('run-repacker-fuzzer-tests.py'),
diff --git a/test/fuzzing/run-draw-fuzzer-tests.py b/test/fuzzing/run-draw-fuzzer-tests.py
index 8b5a2e82d..33b265e25 100755
--- a/test/fuzzing/run-draw-fuzzer-tests.py
+++ b/test/fuzzing/run-draw-fuzzer-tests.py
@@ -1,66 +1,59 @@
 #!/usr/bin/env python3
 
-import sys, os, subprocess, tempfile, shutil
+import sys
+import os
+import subprocess
+import tempfile
 
+def run_command(command):
+    with tempfile.TemporaryFile() as tempf:
+        p = subprocess.Popen(command, stdout=tempf, stderr=tempf)
+        p.wait()
+        tempf.seek(0)
+        output = tempf.read().decode('utf-8', errors='replace')
+    return output, p.returncode
 
-def cmd (command):
-	# https://stackoverflow.com/a/4408409 as we might have huge output sometimes
-	with tempfile.TemporaryFile () as tempf:
-		p = subprocess.Popen (command, stderr=tempf)
+srcdir = os.getenv("srcdir", ".")
+EXEEXT = os.getenv("EXEEXT", "")
+top_builddir = os.getenv("top_builddir", ".")
 
-		try:
-			p.wait ()
-			tempf.seek (0)
-			text = tempf.read ()
+hb_draw_fuzzer = os.path.join(top_builddir, "hb-draw-fuzzer" + EXEEXT)
+# If not found automatically, try sys.argv[1]
+if not os.path.exists(hb_draw_fuzzer):
+    if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]):
+        sys.exit(
+            "Failed to find hb-draw-fuzzer binary automatically.\n"
+            "Please provide it as the first argument to the tool."
+        )
+    hb_draw_fuzzer = sys.argv[1]
 
-			#TODO: Detect debug mode with a better way
-			is_debug_mode = b"SANITIZE" in text
+print("Using hb_draw_fuzzer:", hb_draw_fuzzer)
 
-			return ("" if is_debug_mode else text.decode ("utf-8").strip ()), p.returncode
-		except subprocess.TimeoutExpired:
-			return 'error: timeout, ' + ' '.join (command), 1
+# Collect all files from the fonts/ directory
+parent_path = os.path.join(srcdir, "fonts")
+if not os.path.isdir(parent_path):
+    sys.exit(f"Directory {parent_path} not found or not a directory.")
 
+files_to_check = [
+    os.path.join(parent_path, f) for f in os.listdir(parent_path)
+    if os.path.isfile(os.path.join(parent_path, f))
+]
 
-srcdir = os.getenv ("srcdir", ".")
-EXEEXT = os.getenv ("EXEEXT", "")
-top_builddir = os.getenv ("top_builddir", ".")
-hb_draw_fuzzer = os.path.join (top_builddir, "hb-draw-fuzzer" + EXEEXT)
+if not files_to_check:
+    print(f"No files found in {parent_path}")
+    sys.exit(1)
 
-if not os.path.exists (hb_draw_fuzzer):
-	if len (sys.argv) == 1 or not os.path.exists (sys.argv[1]):
-		sys.exit ("""Failed to find hb-draw-fuzzer binary automatically,
-please provide it as the first argument to the tool""")
+# Single invocation passing all files
+cmd_line = [hb_draw_fuzzer] + files_to_check
+output, returncode = run_command(cmd_line)
 
-	hb_draw_fuzzer = sys.argv[1]
+# Print output if not empty
+if output.strip():
+    print(output)
 
-print ('hb_draw_fuzzer:', hb_draw_fuzzer)
-fails = 0
+# If there's an error, print a message and exit non-zero
+if returncode != 0:
+    print("Failure while processing these files:", ", ".join(os.path.basename(f) for f in files_to_check))
+    sys.exit(returncode)
 
-valgrind = None
-if os.getenv ('RUN_VALGRIND', ''):
-	valgrind = shutil.which ('valgrind')
-	if valgrind is None:
-		sys.exit ("""Valgrind requested but not found.""")
-
-parent_path = os.path.join (srcdir, "fonts")
-for file in os.listdir (parent_path):
-	if "draw" not in file: continue
-	path = os.path.join (parent_path, file)
-
-	if valgrind:
-		text, returncode = cmd ([valgrind, '--leak-check=full', '--error-exitcode=1', hb_draw_fuzzer, path])
-	else:
-		text, returncode = cmd ([hb_draw_fuzzer, path])
-		if 'error' in text:
-			returncode = 1
-
-	if (not valgrind or returncode) and text.strip ():
-		print (text)
-
-	if returncode != 0:
-		print ('failure on %s' % file)
-		fails = fails + 1
-
-
-if fails:
-	sys.exit ("%d draw fuzzer related tests failed." % fails)
+print("All files processed successfully.")
diff --git a/test/fuzzing/run-repacker-fuzzer-tests.py b/test/fuzzing/run-repacker-fuzzer-tests.py
index 85a23e13e..81971531f 100755
--- a/test/fuzzing/run-repacker-fuzzer-tests.py
+++ b/test/fuzzing/run-repacker-fuzzer-tests.py
@@ -1,68 +1,65 @@
 #!/usr/bin/env python3
 
-import sys, os, subprocess, tempfile, shutil
+import sys
+import os
+import subprocess
+import tempfile
+
+def run_command(command):
+    with tempfile.TemporaryFile() as tempf:
+        p = subprocess.Popen(command, stdout=tempf, stderr=tempf)
+        p.wait()
+        tempf.seek(0)
+        output = tempf.read().decode('utf-8', errors='replace')
+    return output, p.returncode
 
 
-def cmd (command):
-	# https://stackoverflow.com/a/4408409 as we might have huge output sometimes
-	with tempfile.TemporaryFile () as tempf:
-		p = subprocess.Popen (command, stderr=tempf)
+# Environment and binary location
+srcdir = os.getenv("srcdir", ".")
+EXEEXT = os.getenv("EXEEXT", "")
+top_builddir = os.getenv("top_builddir", ".")
 
-		try:
-			p.wait ()
-			tempf.seek (0)
-			text = tempf.read ()
+hb_repacker_fuzzer = os.path.join(top_builddir, "hb-repacker-fuzzer" + EXEEXT)
+# If the binary isn't found, try sys.argv[1]
+if not os.path.exists(hb_repacker_fuzzer):
+    if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]):
+        sys.exit(
+            "Failed to find hb-repacker-fuzzer binary automatically.\n"
+            "Please provide it as the first argument to the tool."
+        )
+    hb_repacker_fuzzer = sys.argv[1]
 
-			#TODO: Detect debug mode with a better way
-			is_debug_mode = b"SANITIZE" in text
+print("hb_repacker_fuzzer:", hb_repacker_fuzzer)
 
-			return ("" if is_debug_mode else text.decode ("utf-8").strip ()), p.returncode
-		except subprocess.TimeoutExpired:
-			return 'error: timeout, ' + ' '.join (command), 1
+# Collect all files from graphs/
+graphs_path = os.path.join(srcdir, "graphs")
+if not os.path.isdir(graphs_path):
+    sys.exit(f"No 'graphs' directory found at {graphs_path}.")
 
+files_to_check = [
+    os.path.join(graphs_path, f)
+    for f in os.listdir(graphs_path)
+    if os.path.isfile(os.path.join(graphs_path, f))
+]
 
-srcdir = os.getenv ("srcdir", ".")
-EXEEXT = os.getenv ("EXEEXT", "")
-top_builddir = os.getenv ("top_builddir", ".")
-hb_repacker_fuzzer = os.path.join (top_builddir, "hb-repacker-fuzzer" + EXEEXT)
+if not files_to_check:
+    print("No files found in the 'graphs' directory.")
+    sys.exit(1)
 
-if not os.path.exists (hb_repacker_fuzzer):
-        if len (sys.argv) < 2 or not os.path.exists (sys.argv[1]):
-                sys.exit ("""Failed to find hb-repacker-fuzzer binary automatically,
-please provide it as the first argument to the tool""")
+# Single invocation passing all files
+print(f"Running repacker fuzzer against {len(files_to_check)} file(s) in 'graphs'...")
+cmd_line = [hb_repacker_fuzzer] + files_to_check
+output, returncode = run_command(cmd_line)
 
-        hb_repacker_fuzzer = sys.argv[1]
+# Print the output if present
+if output.strip():
+    print(output)
 
-print ('hb_repacker_fuzzer:', hb_repacker_fuzzer)
-fails = 0
+# Exit if there's an error
+if returncode != 0:
+    print("Failed for these files:")
+    for f in files_to_check:
+        print("  ", f)
+    sys.exit("1 repacker fuzzer related test(s) failed.")
 
-valgrind = None
-if os.getenv ('RUN_VALGRIND', ''):
-	valgrind = shutil.which ('valgrind')
-	if valgrind is None:
-		sys.exit ("""Valgrind requested but not found.""")
-
-def run_dir (parent_path):
-	global fails
-	for file in os.listdir (parent_path):
-		path = os.path.join(parent_path, file)
-		print ("running repacker fuzzer against %s" % path)
-		if valgrind:
-			text, returncode = cmd ([valgrind, '--leak-check=full', '--error-exitcode=1', hb_repacker_fuzzer, path])
-		else:
-			text, returncode = cmd ([hb_repacker_fuzzer, path])
-			if 'error' in text:
-				returncode = 1
-
-		if (not valgrind or returncode) and text.strip ():
-			print (text)
-
-		if returncode != 0:
-			print ("failed for %s" % path)
-			fails = fails + 1
-
-
-run_dir (os.path.join (srcdir, "graphs"))
-
-if fails:
-	sys.exit ("%d repacker fuzzer related tests failed." % fails)
+print("All repacker fuzzer tests passed successfully.")
diff --git a/test/fuzzing/run-shape-fuzzer-tests.py b/test/fuzzing/run-shape-fuzzer-tests.py
index 382f60929..6ce7c1c47 100755
--- a/test/fuzzing/run-shape-fuzzer-tests.py
+++ b/test/fuzzing/run-shape-fuzzer-tests.py
@@ -1,65 +1,63 @@
 #!/usr/bin/env python3
 
-import sys, os, subprocess, tempfile, shutil
+import sys
+import os
+import subprocess
+import tempfile
 
+def run_command(command):
+    """Run a command, capturing potentially large output."""
+    with tempfile.TemporaryFile() as tempf:
+        p = subprocess.Popen(command, stdout=tempf, stderr=tempf)
+        p.wait()
+        tempf.seek(0)
+        output = tempf.read().decode('utf-8', errors='replace')
+    return output, p.returncode
 
-def cmd (command):
-	# https://stackoverflow.com/a/4408409 as we might have huge output sometimes
-	with tempfile.TemporaryFile () as tempf:
-		p = subprocess.Popen (command, stderr=tempf)
+srcdir = os.getenv("srcdir", ".")
+EXEEXT = os.getenv("EXEEXT", "")
+top_builddir = os.getenv("top_builddir", ".")
 
-		try:
-			p.wait ()
-			tempf.seek (0)
-			text = tempf.read ()
+hb_shape_fuzzer = os.path.join(top_builddir, "hb-shape-fuzzer" + EXEEXT)
+if not os.path.exists(hb_shape_fuzzer):
+    # If not found automatically, fall back to the first CLI argument.
+    if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]):
+        sys.exit(
+            "Failed to find hb-shape-fuzzer binary automatically.\n"
+            "Please provide it as the first argument to the tool."
+        )
+    hb_shape_fuzzer = sys.argv[1]
 
-			#TODO: Detect debug mode with a better way
-			is_debug_mode = b"SANITIZE" in text
+print("hb_shape_fuzzer:", hb_shape_fuzzer)
 
-			return ("" if is_debug_mode else text.decode ("utf-8").strip ()), p.returncode
-		except subprocess.TimeoutExpired:
-			return 'error: timeout, ' + ' '.join (command), 1
+fonts_dir = os.path.join(srcdir, "fonts")
+if not os.path.isdir(fonts_dir):
+    sys.exit(f"Fonts directory not found at: {fonts_dir}")
 
+# Gather all files in `fonts_dir`
+files_to_test = [
+    os.path.join(fonts_dir, f)
+    for f in os.listdir(fonts_dir)
+    if os.path.isfile(os.path.join(fonts_dir, f))
+]
 
-srcdir = os.getenv ("srcdir", ".")
-EXEEXT = os.getenv ("EXEEXT", "")
-top_builddir = os.getenv ("top_builddir", ".")
-hb_shape_fuzzer = os.path.join (top_builddir, "hb-shape-fuzzer" + EXEEXT)
+if not files_to_test:
+    print(f"No files found in {fonts_dir}")
+    sys.exit(1)
 
-if not os.path.exists (hb_shape_fuzzer):
-	if len (sys.argv) == 1 or not os.path.exists (sys.argv[1]):
-		sys.exit ("""Failed to find hb-shape-fuzzer binary automatically,
-please provide it as the first argument to the tool""")
+# Single invocation with all test files
+cmd_line = [hb_shape_fuzzer] + files_to_test
+output, returncode = run_command(cmd_line)
 
-	hb_shape_fuzzer = sys.argv[1]
+# Print output if any
+if output.strip():
+    print(output)
 
-print ('hb_shape_fuzzer:', hb_shape_fuzzer)
-fails = 0
+# Fail if return code is non-zero
+if returncode != 0:
+    print("Failure on the following file(s):")
+    for f in files_to_test:
+        print("  ", f)
+    sys.exit("1 shape fuzzer test failed.")
 
-valgrind = None
-if os.getenv ('RUN_VALGRIND', ''):
-	valgrind = shutil.which ('valgrind')
-	if valgrind is None:
-		sys.exit ("""Valgrind requested but not found.""")
-
-parent_path = os.path.join (srcdir, "fonts")
-for file in os.listdir (parent_path):
-	path = os.path.join (parent_path, file)
-
-	if valgrind:
-		text, returncode = cmd ([valgrind, '--leak-check=full', '--error-exitcode=1', hb_shape_fuzzer, path])
-	else:
-		text, returncode = cmd ([hb_shape_fuzzer, path])
-		if 'error' in text:
-			returncode = 1
-
-	if (not valgrind or returncode) and text.strip ():
-		print (text)
-
-	if returncode != 0:
-		print ('failure on %s' % file)
-		fails = fails + 1
-
-
-if fails:
-	sys.exit ("%d shape fuzzer related tests failed." % fails)
+print("All shape fuzzer tests passed successfully.")
diff --git a/test/fuzzing/run-subset-fuzzer-tests.py b/test/fuzzing/run-subset-fuzzer-tests.py
index da7d1e570..a3fdee75f 100755
--- a/test/fuzzing/run-subset-fuzzer-tests.py
+++ b/test/fuzzing/run-subset-fuzzer-tests.py
@@ -1,72 +1,69 @@
 #!/usr/bin/env python3
 
-import sys, os, subprocess, tempfile, shutil
+import sys
+import os
+import subprocess
+import tempfile
 
+def run_command(command):
+    """Run a command, capturing potentially large output."""
+    with tempfile.TemporaryFile() as tempf:
+        p = subprocess.Popen(command, stdout=tempf, stderr=tempf)
+        p.wait()
+        tempf.seek(0)
+        output = tempf.read().decode("utf-8", errors="replace")
+    return output, p.returncode
 
-def cmd (command):
-	# https://stackoverflow.com/a/4408409 as we might have huge output sometimes
-	with tempfile.TemporaryFile () as tempf:
-		p = subprocess.Popen (command, stderr=tempf)
+# Environment variables and binary location
+srcdir = os.getenv("srcdir", ".")
+EXEEXT = os.getenv("EXEEXT", "")
+top_builddir = os.getenv("top_builddir", ".")
 
-		try:
-			p.wait ()
-			tempf.seek (0)
-			text = tempf.read ()
+hb_subset_fuzzer = os.path.join(top_builddir, "hb-subset-fuzzer" + EXEEXT)
+# If not found automatically, fall back to the first CLI argument
+if not os.path.exists(hb_subset_fuzzer):
+    if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]):
+        sys.exit(
+            "Failed to find hb-subset-fuzzer binary automatically.\n"
+            "Please provide it as the first argument to the tool."
+        )
+    hb_subset_fuzzer = sys.argv[1]
 
-			#TODO: Detect debug mode with a better way
-			is_debug_mode = b"SANITIZE" in text
+print("hb_subset_fuzzer:", hb_subset_fuzzer)
 
-			return ("" if is_debug_mode else text.decode ("utf-8").strip ()), p.returncode
-		except subprocess.TimeoutExpired:
-			return 'error: timeout, ' + ' '.join (command), 1
+# Gather all files from both directories
+dir1 = os.path.join(srcdir, "..", "subset", "data", "fonts")
+dir2 = os.path.join(srcdir, "fonts")
 
+files_to_test = []
 
-srcdir = os.getenv ("srcdir", ".")
-EXEEXT = os.getenv ("EXEEXT", "")
-top_builddir = os.getenv ("top_builddir", ".")
-hb_subset_fuzzer = os.path.join (top_builddir, "hb-subset-fuzzer" + EXEEXT)
+for d in [dir1, dir2]:
+    if not os.path.isdir(d):
+        # Skip if the directory doesn't exist
+        continue
+    for f in os.listdir(d):
+        file_path = os.path.join(d, f)
+        if os.path.isfile(file_path):
+            files_to_test.append(file_path)
 
-if not os.path.exists (hb_subset_fuzzer):
-        if len (sys.argv) < 2 or not os.path.exists (sys.argv[1]):
-                sys.exit ("""Failed to find hb-subset-fuzzer binary automatically,
-please provide it as the first argument to the tool""")
+if not files_to_test:
+    print("No fonts found in either directory.")
+    sys.exit(1)
 
-        hb_subset_fuzzer = sys.argv[1]
+# Run the fuzzer once, passing all collected files
+print(f"Running subset fuzzer on {len(files_to_test)} file(s).")
+cmd_line = [hb_subset_fuzzer] + files_to_test
+output, returncode = run_command(cmd_line)
 
-print ('hb_subset_fuzzer:', hb_subset_fuzzer)
-fails = 0
+# Print any output
+if output.strip():
+    print(output)
 
-valgrind = None
-if os.getenv ('RUN_VALGRIND', ''):
-	valgrind = shutil.which ('valgrind')
-	if valgrind is None:
-		sys.exit ("""Valgrind requested but not found.""")
+# If there's an error, exit non-zero
+if returncode != 0:
+    print("Failure while processing these files:")
+    for f in files_to_test:
+        print(" ", f)
+    sys.exit("1 subset fuzzer test failed.")
 
-def run_dir (parent_path):
-	global fails
-	for file in os.listdir (parent_path):
-		path = os.path.join(parent_path, file)
-		# TODO: Run on all the fonts not just subset related ones
-		if "subset" not in path: continue
-
-		print ("running subset fuzzer against %s" % path)
-		if valgrind:
-			text, returncode = cmd ([valgrind, '--leak-check=full', '--error-exitcode=1', hb_subset_fuzzer, path])
-		else:
-			text, returncode = cmd ([hb_subset_fuzzer, path])
-			if 'error' in text:
-				returncode = 1
-
-		if (not valgrind or returncode) and text.strip ():
-			print (text)
-
-		if returncode != 0:
-			print ("failed for %s" % path)
-			fails = fails + 1
-
-
-run_dir (os.path.join (srcdir, "..", "subset", "data", "fonts"))
-run_dir (os.path.join (srcdir, "fonts"))
-
-if fails:
-	sys.exit ("%d subset fuzzer related tests failed." % fails)
+print("All subset fuzzer tests passed successfully.")