[test/fuzzing] Call binaries with 64 fonts at a time

Second try. The previous attempt failed on Windows with a too-many-command-line-arguments error; see https://github.com/harfbuzz/harfbuzz/issues/5061
2025-04-06 05:55:06 +00:00 · 2025-02-09 15:12:03 +00:00 · 2025-02-09 15:12:03 +00:00 · 86329643fd
commit 86329643fd
parent be22e43d7d
5 changed files with 205 additions and 194 deletions
--- a/test/fuzzing/hb_fuzzer_tools.py
+++ b/test/fuzzing/hb_fuzzer_tools.py
@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+import os
+import sys
+import subprocess
+import tempfile
+
+def run_command(command):
+    """
+    Run `command`, capturing its stdout and stderr (potentially large) together in one temp file.
+    Returns (output decoded as UTF-8 with undecodable bytes replaced, exit_code).
+    """
+    with tempfile.TemporaryFile() as tempf:
+        p = subprocess.Popen(command, stdout=tempf, stderr=tempf)
+        p.wait()
+        tempf.seek(0)  # rewind so the read below starts at the beginning of the captured output
+        output = tempf.read().decode("utf-8", errors="replace")
+    return output, p.returncode
+
+def chunkify(lst, chunk_size=64):
+    """
+    Yield consecutive slices of lst, each at most chunk_size long (the last may be shorter).
+    """
+    for i in range(0, len(lst), chunk_size):
+        yield lst[i:i + chunk_size]
+
+def find_fuzzer_binary(default_path, argv):
+    """
+    Return default_path if it exists; otherwise return argv[1] when it is
+    supplied and that path exists; otherwise terminate via sys.exit() with
+    a message naming the binary (the basename of default_path).
+    """
+    if os.path.exists(default_path):
+        return default_path

+    if len(argv) > 1 and os.path.exists(argv[1]):
+        return argv[1]

+    sys.exit(
+        f"Failed to find {os.path.basename(default_path)} binary.\n"
+        "Please provide it as the first argument to the tool."
+    )
+
+def gather_files(directory):
+    """
+    Return a list of paths (directory joined with name) for every entry of `directory` that is a file, in os.listdir order.
+    If `directory` is not an existing directory, returns an empty list.
+    """
+    if not os.path.isdir(directory):
+        return []
+    return [
+        os.path.join(directory, f)
+        for f in os.listdir(directory)
+        if os.path.isfile(os.path.join(directory, f))
+    ]
--- a/test/fuzzing/run-draw-fuzzer-tests.py
+++ b/test/fuzzing/run-draw-fuzzer-tests.py
@ -2,58 +2,52 @@

 import sys
 import os
-import subprocess
-import tempfile
+from hb_fuzzer_tools import (
+    run_command,
+    chunkify,
+    find_fuzzer_binary,
+    gather_files
+)

-def run_command(command):
-    with tempfile.TemporaryFile() as tempf:
-        p = subprocess.Popen(command, stdout=tempf, stderr=tempf)
-        p.wait()
-        tempf.seek(0)
-        output = tempf.read().decode('utf-8', errors='replace')
-    return output, p.returncode
+def main():
+    srcdir = os.getenv("srcdir", ".")
+    EXEEXT = os.getenv("EXEEXT", "")
+    top_builddir = os.getenv("top_builddir", ".")

-srcdir = os.getenv("srcdir", ".")
-EXEEXT = os.getenv("EXEEXT", "")
-top_builddir = os.getenv("top_builddir", ".")
+    # Find the fuzzer binary
+    default_bin = os.path.join(top_builddir, "hb-draw-fuzzer" + EXEEXT)
+    hb_draw_fuzzer = find_fuzzer_binary(default_bin, sys.argv)

-hb_draw_fuzzer = os.path.join(top_builddir, "hb-draw-fuzzer" + EXEEXT)
-# If not found automatically, try sys.argv[1]
-if not os.path.exists(hb_draw_fuzzer):
-    if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]):
-        sys.exit(
-            "Failed to find hb-draw-fuzzer binary automatically.\n"
-            "Please provide it as the first argument to the tool."
-        )
-    hb_draw_fuzzer = sys.argv[1]
+    print("Using hb_draw_fuzzer:", hb_draw_fuzzer)

-print("Using hb_draw_fuzzer:", hb_draw_fuzzer)
+    # Gather all files from fonts/
+    fonts_dir = os.path.join(srcdir, "fonts")
+    files_to_test = gather_files(fonts_dir)

-# Collect all files from the fonts/ directory
-parent_path = os.path.join(srcdir, "fonts")
-if not os.path.isdir(parent_path):
-    sys.exit(f"Directory {parent_path} not found or not a directory.")
+    if not files_to_test:
+        print("No files found in", fonts_dir)
+        sys.exit(0)

-files_to_check = [
-    os.path.join(parent_path, f) for f in os.listdir(parent_path)
-    if os.path.isfile(os.path.join(parent_path, f))
-]
+    fails = 0
+    batch_index = 0

-if not files_to_check:
-    print(f"No files found in {parent_path}")
-    sys.exit(1)
+    # Run in batches of up to 64 files
+    for chunk in chunkify(files_to_test, 64):
+        batch_index += 1
+        cmd_line = [hb_draw_fuzzer] + chunk
+        output, returncode = run_command(cmd_line)

-# Single invocation passing all files
-cmd_line = [hb_draw_fuzzer] + files_to_check
-output, returncode = run_command(cmd_line)
+        if output.strip():
+            print(output)

-# Print output if not empty
-if output.strip():
-    print(output)
+        if returncode != 0:
+            print(f"Failure in batch #{batch_index}")
+            fails += 1

-# If there's an error, print a message and exit non-zero
-if returncode != 0:
-    print("Failure while processing these files:", ", ".join(os.path.basename(f) for f in files_to_check))
-    sys.exit(returncode)
+    if fails > 0:
+        sys.exit(f"{fails} draw fuzzer batch(es) failed.")

-print("All files processed successfully.")
+    print("All draw fuzzer tests passed successfully.")
+
+if __name__ == "__main__":
+    main()
--- a/test/fuzzing/run-repacker-fuzzer-tests.py
+++ b/test/fuzzing/run-repacker-fuzzer-tests.py
@ -2,64 +2,52 @@

 import sys
 import os
-import subprocess
-import tempfile
+from hb_fuzzer_tools import (
+    run_command,
+    chunkify,
+    find_fuzzer_binary,
+    gather_files
+)

-def run_command(command):
-    with tempfile.TemporaryFile() as tempf:
-        p = subprocess.Popen(command, stdout=tempf, stderr=tempf)
-        p.wait()
-        tempf.seek(0)
-        output = tempf.read().decode('utf-8', errors='replace')
-    return output, p.returncode
+def main():
+    srcdir = os.getenv("srcdir", ".")
+    EXEEXT = os.getenv("EXEEXT", "")
+    top_builddir = os.getenv("top_builddir", ".")

+    # Find the fuzzer binary
+    default_bin = os.path.join(top_builddir, "hb-repacker-fuzzer" + EXEEXT)
+    hb_repacker_fuzzer = find_fuzzer_binary(default_bin, sys.argv)

-# Environment and binary location
-srcdir = os.getenv("srcdir", ".")
-EXEEXT = os.getenv("EXEEXT", "")
-top_builddir = os.getenv("top_builddir", ".")
+    print("Using hb_repacker_fuzzer:", hb_repacker_fuzzer)

-hb_repacker_fuzzer = os.path.join(top_builddir, "hb-repacker-fuzzer" + EXEEXT)
-# If the binary isn't found, try sys.argv[1]
-if not os.path.exists(hb_repacker_fuzzer):
-    if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]):
-        sys.exit(
-            "Failed to find hb-repacker-fuzzer binary automatically.\n"
-            "Please provide it as the first argument to the tool."
-        )
-    hb_repacker_fuzzer = sys.argv[1]
+    # Gather all files from graphs/
+    graphs_dir = os.path.join(srcdir, "graphs")
+    files_to_test = gather_files(graphs_dir)

-print("hb_repacker_fuzzer:", hb_repacker_fuzzer)
+    if not files_to_test:
+        print("No files found in", graphs_dir)
+        sys.exit(0)

-# Collect all files from graphs/
-graphs_path = os.path.join(srcdir, "graphs")
-if not os.path.isdir(graphs_path):
-    sys.exit(f"No 'graphs' directory found at {graphs_path}.")
+    fails = 0
+    batch_index = 0

-files_to_check = [
-    os.path.join(graphs_path, f)
-    for f in os.listdir(graphs_path)
-    if os.path.isfile(os.path.join(graphs_path, f))
-]
+    # Run in batches of up to 64 files
+    for chunk in chunkify(files_to_test, 64):
+        batch_index += 1
+        cmd_line = [hb_repacker_fuzzer] + chunk
+        output, returncode = run_command(cmd_line)

-if not files_to_check:
-    print("No files found in the 'graphs' directory.")
-    sys.exit(1)
+        if output.strip():
+            print(output)

-# Single invocation passing all files
-print(f"Running repacker fuzzer against {len(files_to_check)} file(s) in 'graphs'...")
-cmd_line = [hb_repacker_fuzzer] + files_to_check
-output, returncode = run_command(cmd_line)
+        if returncode != 0:
+            print(f"Failure in batch #{batch_index}")
+            fails += 1

-# Print the output if present
-if output.strip():
-    print(output)
+    if fails > 0:
+        sys.exit(f"{fails} repacker fuzzer batch(es) failed.")

-# Exit if there's an error
-if returncode != 0:
-    print("Failed for these files:")
-    for f in files_to_check:
-        print("  ", f)
-    sys.exit("1 repacker fuzzer related test(s) failed.")
+    print("All repacker fuzzer tests passed successfully.")

-print("All repacker fuzzer tests passed successfully.")
+if __name__ == "__main__":
+    main()
--- a/test/fuzzing/run-shape-fuzzer-tests.py
+++ b/test/fuzzing/run-shape-fuzzer-tests.py
@ -2,62 +2,51 @@

 import sys
 import os
-import subprocess
-import tempfile
+from hb_fuzzer_tools import (
+    run_command,
+    chunkify,
+    find_fuzzer_binary,
+    gather_files
+)

-def run_command(command):
-    """Run a command, capturing potentially large output."""
-    with tempfile.TemporaryFile() as tempf:
-        p = subprocess.Popen(command, stdout=tempf, stderr=tempf)
-        p.wait()
-        tempf.seek(0)
-        output = tempf.read().decode('utf-8', errors='replace')
-    return output, p.returncode
+def main():
+    srcdir = os.getenv("srcdir", ".")
+    EXEEXT = os.getenv("EXEEXT", "")
+    top_builddir = os.getenv("top_builddir", ".")

-srcdir = os.getenv("srcdir", ".")
-EXEEXT = os.getenv("EXEEXT", "")
-top_builddir = os.getenv("top_builddir", ".")
+    default_bin = os.path.join(top_builddir, "hb-shape-fuzzer" + EXEEXT)
+    hb_shape_fuzzer = find_fuzzer_binary(default_bin, sys.argv)

-hb_shape_fuzzer = os.path.join(top_builddir, "hb-shape-fuzzer" + EXEEXT)
-if not os.path.exists(hb_shape_fuzzer):
-    # If not found automatically, fall back to the first CLI argument.
-    if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]):
-        sys.exit(
-            "Failed to find hb-shape-fuzzer binary automatically.\n"
-            "Please provide it as the first argument to the tool."
-        )
-    hb_shape_fuzzer = sys.argv[1]
+    print("Using hb_shape_fuzzer:", hb_shape_fuzzer)

-print("hb_shape_fuzzer:", hb_shape_fuzzer)
+    # Gather all files from fonts/
+    fonts_dir = os.path.join(srcdir, "fonts")
+    files_to_test = gather_files(fonts_dir)

-fonts_dir = os.path.join(srcdir, "fonts")
-if not os.path.isdir(fonts_dir):
-    sys.exit(f"Fonts directory not found at: {fonts_dir}")
+    if not files_to_test:
+        print("No files found in", fonts_dir)
+        sys.exit(0)

-# Gather all files in `fonts_dir`
-files_to_test = [
-    os.path.join(fonts_dir, f)
-    for f in os.listdir(fonts_dir)
-    if os.path.isfile(os.path.join(fonts_dir, f))
-]
+    fails = 0
+    batch_index = 0

-if not files_to_test:
-    print(f"No files found in {fonts_dir}")
-    sys.exit(1)
+    # Batch up to 64 files at a time
+    for chunk in chunkify(files_to_test, 64):
+        batch_index += 1
+        cmd_line = [hb_shape_fuzzer] + chunk
+        output, returncode = run_command(cmd_line)

-# Single invocation with all test files
-cmd_line = [hb_shape_fuzzer] + files_to_test
-output, returncode = run_command(cmd_line)
+        if output.strip():
+            print(output)

-# Print output if any
-if output.strip():
-    print(output)
+        if returncode != 0:
+            print(f"Failure in batch #{batch_index}")
+            fails += 1

-# Fail if return code is non-zero
-if returncode != 0:
-    print("Failure on the following file(s):")
-    for f in files_to_test:
-        print("  ", f)
-    sys.exit("1 shape fuzzer test failed.")
+    if fails > 0:
+        sys.exit(f"{fails} shape fuzzer batch(es) failed.")

-print("All shape fuzzer tests passed successfully.")
+    print("All shape fuzzer tests passed successfully.")
+
+if __name__ == "__main__":
+    main()
--- a/test/fuzzing/run-subset-fuzzer-tests.py
+++ b/test/fuzzing/run-subset-fuzzer-tests.py
@ -2,68 +2,54 @@

 import sys
 import os
-import subprocess
-import tempfile
+from hb_fuzzer_tools import (
+    run_command,
+    chunkify,
+    find_fuzzer_binary,
+    gather_files
+)

-def run_command(command):
-    """Run a command, capturing potentially large output."""
-    with tempfile.TemporaryFile() as tempf:
-        p = subprocess.Popen(command, stdout=tempf, stderr=tempf)
-        p.wait()
-        tempf.seek(0)
-        output = tempf.read().decode("utf-8", errors="replace")
-    return output, p.returncode
+def main():
+    srcdir = os.getenv("srcdir", ".")
+    EXEEXT = os.getenv("EXEEXT", "")
+    top_builddir = os.getenv("top_builddir", ".")

-# Environment variables and binary location
-srcdir = os.getenv("srcdir", ".")
-EXEEXT = os.getenv("EXEEXT", "")
-top_builddir = os.getenv("top_builddir", ".")
+    # Locate the binary
+    default_bin = os.path.join(top_builddir, "hb-subset-fuzzer" + EXEEXT)
+    hb_subset_fuzzer = find_fuzzer_binary(default_bin, sys.argv)

-hb_subset_fuzzer = os.path.join(top_builddir, "hb-subset-fuzzer" + EXEEXT)
-# If not found automatically, fall back to the first CLI argument
-if not os.path.exists(hb_subset_fuzzer):
-    if len(sys.argv) < 2 or not os.path.exists(sys.argv[1]):
-        sys.exit(
-            "Failed to find hb-subset-fuzzer binary automatically.\n"
-            "Please provide it as the first argument to the tool."
-        )
-    hb_subset_fuzzer = sys.argv[1]
+    print("Using hb_subset_fuzzer:", hb_subset_fuzzer)

-print("hb_subset_fuzzer:", hb_subset_fuzzer)
+    # Gather from two directories, then combine
+    dir1 = os.path.join(srcdir, "..", "subset", "data", "fonts")
+    dir2 = os.path.join(srcdir, "fonts")

-# Gather all files from both directories
-dir1 = os.path.join(srcdir, "..", "subset", "data", "fonts")
-dir2 = os.path.join(srcdir, "fonts")
+    files_to_test = gather_files(dir1) + gather_files(dir2)

-files_to_test = []
+    if not files_to_test:
+        print("No files found in either directory.")
+        sys.exit(0)

-for d in [dir1, dir2]:
-    if not os.path.isdir(d):
-        # Skip if the directory doesn't exist
-        continue
-    for f in os.listdir(d):
-        file_path = os.path.join(d, f)
-        if os.path.isfile(file_path):
-            files_to_test.append(file_path)
+    fails = 0
+    batch_index = 0

-if not files_to_test:
-    print("No fonts found in either directory.")
-    sys.exit(1)
+    # Batch the tests in up to 64 files per run
+    for chunk in chunkify(files_to_test, 64):
+        batch_index += 1
+        cmd_line = [hb_subset_fuzzer] + chunk
+        output, returncode = run_command(cmd_line)

-# Run the fuzzer once, passing all collected files
-print(f"Running subset fuzzer on {len(files_to_test)} file(s).")
-cmd_line = [hb_subset_fuzzer] + files_to_test
-output, returncode = run_command(cmd_line)
+        if output.strip():
+            print(output)

-# Print any output
-if output.strip():
-    print(output)
+        if returncode != 0:
+            print(f"Failure in batch #{batch_index}")
+            fails += 1

-# If there's an error, exit non-zero
-if returncode != 0:
-    print("Failure while processing these files:")
-    for f in files_to_test:
-        print(" ", f)
-    sys.exit("1 subset fuzzer test failed.")
+    if fails > 0:
+        sys.exit(f"{fails} subset fuzzer batch(es) failed.")

-print("All subset fuzzer tests passed successfully.")
+    print("All subset fuzzer tests passed successfully.")
+
+if __name__ == "__main__":
+    main()