From e5b8660a463821f2c4880cabdfd96f1c0395f24f Mon Sep 17 00:00:00 2001
From: Frank Tang
Date: Mon, 17 Jun 2024 15:32:40 -0700
Subject: [PATCH] ICU-22716 Add uregex_match_fuzzer

Based on https://chromium-review.googlesource.com/c/chromium/deps/icu/+/5465572
---
 icu4c/source/test/fuzzer/Makefile.in          |  1 +
 .../test/fuzzer/uregex_match_fuzzer.cpp       | 36 +++++++++++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 icu4c/source/test/fuzzer/uregex_match_fuzzer.cpp

diff --git a/icu4c/source/test/fuzzer/Makefile.in b/icu4c/source/test/fuzzer/Makefile.in
index da3a3987160..216892bf6f9 100644
--- a/icu4c/source/test/fuzzer/Makefile.in
+++ b/icu4c/source/test/fuzzer/Makefile.in
@@ -61,6 +61,7 @@ FUZZER_TARGETS = \
 	unicode_string_codepage_create_fuzzer \
 	uprop_fuzzer \
 	uregex_open_fuzzer \
+	uregex_match_fuzzer \
 
 OBJECTS = $(FUZZER_TARGETS:%=%.o)
 
diff --git a/icu4c/source/test/fuzzer/uregex_match_fuzzer.cpp b/icu4c/source/test/fuzzer/uregex_match_fuzzer.cpp
new file mode 100644
index 00000000000..86f47a579f3
--- /dev/null
+++ b/icu4c/source/test/fuzzer/uregex_match_fuzzer.cpp
@@ -0,0 +1,36 @@
+// © 2024 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+
+#include "fuzzer_utils.h"
+#include "unicode/regex.h"
+
+// Created once during static initialization, before any fuzz input is run.
+IcuEnvironment* env = new IcuEnvironment();
+
+// libFuzzer entry point. The input is read as UTF-16 code units: the first
+// quarter becomes the regex, the remainder the text it is matched against.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  UErrorCode status = U_ZERO_ERROR;
+  size_t unistr_size = size/2;  // whole code units; an odd trailing byte is dropped
+  std::unique_ptr<char16_t[]> fuzzbuff(new char16_t[unistr_size]);
+  std::memcpy(fuzzbuff.get(), data, unistr_size * 2);
+  // Read-only alias of fuzzbuff (no copy); the substrings below alias it too.
+  icu::UnicodeString fuzzstr(false, fuzzbuff.get(), unistr_size);
+  icu::UnicodeString regex = fuzzstr.tempSubString (0, fuzzstr.length() / 4);
+  icu::UnicodeString haystack = fuzzstr.tempSubString (regex.length());
+
+  std::unique_ptr<icu::RegexPattern> re(icu::RegexPattern::compile(regex, UREGEX_CASE_INSENSITIVE, status));
+  if (U_FAILURE(status)) {
+    return -1; // invalid regex, don't explore further
+  }
+  std::unique_ptr<icu::RegexMatcher> regex_matcher(re->matcher(haystack, status));
+  if (U_SUCCESS(status)) {
+    regex_matcher->find(0, status);
+  }
+  return 0;
+}