From b4fef640cfe6ea94d37e51ab1e57e6d91d31a569 Mon Sep 17 00:00:00 2001 From: Norbert Runge Date: Fri, 25 Jan 2019 12:58:18 -0800 Subject: [PATCH] ICU-20217 Adds additional ICU4C fuzzers. --- icu4c/source/test/fuzzer/locale_fuzzer.cc | 49 +++++++++++++++++++ icu4c/source/test/fuzzer/locale_util.cc | 12 +++++ icu4c/source/test/fuzzer/locale_util.h | 12 +++++ .../test/fuzzer/uloc_canonicalize_fuzzer.cc | 20 ++++++++ .../uloc_canonicalize_fuzzer_seed_corpus.zip | 3 ++ .../fuzzer/uloc_for_language_tag_fuzzer.cc | 21 ++++++++ ...oc_for_language_tag_fuzzer_seed_corpus.zip | 3 ++ .../test/fuzzer/uloc_get_name_fuzzer.cc | 20 ++++++++ .../uloc_get_name_fuzzer_seed_corpus.zip | 3 ++ .../fuzzer/uloc_is_right_to_left_fuzzer.cc | 15 ++++++ ...oc_is_right_to_left_fuzzer_seed_corpus.zip | 3 ++ .../test/fuzzer/uloc_open_keywords_fuzzer.cc | 20 ++++++++ .../uloc_open_keywords_seed_fuzzer_corpus.zip | 3 ++ 13 files changed, 184 insertions(+) create mode 100644 icu4c/source/test/fuzzer/locale_fuzzer.cc create mode 100644 icu4c/source/test/fuzzer/locale_util.cc create mode 100644 icu4c/source/test/fuzzer/locale_util.h create mode 100644 icu4c/source/test/fuzzer/uloc_canonicalize_fuzzer.cc create mode 100644 icu4c/source/test/fuzzer/uloc_canonicalize_fuzzer_seed_corpus.zip create mode 100644 icu4c/source/test/fuzzer/uloc_for_language_tag_fuzzer.cc create mode 100644 icu4c/source/test/fuzzer/uloc_for_language_tag_fuzzer_seed_corpus.zip create mode 100644 icu4c/source/test/fuzzer/uloc_get_name_fuzzer.cc create mode 100644 icu4c/source/test/fuzzer/uloc_get_name_fuzzer_seed_corpus.zip create mode 100644 icu4c/source/test/fuzzer/uloc_is_right_to_left_fuzzer.cc create mode 100644 icu4c/source/test/fuzzer/uloc_is_right_to_left_fuzzer_seed_corpus.zip create mode 100644 icu4c/source/test/fuzzer/uloc_open_keywords_fuzzer.cc create mode 100644 icu4c/source/test/fuzzer/uloc_open_keywords_seed_fuzzer_corpus.zip diff --git a/icu4c/source/test/fuzzer/locale_fuzzer.cc 
b/icu4c/source/test/fuzzer/locale_fuzzer.cc
new file mode 100644
index 00000000000..4d6355531bd
--- /dev/null
+++ b/icu4c/source/test/fuzzer/locale_fuzzer.cc
@@ -0,0 +1,49 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// Fuzzer for ICU Locales.
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <string>
+#include <vector>  // NOTE(review): sixth header name is inferred; confirm.
+
+#include "unicode/locid.h"
+
+namespace {
+// Advances *data by N bytes and shrinks *size by N. No bounds check: callers pass N <= *size.
+void ConsumeNBytes(const uint8_t** data, size_t* size, size_t N) {
+  *data += N;
+  *size -= N;
+}
+// Removes and returns the first input byte; returns 0 and consumes nothing when no bytes remain.
+uint8_t ConsumeUint8(const uint8_t** data, size_t* size) {
+  uint8_t tmp = 0;
+  if (*size >= 1) {
+    tmp = (*data)[0];
+    ConsumeNBytes(data, size, 1);
+  }
+  return tmp;
+}
+// Reads a one-byte length, then consumes at most that many of the remaining bytes as a string.
+std::string ConsumeSubstring(const uint8_t** data, size_t* size) {
+  const size_t request_size = ConsumeUint8(data, size);
+  const char* substring_start = reinterpret_cast<const char*>(*data);
+  const size_t substring_size = std::min(*size, request_size);
+  ConsumeNBytes(data, size, substring_size);
+  return std::string(substring_start, substring_size);
+}
+
+}  // namespace
+// Fuzz target: splits the input into four length-prefixed fields and builds an icu::Locale from them.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  const std::string language = ConsumeSubstring(&data, &size);
+  const std::string country = ConsumeSubstring(&data, &size);
+  const std::string variant = ConsumeSubstring(&data, &size);
+  const std::string kv_pairs = ConsumeSubstring(&data, &size);
+  icu::Locale locale(language.c_str(), country.c_str(), variant.c_str(),
+                     kv_pairs.c_str());
+  return EXIT_SUCCESS;
+}
diff --git a/icu4c/source/test/fuzzer/locale_util.cc b/icu4c/source/test/fuzzer/locale_util.cc
new file mode 100644
index 00000000000..d39aeeece57
--- /dev/null
+++ b/icu4c/source/test/fuzzer/locale_util.cc
@@ -0,0 +1,12 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// Helper method for ICU locale fuzzer.
+
+#include "locale_util.h"
+
+#include <string>
+// Copies the fuzzer bytes into a std::string; a non-positive size (empty, or wrapped negative) yields "".
+std::string MakeZeroTerminatedInput(const uint8_t *data, int32_t size) {
+  return size <= 0 ? "" : std::string(reinterpret_cast<const char *>(data), size);
+}
diff --git a/icu4c/source/test/fuzzer/locale_util.h b/icu4c/source/test/fuzzer/locale_util.h
new file mode 100644
index 00000000000..09848ff4a37
--- /dev/null
+++ b/icu4c/source/test/fuzzer/locale_util.h
@@ -0,0 +1,12 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef I18N_ICU_FUZZ_LOCALE_UTIL_H_
+#define I18N_ICU_FUZZ_LOCALE_UTIL_H_
+
+#include <string>
+
+// Copies `size` bytes of fuzzer data into a std::string, so that c_str() is zero terminated.
+std::string MakeZeroTerminatedInput(const uint8_t* data, int32_t size);
+
+#endif  // I18N_ICU_FUZZ_LOCALE_UTIL_H_
diff --git a/icu4c/source/test/fuzzer/uloc_canonicalize_fuzzer.cc b/icu4c/source/test/fuzzer/uloc_canonicalize_fuzzer.cc
new file mode 100644
index 00000000000..51be32d8e99
--- /dev/null
+++ b/icu4c/source/test/fuzzer/uloc_canonicalize_fuzzer.cc
@@ -0,0 +1,20 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <string>
+
+#include "locale_util.h"
+#include "unicode/uloc.h"
+// Fuzz target: passes the input as a zero-terminated string to uloc_canonicalize(); the result is ignored.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  // Canonicalized locale name.
+  char name[ULOC_FULLNAME_CAPACITY];
+  int32_t name_capacity = ULOC_FULLNAME_CAPACITY;
+
+  const std::string input = MakeZeroTerminatedInput(data, size);
+
+  UErrorCode status = U_ZERO_ERROR;
+  uloc_canonicalize(input.c_str(), name, name_capacity, &status);
+
+  return 0;
+}
diff --git a/icu4c/source/test/fuzzer/uloc_canonicalize_fuzzer_seed_corpus.zip b/icu4c/source/test/fuzzer/uloc_canonicalize_fuzzer_seed_corpus.zip
new file mode 100644
index 00000000000..990d6dced86
--- /dev/null
+++ b/icu4c/source/test/fuzzer/uloc_canonicalize_fuzzer_seed_corpus.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5c0deef267ac7f6857d3c2b95644d74266dc3ec9de6461572469b4c1e15a07
+size 746
diff --git a/icu4c/source/test/fuzzer/uloc_for_language_tag_fuzzer.cc b/icu4c/source/test/fuzzer/uloc_for_language_tag_fuzzer.cc
new file mode 100644
index 00000000000..65520443007
--- /dev/null
+++ b/icu4c/source/test/fuzzer/uloc_for_language_tag_fuzzer.cc
@@ -0,0 +1,21 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <string>
+
+#include "locale_util.h"
+#include "unicode/uloc.h"
+// Fuzz target: passes the input as a zero-terminated string to uloc_forLanguageTag(); the result is ignored.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  // Full locale id.
+  char locale_id[ULOC_FULLNAME_CAPACITY];
+  int32_t locale_id_capacity = ULOC_FULLNAME_CAPACITY;
+
+  const std::string input = MakeZeroTerminatedInput(data, size);
+
+  UErrorCode status = U_ZERO_ERROR;
+  uloc_forLanguageTag(input.c_str(), locale_id, locale_id_capacity, nullptr,
+                      &status);
+
+  return 0;
+}
diff --git a/icu4c/source/test/fuzzer/uloc_for_language_tag_fuzzer_seed_corpus.zip b/icu4c/source/test/fuzzer/uloc_for_language_tag_fuzzer_seed_corpus.zip
new file mode 100644
index 00000000000..990d6dced86
--- /dev/null
+++ b/icu4c/source/test/fuzzer/uloc_for_language_tag_fuzzer_seed_corpus.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5c0deef267ac7f6857d3c2b95644d74266dc3ec9de6461572469b4c1e15a07
+size 746
diff --git a/icu4c/source/test/fuzzer/uloc_get_name_fuzzer.cc b/icu4c/source/test/fuzzer/uloc_get_name_fuzzer.cc
new file mode 100644
index 00000000000..5a0dca92f53
--- /dev/null
+++ b/icu4c/source/test/fuzzer/uloc_get_name_fuzzer.cc
@@ -0,0 +1,20 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <string>
+
+#include "locale_util.h"
+#include "unicode/uloc.h"
+// Fuzz target: passes the input as a zero-terminated string to uloc_getName(); the result is ignored.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  // Full locale name.
+  char name[ULOC_FULLNAME_CAPACITY];
+  int32_t name_capacity = ULOC_FULLNAME_CAPACITY;
+
+  const std::string input = MakeZeroTerminatedInput(data, size);
+
+  UErrorCode status = U_ZERO_ERROR;
+  uloc_getName(input.c_str(), name, name_capacity, &status);
+
+  return 0;
+}
diff --git a/icu4c/source/test/fuzzer/uloc_get_name_fuzzer_seed_corpus.zip b/icu4c/source/test/fuzzer/uloc_get_name_fuzzer_seed_corpus.zip
new file mode 100644
index 00000000000..990d6dced86
--- /dev/null
+++ b/icu4c/source/test/fuzzer/uloc_get_name_fuzzer_seed_corpus.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5c0deef267ac7f6857d3c2b95644d74266dc3ec9de6461572469b4c1e15a07
+size 746
diff --git a/icu4c/source/test/fuzzer/uloc_is_right_to_left_fuzzer.cc b/icu4c/source/test/fuzzer/uloc_is_right_to_left_fuzzer.cc
new file mode 100644
index 00000000000..fe43089e6ca
--- /dev/null
+++ b/icu4c/source/test/fuzzer/uloc_is_right_to_left_fuzzer.cc
@@ -0,0 +1,15 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <string>
+
+#include "locale_util.h"
+#include "unicode/uloc.h"
+// Fuzz target: passes the input as a zero-terminated string to uloc_isRightToLeft(); the result is ignored.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  const std::string input = MakeZeroTerminatedInput(data, size);
+
+  uloc_isRightToLeft(input.c_str());
+
+  return 0;
+}
diff --git a/icu4c/source/test/fuzzer/uloc_is_right_to_left_fuzzer_seed_corpus.zip b/icu4c/source/test/fuzzer/uloc_is_right_to_left_fuzzer_seed_corpus.zip
new file mode 100644
index 00000000000..990d6dced86
--- /dev/null
+++ b/icu4c/source/test/fuzzer/uloc_is_right_to_left_fuzzer_seed_corpus.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5c0deef267ac7f6857d3c2b95644d74266dc3ec9de6461572469b4c1e15a07
+size 746
diff --git a/icu4c/source/test/fuzzer/uloc_open_keywords_fuzzer.cc b/icu4c/source/test/fuzzer/uloc_open_keywords_fuzzer.cc
new file mode 100644
index 00000000000..db9d96703fc
--- /dev/null
+++ b/icu4c/source/test/fuzzer/uloc_open_keywords_fuzzer.cc
@@ -0,0 +1,20 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <string>
+
+#include "locale_util.h"
+#include "unicode/uenum.h"
+#include "unicode/uloc.h"
+// Fuzz target: passes the input as a zero-terminated string to uloc_openKeywords(), then closes the result.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  const std::string input = MakeZeroTerminatedInput(data, size);
+
+  UErrorCode status = U_ZERO_ERROR;
+  UEnumeration* enumeration = uloc_openKeywords(input.c_str(), &status);
+
+  // Have to clean up. Call works even for nullptr enumeration.
+  uenum_close(enumeration);
+
+  return 0;
+}
diff --git a/icu4c/source/test/fuzzer/uloc_open_keywords_seed_fuzzer_corpus.zip b/icu4c/source/test/fuzzer/uloc_open_keywords_seed_fuzzer_corpus.zip
new file mode 100644
index 00000000000..990d6dced86
--- /dev/null
+++ b/icu4c/source/test/fuzzer/uloc_open_keywords_seed_fuzzer_corpus.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5c0deef267ac7f6857d3c2b95644d74266dc3ec9de6461572469b4c1e15a07
+size 746