From bbe54f6e89641dd02d6dd870a5e912bad33814c2 Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Tue, 18 Jun 2019 03:43:41 -0400 Subject: [PATCH 1/5] bpo-29505: Add fuzzing for re.compile, re.load and csv.reader --- Lib/test/test_xxtestfuzz.py | 2 + .../dictionaries/fuzz_sre_compile.dict | 219 ++++++++++++++++++ .../fuzz_csv_reader_corpus/test.csv | Bin 0 -> 118 bytes .../fuzz_sre_compile_corpus/anchor_links | 1 + .../fuzz_sre_compile_corpus/characters | 1 + .../_xxtestfuzz/fuzz_sre_compile_corpus/isbn | 1 + .../fuzz_sre_compile_corpus/phone_number | 1 + Modules/_xxtestfuzz/fuzz_tests.txt | 3 + Modules/_xxtestfuzz/fuzzer.c | 204 +++++++++++++++- 9 files changed, 426 insertions(+), 6 deletions(-) create mode 100644 Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict create mode 100644 Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv create mode 100644 Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links create mode 100644 Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters create mode 100644 Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn create mode 100644 Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number diff --git a/Lib/test/test_xxtestfuzz.py b/Lib/test/test_xxtestfuzz.py index 532f5fe72aa5f0..15924aaeff3851 100644 --- a/Lib/test/test_xxtestfuzz.py +++ b/Lib/test/test_xxtestfuzz.py @@ -16,6 +16,8 @@ def test_sample_input_smoke_test(self): _xxtestfuzz.run(b" ") _xxtestfuzz.run(b"x") _xxtestfuzz.run(b"1") + _xxtestfuzz.run(b"AAAAAAA") + _xxtestfuzz.run(b"AAAAAA\0") if __name__ == "__main__": diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict new file mode 100644 index 00000000000000..961306a87901d0 --- /dev/null +++ b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict @@ -0,0 +1,219 @@ +"?" +"abc" +"()" +"[]" +"abc|def" +"abc|def|ghi" +"^xxx$" +"ab\\b\\d\\bcd" +"\\w|\\d" +"a*?" +"abc+" +"abc+?" +"xyz?" +"xyz??" +"xyz{0,1}" +"xyz{0,1}?" +"xyz{93}" +"xyz{1,32}" +"xyz{1,32}?" +"xyz{1,}" +"xyz{1,}?" +"a\\fb\\nc\\rd\\te\\vf" +"a\\nb\\bc" +"(?:foo)" +"(?: foo )" +"foo|(bar|baz)|quux" +"foo(?=bar)baz" +"foo(?!bar)baz" +"foo(?<=bar)baz" +"foo(?)" +"(?.)" +"(?.)\\k" diff --git a/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv b/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv new file mode 100644 index 0000000000000000000000000000000000000000..8b7887d0f1d2426354ec0d01fb06768604406dc0 GIT binary patch literal 118 zcmXwwNeX~45ClEv6~mkq8$tbmUm11KfN_oBf`3;C)}bkAs#j@sr5t^b;+GPOf(O5_Fsw=uRKQIfG-Vn@SIH^Pt&RC5#NlL@()@BrgB} literal 0 HcmV?d00001 diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links new file mode 100644 index 00000000000000..d99247ccadfd18 --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links @@ -0,0 +1 @@ +XX] diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters new file mode 100644 index 00000000000000..0c67ee7dfc1b5d --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters @@ -0,0 +1 @@ +XX^(Tim|Robert)\s+the\s+(Enchanter|Shrubber)$ diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn new file mode 100644 index 00000000000000..cce8919e7285ce --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn @@ -0,0 +1 @@ +XX/((978[\--– ])?[0-9][0-9\--– ]{10}[\--– ][0-9xX])|((978)?[0-9]{9}[0-9Xx])/ diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number new file mode 100644 index 00000000000000..1e2efc51103be0 --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number @@ -0,0 +1 @@ +XX(\+1|1)?[ \-\.]?\(?(?[0-9]{3})\)?[ \-\.]?(?[0-9]{3})[ \-\.]?(?[0-9]{4})[ \.]*(ext|x)?[ \.]*(?[0-9]{0,5}) diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt index f0121291eaa015..9d330a668ee88b 100644 --- a/Modules/_xxtestfuzz/fuzz_tests.txt +++ b/Modules/_xxtestfuzz/fuzz_tests.txt @@ -2,3 +2,6 @@ fuzz_builtin_float fuzz_builtin_int fuzz_builtin_unicode fuzz_json_loads +fuzz_sre_compile +fuzz_sre_match +fuzz_csv_reader diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c index e862a99cfb34ca..95a25f9b0c1f20 100644 --- a/Modules/_xxtestfuzz/fuzzer.c +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -115,6 +115,155 @@ static int fuzz_json_loads(const char* data, size_t size) { return 0; } +#define MAX_RE_TEST_SIZE 0x10000 + +/* Initialized in LLVMFuzzerTestOneInput */ +PyObject* sre_compile_method = NULL; +PyObject* sre_error_exception = NULL; +int SRE_FLAG_DEBUG = 0; +/* Fuzz _sre.compile(x) */ +static int fuzz_sre_compile(const char* data, size_t size) { + /* Ignore really long regex patterns that will timeout the fuzzer */ + if (size > MAX_RE_TEST_SIZE) { + return 0; + } + /* We treat the first 2 bytes of the input as a number for the flags */ + if (size < 2) { + return 0; + } + uint16_t flags = ((uint16_t*) data)[0]; + /* We remove the SRE_FLAG_DEBUG if present. This is because it + prints to stdout which greatly decreases fuzzing speed */ + flags &= ~SRE_FLAG_DEBUG; + + /* Pull the pattern from the remaining bytes */ + PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2); + if (pattern_bytes == NULL) { + return 0; + } + PyObject* flags_obj = PyLong_FromUnsignedLong(flags); + if (flags_obj == NULL) { + Py_DECREF(pattern_bytes); + return 0; + } + + /* compiled = _sre.compile(data[2:], data[0:2] */ + PyObject* compiled = PyObject_CallFunctionObjArgs( + sre_compile_method, pattern_bytes, flags_obj, NULL); + /* Ignore ValueError as the fuzzer will more than likely + generate some invalid combination of flags */ + if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + /* Ignore some common errors thrown by sre_parse: + Overflow, Assertion and Index */ + if (compiled == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Clear(); + } + if (compiled == NULL && PyErr_ExceptionMatches(PyExc_AssertionError)) { + PyErr_Clear(); + } + if (compiled == NULL && PyErr_ExceptionMatches(PyExc_IndexError)) { + PyErr_Clear(); + } + /* Ignore re.error */ + if (compiled == NULL && PyErr_ExceptionMatches(sre_error_exception)) { + PyErr_Clear(); + } + + Py_DECREF(pattern_bytes); + Py_DECREF(flags_obj); + Py_XDECREF(compiled); + return 0; +} + +/* Some random patterns used to test re.match. + Be careful not to add catostraphically slow regexes here, we want to + excercise the matchign code without causing timeouts.*/ +static const char* regex_patterns[] = { + ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]", + "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?", + "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$", + "(?:a*)*", "a{1,2}?" +}; +const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]); +PyObject** compiled_patterns = NULL; +/* Fuzz re.match(x) */ +static int fuzz_sre_match(const char* data, size_t size) { + if (size < 1 || size > MAX_RE_TEST_SIZE) { + return 0; + } + /* Use the first byte as a uint8_t specifying the index of the + regex to use */ + uint8_t idx = ((uint8_t*) data)[0]; + idx = idx % NUM_PATTERNS; + + /* Pull the string to match from the remaining bytes */ + PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1); + if (to_match == NULL) { + return 0; + } + + PyObject* pattern = compiled_patterns[idx]; + PyObject* match_callable = PyObject_GetAttrString(pattern, "match"); + + PyObject* matches = PyObject_CallFunctionObjArgs(match_callable, to_match, NULL); + + Py_XDECREF(matches); + Py_DECREF(match_callable); + Py_DECREF(to_match); + return 0; +} + +#define MAX_CSV_TEST_SIZE 0x10000 +/* Initialized in LLVMFuzzerTestOneInput */ +PyObject* csv_module = NULL; +PyObject* csv_error = NULL; +/* Fuzz csv.reader([x]) */ +static int fuzz_csv_reader(const char* data, size_t size) { + if (size < 1 || size > MAX_CSV_TEST_SIZE) { + return 0; + } + /* Ignore non null-terminated strings since _csv can't handle + embeded nulls */ + if (memchr(data, '\0', size) == NULL) { + return 0; + } + + PyObject* s = PyUnicode_FromString(data); + /* Ignore exceptions until we have a valid string */ + if (s == NULL) { + PyErr_Clear(); + return 0; + } + + /* Split on \n so we can test multiple lines */ + PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n"); + if (lines == NULL) { + Py_DECREF(s); + return 0; + } + + PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines); + if (reader) { + /* Consume all of the reader as an iterator */ + PyObject* parsed_line; + while ((parsed_line = PyIter_Next(reader))) { + Py_DECREF(parsed_line); + } + } + + /* Ignore csv.Error because we're probably going to generate + some bad files (embeded new-lines, unterminated quotes etc) */ + if (PyErr_ExceptionMatches(csv_error)) { + PyErr_Clear(); + } + + Py_XDECREF(reader); + Py_DECREF(s); + return 0; +} + /* Run fuzzer and abort on failure. */ static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { int rv = fuzzer((const char*) data, size); @@ -152,12 +301,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { initialize CPython ourselves on the first run. */ Py_InitializeEx(0); } -#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads) - if (json_loads_method == NULL) { - PyObject* json_module = PyImport_ImportModule("json"); - json_loads_method = PyObject_GetAttrString(json_module, "loads"); - } -#endif int rv = 0; @@ -171,7 +314,56 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { rv |= _run_fuzz(data, size, fuzz_builtin_unicode); #endif #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads) + /* Import json.loads */ + if (json_loads_method == NULL) { + PyObject* json_module = PyImport_ImportModule("json"); + json_loads_method = PyObject_GetAttrString(json_module, "loads"); + } + rv |= _run_fuzz(data, size, fuzz_json_loads); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile) + /* Impore sre_compile.compile and sre.error */ + if (sre_compile_method == NULL) { + PyObject* sre_compile_module = PyImport_ImportModule("sre_compile"); + sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile"); + + PyObject* sre_constants = PyImport_ImportModule("sre_constants"); + sre_error_exception = PyObject_GetAttrString(sre_constants, "error"); + SRE_FLAG_DEBUG = PyLong_AsLong( + PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG")); + } + + rv |= _run_fuzz(data, size, fuzz_sre_compile); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match) + /* Precompile all the regex patterns on the first run for faster fuzzing */ + if (compiled_patterns == NULL) { + PyObject* re_module = PyImport_ImportModule("re"); + compiled_patterns = (PyObject**) PyMem_RawMalloc( + sizeof(PyObject*) * NUM_PATTERNS); + + for (size_t i = 0; i < NUM_PATTERNS; i++) { + PyObject* compiled = PyObject_CallMethod( + re_module, "compile", "y", regex_patterns[i]); + + if (compiled == NULL) { + PyErr_Print(); + abort(); + } + compiled_patterns[i] = compiled; + } + } + + rv |= _run_fuzz(data, size, fuzz_sre_match); +#endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader) + /* Import csv and csv.Error */ + if (csv_module == NULL) { + csv_module = PyImport_ImportModule("csv"); + csv_error = PyObject_GetAttrString(csv_module, "Error"); + } + rv |= _run_fuzz(data, size, fuzz_csv_reader); #endif return rv; } From f94cb1e7d9183a33a88716810e577100896e57ba Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Wed, 26 Jun 2019 13:18:36 -0700 Subject: [PATCH 2/5] Use unsigned char for indexing --- Modules/_xxtestfuzz/fuzzer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c index 95a25f9b0c1f20..1f783cde734bf8 100644 --- a/Modules/_xxtestfuzz/fuzzer.c +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -195,7 +195,7 @@ static int fuzz_sre_match(const char* data, size_t size) { } /* Use the first byte as a uint8_t specifying the index of the regex to use */ - uint8_t idx = ((uint8_t*) data)[0]; + unsigned char idx = (unsigned char) data[0]; idx = idx % NUM_PATTERNS; /* Pull the string to match from the remaining bytes */ From 7d0e0e2c50599fa993d1b74bf295dabaa9c69420 Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Fri, 28 Jun 2019 05:19:53 -0400 Subject: [PATCH 3/5] Clean up initialization code to handle all errors --- Modules/_xxtestfuzz/fuzzer.c | 135 +++++++++++++++++++++++++---------- 1 file changed, 98 insertions(+), 37 deletions(-) diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c index 1f783cde734bf8..ed812da97faa4d 100644 --- a/Modules/_xxtestfuzz/fuzzer.c +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -81,8 +81,17 @@ static int fuzz_builtin_unicode(const char* data, size_t size) { #define MAX_JSON_TEST_SIZE 0x10000 -/* Initialized in LLVMFuzzerTestOneInput */ PyObject* json_loads_method = NULL; +/* Called by LLVMFuzzerTestOneInput for initialization */ +static int init_json_loads() { + /* Import json.loads */ + PyObject* json_module = PyImport_ImportModule("json"); + if (json_module == NULL) { + return 1; + } + json_loads_method = PyObject_GetAttrString(json_module, "loads"); + return json_loads_method != NULL; +} /* Fuzz json.loads(x) */ static int fuzz_json_loads(const char* data, size_t size) { /* Since python supports arbitrarily large ints in JSON, @@ -117,10 +126,36 @@ static int fuzz_json_loads(const char* data, size_t size) { #define MAX_RE_TEST_SIZE 0x10000 -/* Initialized in LLVMFuzzerTestOneInput */ PyObject* sre_compile_method = NULL; PyObject* sre_error_exception = NULL; int SRE_FLAG_DEBUG = 0; +/* Called by LLVMFuzzerTestOneInput for initialization */ +static int init_sre_compile() { + /* Import sre_compile.compile and sre.error */ + PyObject* sre_compile_module = PyImport_ImportModule("sre_compile"); + if (sre_compile_module == NULL) { + return 0; + } + sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile"); + if (sre_compile_method == NULL) { + return 0; + } + + PyObject* sre_constants = PyImport_ImportModule("sre_constants"); + if (sre_constants == NULL) { + return 0; + } + sre_error_exception = PyObject_GetAttrString(sre_constants, "error"); + if (sre_error_exception == NULL) { + return 0; + } + PyObject* debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG"); + if (debug_flag == NULL) { + return 0; + } + SRE_FLAG_DEBUG = PyLong_AsLong(debug_flag); + return 1; +} /* Fuzz _sre.compile(x) */ static int fuzz_sre_compile(const char* data, size_t size) { /* Ignore really long regex patterns that will timeout the fuzzer */ @@ -179,7 +214,7 @@ static int fuzz_sre_compile(const char* data, size_t size) { /* Some random patterns used to test re.match. Be careful not to add catostraphically slow regexes here, we want to - excercise the matchign code without causing timeouts.*/ + excercise the matching code without causing timeouts.*/ static const char* regex_patterns[] = { ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]", "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?", @@ -188,6 +223,31 @@ static const char* regex_patterns[] = { }; const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]); PyObject** compiled_patterns = NULL; +/* Called by LLVMFuzzerTestOneInput for initialization */ +static int init_sre_match() { + PyObject* re_module = PyImport_ImportModule("re"); + if (re_module == NULL) { + return 0; + } + compiled_patterns = (PyObject**) PyMem_RawMalloc( + sizeof(PyObject*) * NUM_PATTERNS); + if (compiled_patterns == NULL) { + PyErr_NoMemory(); + return 0; + } + + /* Precompile all the regex patterns on the first run for faster fuzzing */ + for (size_t i = 0; i < NUM_PATTERNS; i++) { + PyObject* compiled = PyObject_CallMethod( + re_module, "compile", "y", regex_patterns[i]); + /* Bail if any of the patterns fail to compile */ + if (compiled == NULL) { + return 0; + } + compiled_patterns[i] = compiled; + } + return 1; +} /* Fuzz re.match(x) */ static int fuzz_sre_match(const char* data, size_t size) { if (size < 1 || size > MAX_RE_TEST_SIZE) { @@ -216,9 +276,18 @@ static int fuzz_sre_match(const char* data, size_t size) { } #define MAX_CSV_TEST_SIZE 0x10000 -/* Initialized in LLVMFuzzerTestOneInput */ PyObject* csv_module = NULL; PyObject* csv_error = NULL; +/* Called by LLVMFuzzerTestOneInput for initialization */ +static int init_csv_reader() { + /* Import csv and csv.Error */ + csv_module = PyImport_ImportModule("csv"); + if (csv_module == NULL) { + return 0; + } + csv_error = PyObject_GetAttrString(csv_module, "Error"); + return csv_error != NULL; +} /* Fuzz csv.reader([x]) */ static int fuzz_csv_reader(const char* data, size_t size) { if (size < 1 || size > MAX_CSV_TEST_SIZE) { @@ -314,55 +383,47 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { rv |= _run_fuzz(data, size, fuzz_builtin_unicode); #endif #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads) - /* Import json.loads */ - if (json_loads_method == NULL) { - PyObject* json_module = PyImport_ImportModule("json"); - json_loads_method = PyObject_GetAttrString(json_module, "loads"); + static int JSON_LOADS_INITIALIZED = 0; + if (!JSON_LOADS_INITIALIZED && !init_json_loads()) { + PyErr_Print(); + abort(); + } else { + JSON_LOADS_INITIALIZED = 1; } rv |= _run_fuzz(data, size, fuzz_json_loads); #endif #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile) - /* Impore sre_compile.compile and sre.error */ - if (sre_compile_method == NULL) { - PyObject* sre_compile_module = PyImport_ImportModule("sre_compile"); - sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile"); - - PyObject* sre_constants = PyImport_ImportModule("sre_constants"); - sre_error_exception = PyObject_GetAttrString(sre_constants, "error"); - SRE_FLAG_DEBUG = PyLong_AsLong( - PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG")); + static int SRE_COMPILE_INITIALIZED = 0; + if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) { + PyErr_Print(); + abort(); + } else { + SRE_COMPILE_INITIALIZED = 1; } rv |= _run_fuzz(data, size, fuzz_sre_compile); #endif #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match) - /* Precompile all the regex patterns on the first run for faster fuzzing */ - if (compiled_patterns == NULL) { - PyObject* re_module = PyImport_ImportModule("re"); - compiled_patterns = (PyObject**) PyMem_RawMalloc( - sizeof(PyObject*) * NUM_PATTERNS); - - for (size_t i = 0; i < NUM_PATTERNS; i++) { - PyObject* compiled = PyObject_CallMethod( - re_module, "compile", "y", regex_patterns[i]); - - if (compiled == NULL) { - PyErr_Print(); - abort(); - } - compiled_patterns[i] = compiled; - } + static int SRE_MATCH_INITIALIZED = 0; + if (!SRE_MATCH_INITIALIZED && !init_sre_match()) { + PyErr_Print(); + abort(); + } else { + SRE_MATCH_INITIALIZED = 1; } rv |= _run_fuzz(data, size, fuzz_sre_match); #endif #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader) - /* Import csv and csv.Error */ - if (csv_module == NULL) { - csv_module = PyImport_ImportModule("csv"); - csv_error = PyObject_GetAttrString(csv_module, "Error"); + static int CSV_READER_INITIALIZED = 0; + if (!CSV_READER_INITIALIZED && !init_csv_reader()) { + PyErr_Print(); + abort(); + } else { + CSV_READER_INITIALIZED = 1; } + rv |= _run_fuzz(data, size, fuzz_csv_reader); #endif return rv; From 664161eae75c7f969a51c6f829b8cb3fb00f11fc Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 28 Jun 2019 02:38:59 -0700 Subject: [PATCH 4/5] return 0 on init_json_loads error. --- Modules/_xxtestfuzz/fuzzer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c index ed812da97faa4d..352104d6a94f0c 100644 --- a/Modules/_xxtestfuzz/fuzzer.c +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -87,7 +87,7 @@ static int init_json_loads() { /* Import json.loads */ PyObject* json_module = PyImport_ImportModule("json"); if (json_module == NULL) { - return 1; + return 0; } json_loads_method = PyObject_GetAttrString(json_module, "loads"); return json_loads_method != NULL; From 9f70de8590cc8582cb8b85eb0e8d95ee930cfebb Mon Sep 17 00:00:00 2001 From: Ammar Askar Date: Fri, 28 Jun 2019 06:07:55 -0400 Subject: [PATCH 5/5] Collect up conditionals for some exceptions --- Modules/_xxtestfuzz/fuzzer.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c index 352104d6a94f0c..16104e492ab103 100644 --- a/Modules/_xxtestfuzz/fuzzer.c +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -105,19 +105,18 @@ static int fuzz_json_loads(const char* data, size_t size) { return 0; } PyObject* parsed = PyObject_CallFunctionObjArgs(json_loads_method, input_bytes, NULL); - /* Ignore ValueError as the fuzzer will more than likely - generate some invalid json and values */ - if (parsed == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { - PyErr_Clear(); - } - /* Ignore RecursionError as the fuzzer generates long sequences of - arrays such as `[[[...` */ - if (parsed == NULL && PyErr_ExceptionMatches(PyExc_RecursionError)) { - PyErr_Clear(); - } - /* Ignore unicode errors, invalid byte sequences are common */ - if (parsed == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { - PyErr_Clear(); + if (parsed == NULL) { + /* Ignore ValueError as the fuzzer will more than likely + generate some invalid json and values */ + if (PyErr_ExceptionMatches(PyExc_ValueError) || + /* Ignore RecursionError as the fuzzer generates long sequences of + arrays such as `[[[...` */ + PyErr_ExceptionMatches(PyExc_RecursionError) || + /* Ignore unicode errors, invalid byte sequences are common */ + PyErr_ExceptionMatches(PyExc_UnicodeDecodeError) + ) { + PyErr_Clear(); + } } Py_DECREF(input_bytes); Py_XDECREF(parsed); @@ -192,13 +191,10 @@ static int fuzz_sre_compile(const char* data, size_t size) { } /* Ignore some common errors thrown by sre_parse: Overflow, Assertion and Index */ - if (compiled == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) { - PyErr_Clear(); - } - if (compiled == NULL && PyErr_ExceptionMatches(PyExc_AssertionError)) { - PyErr_Clear(); - } - if (compiled == NULL && PyErr_ExceptionMatches(PyExc_IndexError)) { + if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) || + PyErr_ExceptionMatches(PyExc_AssertionError) || + PyErr_ExceptionMatches(PyExc_IndexError)) + ) { PyErr_Clear(); } /* Ignore re.error */