From bbe54f6e89641dd02d6dd870a5e912bad33814c2 Mon Sep 17 00:00:00 2001
From: Ammar Askar <ammar@ammaraskar.com>
Date: Tue, 18 Jun 2019 03:43:41 -0400
Subject: [PATCH 1/5] bpo-29505: Add fuzzing for re.compile, re.match and
 csv.reader

---
 Lib/test/test_xxtestfuzz.py                   |   2 +
 .../dictionaries/fuzz_sre_compile.dict        | 219 ++++++++++++++++++
 .../fuzz_csv_reader_corpus/test.csv           | Bin 0 -> 118 bytes
 .../fuzz_sre_compile_corpus/anchor_links      |   1 +
 .../fuzz_sre_compile_corpus/characters        |   1 +
 .../_xxtestfuzz/fuzz_sre_compile_corpus/isbn  |   1 +
 .../fuzz_sre_compile_corpus/phone_number      |   1 +
 Modules/_xxtestfuzz/fuzz_tests.txt            |   3 +
 Modules/_xxtestfuzz/fuzzer.c                  | 204 +++++++++++++++-
 9 files changed, 426 insertions(+), 6 deletions(-)
 create mode 100644 Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
 create mode 100644 Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv
 create mode 100644 Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
 create mode 100644 Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
 create mode 100644 Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
 create mode 100644 Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number

diff --git a/Lib/test/test_xxtestfuzz.py b/Lib/test/test_xxtestfuzz.py
index 532f5fe72aa5f0..15924aaeff3851 100644
--- a/Lib/test/test_xxtestfuzz.py
+++ b/Lib/test/test_xxtestfuzz.py
@@ -16,6 +16,8 @@ def test_sample_input_smoke_test(self):
         _xxtestfuzz.run(b" ")
         _xxtestfuzz.run(b"x")
         _xxtestfuzz.run(b"1")
+        _xxtestfuzz.run(b"AAAAAAA")
+        _xxtestfuzz.run(b"AAAAAA\0")
 
 
 if __name__ == "__main__":
diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
new file mode 100644
index 00000000000000..961306a87901d0
--- /dev/null
+++ b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
@@ -0,0 +1,219 @@
+"?"
+"abc"
+"()"
+"[]"
+"abc|def"
+"abc|def|ghi"
+"^xxx$"
+"ab\\b\\d\\bcd"
+"\\w|\\d"
+"a*?"
+"abc+"
+"abc+?"
+"xyz?"
+"xyz??"
+"xyz{0,1}"
+"xyz{0,1}?"
+"xyz{93}"
+"xyz{1,32}"
+"xyz{1,32}?"
+"xyz{1,}"
+"xyz{1,}?"
+"a\\fb\\nc\\rd\\te\\vf"
+"a\\nb\\bc"
+"(?:foo)"
+"(?: foo )"
+"foo|(bar|baz)|quux"
+"foo(?=bar)baz"
+"foo(?!bar)baz"
+"foo(?<=bar)baz"
+"foo(?<!bar)baz"
+"()"
+"(?=)"
+"[]"
+"[x]"
+"[xyz]"
+"[a-zA-Z0-9]"
+"[-123]"
+"[^123]"
+"]"
+"}"
+"[a-b-c]"
+"[x\\dz]"
+"[\\d-z]"
+"[\\d-\\d]"
+"[z-\\d]"
+"\\cj\\cJ\\ci\\cI\\ck\\cK"
+"\\c!"
+"\\c_"
+"\\c~"
+"[\\c!]"
+"[\\c_]"
+"[\\c~]"
+"[\\ca]"
+"[\\cz]"
+"[\\cA]"
+"[\\cZ]"
+"[\\c1]"
+"\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ "
+"[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]"
+"\\8"
+"\\9"
+"\\11"
+"\\11a"
+"\\011"
+"\\118"
+"\\111"
+"\\1111"
+"(x)(x)(x)\\1"
+"(x)(x)(x)\\2"
+"(x)(x)(x)\\3"
+"(x)(x)(x)\\4"
+"(x)(x)(x)\\1*"
+"(x)(x)(x)\\3*"
+"(x)(x)(x)\\4*"
+"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10"
+"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11"
+"(a)\\1"
+"(a\\1)"
+"(\\1a)"
+"(\\2)(\\1)"
+"(?=a){0,10}a"
+"(?=a){1,10}a"
+"(?=a){9,10}a"
+"(?!a)?a"
+"\\1(a)"
+"(?!(a))\\1"
+"(?!\\1(a\\1)\\1)\\1"
+"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
+"[\\0]"
+"[\\11]"
+"[\\11a]"
+"[\\011]"
+"[\\00011]"
+"[\\118]"
+"[\\111]"
+"[\\1111]"
+"\\x60"
+"\\x3z"
+"\\c"
+"\\u0034"
+"\\u003z"
+"foo[z]*"
+"\\u{12345}"
+"\\u{12345}\\u{23456}"
+"\\u{12345}{3}"
+"\\u{12345}*"
+"\\ud808\\udf45*"
+"[\\ud808\\udf45-\\ud809\\udccc]"
+"a"
+"a|b"
+"a\\n"
+"a$"
+"a\\b!"
+"a\\Bb"
+"a*?"
+"a?"
+"a??"
+"a{0,1}?"
+"a{1,2}?"
+"a+?"
+"(a)"
+"(a)\\1"
+"(\\1a)"
+"\\1(a)"
+"a\\s"
+"a\\S"
+"a\\D"
+"a\\w"
+"a\\W"
+"a."
+"a\\q"
+"a[a]"
+"a[^a]"
+"a[a-z]"
+"a(?:b)"
+"a(?=b)"
+"a(?!b)"
+"\\x60"
+"\\u0060"
+"\\cA"
+"\\q"
+"\\1112"
+"(a)\\1"
+"(?!a)?a\\1"
+"(?:(?=a))a\\1"
+"a{}"
+"a{,}"
+"a{"
+"a{z}"
+"a{12z}"
+"a{12,"
+"a{12,3b"
+"{}"
+"{,}"
+"{"
+"{z}"
+"{1z}"
+"{12,"
+"{12,3b"
+"a"
+"abc"
+"a[bc]d"
+"a|bc"
+"ab|c"
+"a||bc"
+"(?:ab)"
+"(?:ab|cde)"
+"(?:ab)|cde"
+"(ab)"
+"(ab|cde)"
+"(ab)\\1"
+"(ab|cde)\\1"
+"(?:ab)?"
+"(?:ab)+"
+"a?"
+"a+"
+"a??"
+"a*?"
+"a+?"
+"(?:a?)?"
+"(?:a+)?"
+"(?:a?)+"
+"(?:a*)+"
+"(?:a+)+"
+"(?:a?)*"
+"(?:a*)*"
+"(?:a+)*"
+"a{0}"
+"(?:a+){0,0}"
+"a*b"
+"a+b"
+"a*b|c"
+"a+b|c"
+"(?:a{5,1000000}){3,1000000}"
+"(?:ab){4,7}"
+"a\\bc"
+"a\\sc"
+"a\\Sc"
+"a(?=b)c"
+"a(?=bbb|bb)c"
+"a(?!bbb|bb)c"
+"\xe2\x81\xa3"
+"[\xe2\x81\xa3]"
+"\xed\xb0\x80"
+"\xed\xa0\x80"
+"(\xed\xb0\x80)\x01"
+"((\xed\xa0\x80))\x02"
+"\xf0\x9f\x92\xa9"
+"\x01"
+"\x0f"
+"[-\xf0\x9f\x92\xa9]+"
+"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\xbf]"
+"(?<=)"
+"(?<=a)"
+"(?<!)"
+"(?<!a)"
+"(?<a>)"
+"(?<a>.)"
+"(?<a>.)\\k<a>"
diff --git a/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv b/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8b7887d0f1d2426354ec0d01fb06768604406dc0
GIT binary patch
literal 118
zcmXwwNeX~45ClEv6~mkq8$tbmUm11KfN_oBf`3;C)}bkAs#j@sr5t^b;+GP<?334)
x!owiHiXH6OQ8PIw8$OKn>Of(O5_Fsw=uRKQIfG-Vn@SIH^Pt&RC5#NlL@()@BrgB}

literal 0
HcmV?d00001

diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
new file mode 100644
index 00000000000000..d99247ccadfd18
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
@@ -0,0 +1 @@
+XX<a\s*href=(.*?)[\s|>]
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
new file mode 100644
index 00000000000000..0c67ee7dfc1b5d
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
@@ -0,0 +1 @@
+XX^(Tim|Robert)\s+the\s+(Enchanter|Shrubber)$
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
new file mode 100644
index 00000000000000..cce8919e7285ce
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
@@ -0,0 +1 @@
+XX/((978[\--– ])?[0-9][0-9\--– ]{10}[\--– ][0-9xX])|((978)?[0-9]{9}[0-9Xx])/
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
new file mode 100644
index 00000000000000..1e2efc51103be0
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
@@ -0,0 +1 @@
+XX(\+1|1)?[ \-\.]?\(?(?<areacode>[0-9]{3})\)?[ \-\.]?(?<prefix>[0-9]{3})[ \-\.]?(?<number>[0-9]{4})[ \.]*(ext|x)?[ \.]*(?<extension>[0-9]{0,5})
diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt
index f0121291eaa015..9d330a668ee88b 100644
--- a/Modules/_xxtestfuzz/fuzz_tests.txt
+++ b/Modules/_xxtestfuzz/fuzz_tests.txt
@@ -2,3 +2,6 @@ fuzz_builtin_float
 fuzz_builtin_int
 fuzz_builtin_unicode
 fuzz_json_loads
+fuzz_sre_compile
+fuzz_sre_match
+fuzz_csv_reader
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index e862a99cfb34ca..95a25f9b0c1f20 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -115,6 +115,155 @@ static int fuzz_json_loads(const char* data, size_t size) {
     return 0;
 }
 
+#define MAX_RE_TEST_SIZE 0x10000
+
+/* Initialized in LLVMFuzzerTestOneInput */
+PyObject* sre_compile_method = NULL;
+PyObject* sre_error_exception = NULL;
+int SRE_FLAG_DEBUG = 0;
+/* Fuzz _sre.compile(x) */
+static int fuzz_sre_compile(const char* data, size_t size) {
+    /* Ignore really long regex patterns that will timeout the fuzzer */
+    if (size > MAX_RE_TEST_SIZE) {
+        return 0;
+    }
+    /* We treat the first 2 bytes of the input as a number for the flags */
+    if (size < 2) {
+        return 0;
+    }
+    uint16_t flags = ((uint16_t*) data)[0];
+    /* We remove the SRE_FLAG_DEBUG if present. This is because it
+       prints to stdout which greatly decreases fuzzing speed */
+    flags &= ~SRE_FLAG_DEBUG;
+
+    /* Pull the pattern from the remaining bytes */
+    PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2);
+    if (pattern_bytes == NULL) {
+        return 0;
+    }
+    PyObject* flags_obj = PyLong_FromUnsignedLong(flags);
+    if (flags_obj == NULL) {
+        Py_DECREF(pattern_bytes);
+        return 0;
+    }
+
+    /* compiled = sre_compile.compile(data[2:], data[0:2]) */
+    PyObject* compiled = PyObject_CallFunctionObjArgs(
+        sre_compile_method, pattern_bytes, flags_obj, NULL);
+    /* Ignore ValueError as the fuzzer will more than likely
+       generate some invalid combination of flags */
+    if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
+        PyErr_Clear();
+    }
+    /* Ignore some common errors thrown by sre_parse:
+       Overflow, Assertion and Index */
+    if (compiled == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) {
+        PyErr_Clear();
+    }
+    if (compiled == NULL && PyErr_ExceptionMatches(PyExc_AssertionError)) {
+        PyErr_Clear();
+    }
+    if (compiled == NULL && PyErr_ExceptionMatches(PyExc_IndexError)) {
+        PyErr_Clear();
+    }
+    /* Ignore re.error */
+    if (compiled == NULL && PyErr_ExceptionMatches(sre_error_exception)) {
+        PyErr_Clear();
+    }
+
+    Py_DECREF(pattern_bytes);
+    Py_DECREF(flags_obj);
+    Py_XDECREF(compiled);
+    return 0;
+}
+
+/* Some random patterns used to test re.match.
+   Be careful not to add catostraphically slow regexes here, we want to
+   excercise the matchign code without causing timeouts.*/
+static const char* regex_patterns[] = {
+    ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]",
+    "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?",
+    "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$",
+    "(?:a*)*", "a{1,2}?"
+};
+const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]);
+PyObject** compiled_patterns = NULL;
+/* Fuzz re.match(x) */
+static int fuzz_sre_match(const char* data, size_t size) {
+    if (size < 1 || size > MAX_RE_TEST_SIZE) {
+        return 0;
+    }
+    /* Use the first byte as a uint8_t specifying the index of the
+       regex to use */
+    uint8_t idx = ((uint8_t*) data)[0];
+    idx = idx % NUM_PATTERNS;
+
+    /* Pull the string to match from the remaining bytes */
+    PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1);
+    if (to_match == NULL) {
+        return 0;
+    }
+
+    PyObject* pattern = compiled_patterns[idx];
+    PyObject* match_callable = PyObject_GetAttrString(pattern, "match");
+
+    PyObject* matches = PyObject_CallFunctionObjArgs(match_callable, to_match, NULL);
+
+    Py_XDECREF(matches);
+    Py_DECREF(match_callable);
+    Py_DECREF(to_match);
+    return 0;
+}
+
+#define MAX_CSV_TEST_SIZE 0x10000
+/* Initialized in LLVMFuzzerTestOneInput */
+PyObject* csv_module = NULL;
+PyObject* csv_error = NULL;
+/* Fuzz csv.reader([x]) */
+static int fuzz_csv_reader(const char* data, size_t size) {
+    if (size < 1 || size > MAX_CSV_TEST_SIZE) {
+        return 0;
+    }
+    /* Ignore non null-terminated strings since _csv can't handle
+       embedded nulls */
+    if (memchr(data, '\0', size) == NULL) {
+        return 0;
+    }
+
+    PyObject* s = PyUnicode_FromString(data);
+    /* Ignore exceptions until we have a valid string */
+    if (s == NULL) {
+        PyErr_Clear();
+        return 0;
+    }
+
+    /* Split on \n so we can test multiple lines */
+    PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n");
+    if (lines == NULL) {
+        Py_DECREF(s);
+        return 0;
+    }
+
+    PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines);
+    if (reader) {
+        /* Consume all of the reader as an iterator */
+        PyObject* parsed_line;
+        while ((parsed_line = PyIter_Next(reader))) {
+            Py_DECREF(parsed_line);
+        }
+    }
+
+    /* Ignore csv.Error because we're probably going to generate
+       some bad files (embedded new-lines, unterminated quotes etc) */
+    if (PyErr_ExceptionMatches(csv_error)) {
+        PyErr_Clear();
+    }
+
+    Py_XDECREF(reader);
+    Py_DECREF(s);
+    return 0;
+}
+
 /* Run fuzzer and abort on failure. */
 static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
     int rv = fuzzer((const char*) data, size);
@@ -152,12 +301,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
            initialize CPython ourselves on the first run. */
         Py_InitializeEx(0);
     }
-#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
-    if (json_loads_method == NULL) {
-        PyObject* json_module = PyImport_ImportModule("json");
-        json_loads_method = PyObject_GetAttrString(json_module, "loads");
-    }
-#endif
 
     int rv = 0;
 
@@ -171,7 +314,56 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
     rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
 #endif
 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
+    /* Import json.loads */
+    if (json_loads_method == NULL) {
+        PyObject* json_module = PyImport_ImportModule("json");
+        json_loads_method = PyObject_GetAttrString(json_module, "loads");
+    }
+
     rv |= _run_fuzz(data, size, fuzz_json_loads);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile)
+    /* Impore sre_compile.compile and sre.error */
+    if (sre_compile_method == NULL) {
+        PyObject* sre_compile_module = PyImport_ImportModule("sre_compile");
+        sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile");
+
+        PyObject* sre_constants = PyImport_ImportModule("sre_constants");
+        sre_error_exception = PyObject_GetAttrString(sre_constants, "error");
+        SRE_FLAG_DEBUG = PyLong_AsLong(
+            PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG"));
+    }
+
+    rv |= _run_fuzz(data, size, fuzz_sre_compile);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match)
+    /* Precompile all the regex patterns on the first run for faster fuzzing */
+    if (compiled_patterns == NULL) {
+        PyObject* re_module = PyImport_ImportModule("re");
+        compiled_patterns = (PyObject**) PyMem_RawMalloc(
+            sizeof(PyObject*) * NUM_PATTERNS);
+
+        for (size_t i = 0; i < NUM_PATTERNS; i++) {
+            PyObject* compiled = PyObject_CallMethod(
+                re_module, "compile", "y", regex_patterns[i]);
+
+            if (compiled == NULL) {
+                PyErr_Print();
+                abort();
+            }
+            compiled_patterns[i] = compiled;
+        }
+    }
+
+    rv |= _run_fuzz(data, size, fuzz_sre_match);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader)
+    /* Import csv and csv.Error */
+    if (csv_module == NULL) {
+        csv_module = PyImport_ImportModule("csv");
+        csv_error = PyObject_GetAttrString(csv_module, "Error");
+    }
+    rv |= _run_fuzz(data, size, fuzz_csv_reader);
 #endif
   return rv;
 }

From f94cb1e7d9183a33a88716810e577100896e57ba Mon Sep 17 00:00:00 2001
From: Ammar Askar <aaskar@google.com>
Date: Wed, 26 Jun 2019 13:18:36 -0700
Subject: [PATCH 2/5] Use unsigned char for indexing

---
 Modules/_xxtestfuzz/fuzzer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index 95a25f9b0c1f20..1f783cde734bf8 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -195,7 +195,7 @@ static int fuzz_sre_match(const char* data, size_t size) {
     }
     /* Use the first byte as a uint8_t specifying the index of the
        regex to use */
-    uint8_t idx = ((uint8_t*) data)[0];
+    unsigned char idx = (unsigned char) data[0];
     idx = idx % NUM_PATTERNS;
 
     /* Pull the string to match from the remaining bytes */

From 7d0e0e2c50599fa993d1b74bf295dabaa9c69420 Mon Sep 17 00:00:00 2001
From: Ammar Askar <ammar@ammaraskar.com>
Date: Fri, 28 Jun 2019 05:19:53 -0400
Subject: [PATCH 3/5] Clean up initialization code to handle all errors

---
 Modules/_xxtestfuzz/fuzzer.c | 135 +++++++++++++++++++++++++----------
 1 file changed, 98 insertions(+), 37 deletions(-)

diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index 1f783cde734bf8..ed812da97faa4d 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -81,8 +81,17 @@ static int fuzz_builtin_unicode(const char* data, size_t size) {
 
 #define MAX_JSON_TEST_SIZE 0x10000
 
-/* Initialized in LLVMFuzzerTestOneInput */
 PyObject* json_loads_method = NULL;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_json_loads() {
+    /* Import json.loads */
+    PyObject* json_module = PyImport_ImportModule("json");
+    if (json_module == NULL) {
+        return 1;
+    }
+    json_loads_method = PyObject_GetAttrString(json_module, "loads");
+    return json_loads_method != NULL;
+}
 /* Fuzz json.loads(x) */
 static int fuzz_json_loads(const char* data, size_t size) {
     /* Since python supports arbitrarily large ints in JSON,
@@ -117,10 +126,36 @@ static int fuzz_json_loads(const char* data, size_t size) {
 
 #define MAX_RE_TEST_SIZE 0x10000
 
-/* Initialized in LLVMFuzzerTestOneInput */
 PyObject* sre_compile_method = NULL;
 PyObject* sre_error_exception = NULL;
 int SRE_FLAG_DEBUG = 0;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_sre_compile() {
+    /* Import sre_compile.compile and sre.error */
+    PyObject* sre_compile_module = PyImport_ImportModule("sre_compile");
+    if (sre_compile_module == NULL) {
+        return 0;
+    }
+    sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile");
+    if (sre_compile_method == NULL) {
+        return 0;
+    }
+
+    PyObject* sre_constants = PyImport_ImportModule("sre_constants");
+    if (sre_constants == NULL) {
+        return 0;
+    }
+    sre_error_exception = PyObject_GetAttrString(sre_constants, "error");
+    if (sre_error_exception == NULL) {
+        return 0;
+    }
+    PyObject* debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG");
+    if (debug_flag == NULL) {
+        return 0;
+    }
+    SRE_FLAG_DEBUG = PyLong_AsLong(debug_flag);
+    return 1;
+}
 /* Fuzz _sre.compile(x) */
 static int fuzz_sre_compile(const char* data, size_t size) {
     /* Ignore really long regex patterns that will timeout the fuzzer */
@@ -179,7 +214,7 @@ static int fuzz_sre_compile(const char* data, size_t size) {
 
 /* Some random patterns used to test re.match.
    Be careful not to add catostraphically slow regexes here, we want to
-   excercise the matchign code without causing timeouts.*/
+   exercise the matching code without causing timeouts.*/
 static const char* regex_patterns[] = {
     ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]",
     "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?",
@@ -188,6 +223,31 @@ static const char* regex_patterns[] = {
 };
 const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]);
 PyObject** compiled_patterns = NULL;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_sre_match() {
+    PyObject* re_module = PyImport_ImportModule("re");
+    if (re_module == NULL) {
+        return 0;
+    }
+    compiled_patterns = (PyObject**) PyMem_RawMalloc(
+        sizeof(PyObject*) * NUM_PATTERNS);
+    if (compiled_patterns == NULL) {
+        PyErr_NoMemory();
+        return 0;
+    }
+
+    /* Precompile all the regex patterns on the first run for faster fuzzing */
+    for (size_t i = 0; i < NUM_PATTERNS; i++) {
+        PyObject* compiled = PyObject_CallMethod(
+            re_module, "compile", "y", regex_patterns[i]);
+        /* Bail if any of the patterns fail to compile */
+        if (compiled == NULL) {
+            return 0;
+        }
+        compiled_patterns[i] = compiled;
+    }
+    return 1;
+}
 /* Fuzz re.match(x) */
 static int fuzz_sre_match(const char* data, size_t size) {
     if (size < 1 || size > MAX_RE_TEST_SIZE) {
@@ -216,9 +276,18 @@ static int fuzz_sre_match(const char* data, size_t size) {
 }
 
 #define MAX_CSV_TEST_SIZE 0x10000
-/* Initialized in LLVMFuzzerTestOneInput */
 PyObject* csv_module = NULL;
 PyObject* csv_error = NULL;
+/* Called by LLVMFuzzerTestOneInput for initialization */
+static int init_csv_reader() {
+    /* Import csv and csv.Error */
+    csv_module = PyImport_ImportModule("csv");
+    if (csv_module == NULL) {
+        return 0;
+    }
+    csv_error = PyObject_GetAttrString(csv_module, "Error");
+    return csv_error != NULL;
+}
 /* Fuzz csv.reader([x]) */
 static int fuzz_csv_reader(const char* data, size_t size) {
     if (size < 1 || size > MAX_CSV_TEST_SIZE) {
@@ -314,55 +383,47 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
     rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
 #endif
 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
-    /* Import json.loads */
-    if (json_loads_method == NULL) {
-        PyObject* json_module = PyImport_ImportModule("json");
-        json_loads_method = PyObject_GetAttrString(json_module, "loads");
+    static int JSON_LOADS_INITIALIZED = 0;
+    if (!JSON_LOADS_INITIALIZED && !init_json_loads()) {
+        PyErr_Print();
+        abort();
+    } else {
+        JSON_LOADS_INITIALIZED = 1;
     }
 
     rv |= _run_fuzz(data, size, fuzz_json_loads);
 #endif
 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile)
-    /* Impore sre_compile.compile and sre.error */
-    if (sre_compile_method == NULL) {
-        PyObject* sre_compile_module = PyImport_ImportModule("sre_compile");
-        sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile");
-
-        PyObject* sre_constants = PyImport_ImportModule("sre_constants");
-        sre_error_exception = PyObject_GetAttrString(sre_constants, "error");
-        SRE_FLAG_DEBUG = PyLong_AsLong(
-            PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG"));
+    static int SRE_COMPILE_INITIALIZED = 0;
+    if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) {
+        PyErr_Print();
+        abort();
+    } else {
+        SRE_COMPILE_INITIALIZED = 1;
     }
 
     rv |= _run_fuzz(data, size, fuzz_sre_compile);
 #endif
 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match)
-    /* Precompile all the regex patterns on the first run for faster fuzzing */
-    if (compiled_patterns == NULL) {
-        PyObject* re_module = PyImport_ImportModule("re");
-        compiled_patterns = (PyObject**) PyMem_RawMalloc(
-            sizeof(PyObject*) * NUM_PATTERNS);
-
-        for (size_t i = 0; i < NUM_PATTERNS; i++) {
-            PyObject* compiled = PyObject_CallMethod(
-                re_module, "compile", "y", regex_patterns[i]);
-
-            if (compiled == NULL) {
-                PyErr_Print();
-                abort();
-            }
-            compiled_patterns[i] = compiled;
-        }
+    static int SRE_MATCH_INITIALIZED = 0;
+    if (!SRE_MATCH_INITIALIZED && !init_sre_match()) {
+        PyErr_Print();
+        abort();
+    } else {
+        SRE_MATCH_INITIALIZED = 1;
     }
 
     rv |= _run_fuzz(data, size, fuzz_sre_match);
 #endif
 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader)
-    /* Import csv and csv.Error */
-    if (csv_module == NULL) {
-        csv_module = PyImport_ImportModule("csv");
-        csv_error = PyObject_GetAttrString(csv_module, "Error");
+    static int CSV_READER_INITIALIZED = 0;
+    if (!CSV_READER_INITIALIZED && !init_csv_reader()) {
+        PyErr_Print();
+        abort();
+    } else {
+        CSV_READER_INITIALIZED = 1;
     }
+
     rv |= _run_fuzz(data, size, fuzz_csv_reader);
 #endif
   return rv;

From 664161eae75c7f969a51c6f829b8cb3fb00f11fc Mon Sep 17 00:00:00 2001
From: "Gregory P. Smith" <greg@krypto.org>
Date: Fri, 28 Jun 2019 02:38:59 -0700
Subject: [PATCH 4/5] return 0 on init_json_loads error.

---
 Modules/_xxtestfuzz/fuzzer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index ed812da97faa4d..352104d6a94f0c 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -87,7 +87,7 @@ static int init_json_loads() {
     /* Import json.loads */
     PyObject* json_module = PyImport_ImportModule("json");
     if (json_module == NULL) {
-        return 1;
+        return 0;
     }
     json_loads_method = PyObject_GetAttrString(json_module, "loads");
     return json_loads_method != NULL;

From 9f70de8590cc8582cb8b85eb0e8d95ee930cfebb Mon Sep 17 00:00:00 2001
From: Ammar Askar <ammar@ammaraskar.com>
Date: Fri, 28 Jun 2019 06:07:55 -0400
Subject: [PATCH 5/5] Collect up conditionals for some exceptions

---
 Modules/_xxtestfuzz/fuzzer.c | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index 352104d6a94f0c..16104e492ab103 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -105,19 +105,18 @@ static int fuzz_json_loads(const char* data, size_t size) {
         return 0;
     }
     PyObject* parsed = PyObject_CallFunctionObjArgs(json_loads_method, input_bytes, NULL);
-    /* Ignore ValueError as the fuzzer will more than likely
-       generate some invalid json and values */
-    if (parsed == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
-        PyErr_Clear();
-    }
-    /* Ignore RecursionError as the fuzzer generates long sequences of
-       arrays such as `[[[...` */
-    if (parsed == NULL && PyErr_ExceptionMatches(PyExc_RecursionError)) {
-        PyErr_Clear();
-    }
-    /* Ignore unicode errors, invalid byte sequences are common */
-    if (parsed == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
-        PyErr_Clear();
+    if (parsed == NULL) {
+        /* Ignore ValueError as the fuzzer will more than likely
+           generate some invalid json and values */
+        if (PyErr_ExceptionMatches(PyExc_ValueError) ||
+        /* Ignore RecursionError as the fuzzer generates long sequences of
+           arrays such as `[[[...` */
+            PyErr_ExceptionMatches(PyExc_RecursionError) ||
+        /* Ignore unicode errors, invalid byte sequences are common */
+            PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)
+        ) {
+            PyErr_Clear();
+        }
     }
     Py_DECREF(input_bytes);
     Py_XDECREF(parsed);
@@ -192,13 +191,10 @@ static int fuzz_sre_compile(const char* data, size_t size) {
     }
     /* Ignore some common errors thrown by sre_parse:
        Overflow, Assertion and Index */
-    if (compiled == NULL && PyErr_ExceptionMatches(PyExc_OverflowError)) {
-        PyErr_Clear();
-    }
-    if (compiled == NULL && PyErr_ExceptionMatches(PyExc_AssertionError)) {
-        PyErr_Clear();
-    }
-    if (compiled == NULL && PyErr_ExceptionMatches(PyExc_IndexError)) {
+    if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) ||
+                             PyErr_ExceptionMatches(PyExc_AssertionError) ||
+                             PyErr_ExceptionMatches(PyExc_IndexError))
+    ) {
         PyErr_Clear();
     }
     /* Ignore re.error */