python · gpshead · Sep 6, 2017 · Jul 21, 2017 · Jul 25, 2017 · Jul 25, 2017
diff --git a/Lib/test/test_xxtestfuzz.py b/Lib/test/test_xxtestfuzz.py
@@ -0,0 +1,23 @@
+import faulthandler
+import test.support
+import unittest
+
+_xxtestfuzz = test.support.import_module('_xxtestfuzz')  # SkipTest if absent
+
+
+class TestFuzzer(unittest.TestCase):
+    """To keep our https://github.com/google/oss-fuzz API working."""
+
+    def test_sample_input_smoke_test(self):
+        """This is only a regression test: Check that it doesn't crash."""
+        _xxtestfuzz.run(b"")
+        _xxtestfuzz.run(b"\0")
+        _xxtestfuzz.run(b"{")
+        _xxtestfuzz.run(b" ")
+        _xxtestfuzz.run(b"x")
+        _xxtestfuzz.run(b"1")
+
+
+if __name__ == "__main__":
+    faulthandler.enable()  # dump the traceback on a fatal signal (abort())
+    unittest.main()
diff --git a/Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst b/Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst
@@ -0,0 +1 @@
+Add fuzz tests for float(str), int(str), unicode(str); for oss-fuzz.
diff --git a/Modules/_xxtestfuzz/README.rst b/Modules/_xxtestfuzz/README.rst
@@ -0,0 +1,46 @@
+Fuzz Tests for CPython
+======================
+
+These fuzz tests are designed to be included in Google's `oss-fuzz`_ project.
+
+oss-fuzz works against a library exposing a function of the form
+``int LLVMFuzzerTestOneInput(const uint8_t* data, size_t length)``. We provide
+that library (``fuzzer.c``), and include a ``_xxtestfuzz`` module for testing
+with some toy values -- no fuzzing occurs in Python's test suite.
+
+oss-fuzz will regularly pull from CPython, discover all the tests in
+``fuzz_tests.txt``, and run them -- so adding a new test here means it will
+automatically be run in oss-fuzz, while also being smoke-tested as part of
+CPython's test suite.
+
+Adding a new fuzz test
+----------------------
+
+Add the test name on a new line in ``fuzz_tests.txt``.
+
+In ``fuzzer.c``, add a function to be run::
+
+    int $test_name (const char* data, size_t size) {
+        ...
+        return 0;
+    }
+
+
+And invoke it from ``LLVMFuzzerTestOneInput``::
+
+    #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
+        rv |= _run_fuzz(data, size, fuzz_builtin_float);
+    #endif
+
+``LLVMFuzzerTestOneInput`` will run in oss-fuzz, with each test in
+``fuzz_tests.txt`` run separately.
+
+What makes a good fuzz test
+---------------------------
+
+Libraries written in C that might handle untrusted data are worthwhile. The
+more complex the logic (e.g. parsing), the more likely this is to be a useful
+fuzz test. See the existing examples for reference, and refer to the
+`oss-fuzz`_ docs.
+
+.. _oss-fuzz: https://github.com/google/oss-fuzz
diff --git a/Modules/_xxtestfuzz/_xxtestfuzz.c b/Modules/_xxtestfuzz/_xxtestfuzz.c
@@ -0,0 +1,53 @@
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+
+/* Implements run(data): nonzero fuzzer results become RuntimeError. */
+static PyObject* _fuzz_run(PyObject* self, PyObject* args) {
+    const char* data;
+    Py_ssize_t length;
+    if (!PyArg_ParseTuple(args, "s#", &data, &length)) {
+        return NULL;
+    }
+    int status = LLVMFuzzerTestOneInput((const uint8_t*)data, length);
+    if (PyErr_Occurred()) {
+        return NULL;  /* An exception is already set; just propagate it. */
+    }
+    if (status == 0) {
+        Py_RETURN_NONE;
+    }
+    PyErr_Format(
+        PyExc_RuntimeError, "Nonzero return code from fuzzer: %d", status);
+    return NULL;
+}
+
+static PyMethodDef module_methods[] = {
+    {"run", _fuzz_run, METH_VARARGS, ""},  /* run(data) */
+    {NULL, NULL, 0, NULL},  /* Sentinel: marks the end of the table. */
+};
+
+static struct PyModuleDef _fuzzmodule = {
+    PyModuleDef_HEAD_INIT,
+    "_xxtestfuzz",   /* m_name: same name as in PyInit__xxtestfuzz */
+    NULL,            /* m_doc */
+    0,               /* m_size */
+    module_methods,  /* m_methods */
+    NULL,            /* m_slots */
+    NULL,            /* m_traverse */
+    NULL,            /* m_clear */
+    NULL             /* m_free */
+};
+
+/* Module initialization function.
+
+   The import machinery locates it by its PyInit_<modulename> name.
+   No further setup is needed: run() is registered via the method table.
+   Returns the new module object, or NULL if PyModule_Create() fails. */
+PyMODINIT_FUNC
+PyInit__xxtestfuzz(void)
+{
+    return PyModule_Create(&_fuzzmodule);
+}
diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt
@@ -0,0 +1,3 @@
+fuzz_builtin_float
+fuzz_builtin_int
+fuzz_builtin_unicode
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
@@ -0,0 +1,120 @@
+/* A fuzz test for CPython.
+
+  The only exposed function is LLVMFuzzerTestOneInput, which is called by
+  fuzzers and by the _xxtestfuzz module for smoke tests.
+
+  To build exactly one fuzz test, as when running in oss-fuzz etc.,
+  build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
+  LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
+      -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
+
+  See the source code for LLVMFuzzerTestOneInput for details. */
+
+#include <Python.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+/* Fuzz PyFloat_FromString, fed a bytes object, as a proxy for float(str). */
+static int fuzz_builtin_float(const char* data, size_t size) {
+    PyObject* bytes = PyBytes_FromStringAndSize(data, size);
+    if (bytes != NULL) {
+        PyObject* result = PyFloat_FromString(bytes);
+        if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
+            PyErr_Clear();  /* Unparsable input is the expected failure. */
+        }
+        Py_XDECREF(result);
+        Py_DECREF(bytes);
+    }
+    return 0;
+}
+
+/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
+static int fuzz_builtin_int(const char* data, size_t size) {
+    /* Derive the base from a hash of the input: hashing the input is a
+       common way to produce a fuzzed function's extra parameters, and it
+       lets the bases get exercised.  The remainder may be negative. */
+    int base = _Py_HashBytes(data, size) % 37;
+    if (base == -1) {
+        /* Treated as "an error occurred": bail early.  (Negating it would
+           also give 1, which is not a valid base.) */
+        return 0;
+    }
+    if (base < 0) {
+        base = -base;
+    }
+    else if (base == 1) {
+        /* 1 is the only number between 0 and 36 that is not a valid base. */
+        base = 0;
+    }
+
+    PyObject* text = PyUnicode_FromStringAndSize(data, size);
+    if (text == NULL) {
+        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+            PyErr_Clear();  /* Undecodable input: nothing to parse. */
+        }
+        return 0;
+    }
+    PyObject* number = PyLong_FromUnicodeObject(text, base);
+    /* NOTE(review): this discards every exception, not only ValueError. */
+    PyErr_Clear();
+    Py_XDECREF(number);
+    Py_DECREF(text);
+    return 0;
+}
+
+/* Fuzz PyUnicode_FromStringAndSize, i.e. UTF-8 decoding of the raw input. */
+static int fuzz_builtin_unicode(const char* data, size_t size) {
+    PyObject* text = PyUnicode_FromStringAndSize(data, size);
+    if (text == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+        PyErr_Clear();  /* Undecodable input is the expected failure. */
+    }
+    Py_XDECREF(text);
+    return 0;
+}
+
+/* Invoke one fuzz test on the input; abort() if it leaves an exception set. */
+static int _run_fuzz(const uint8_t *data, size_t size,
+                     int(*fuzzer)(const char* , size_t)) {
+    int status = fuzzer((const char*) data, size);
+    if (PyErr_Occurred()) {
+        /* Each fuzz test is expected to clear the errors it anticipates, so
+           anything still pending is a last-ditch failure: print and crash. */
+        PyErr_Print();
+        abort();
+    }
+    return status;  /* Reserved for future use; passed through unchanged. */
+}
+
+/* LeakSanitizer hook; nonzero turns leak checking off (CPython is noisy). */
+int __lsan_is_turned_off(void) { return 1; }
+
+/* Fuzz test interface.
+   This returns the bitwise or of all the fuzz tests' return values.
+
+   All fuzz tests must return 0, as all nonzero return codes are reserved for
+   future use -- we propagate the return values for that future case.
+   (And we bitwise or when running multiple tests to verify that normally we
+   only return 0.) */
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+    if (!Py_IsInitialized()) {
+        /* LLVMFuzzerTestOneInput is called repeatedly from the same process,
+           with no separate initialization phase, sadly, so we need to
+           initialize CPython ourselves on the first run. */
+        Py_InitializeEx(0);
+    }
+
+    int rv = 0;
+
+    /* Spelled out per test: a "defined" that comes from macro expansion is
+       undefined behavior in C, so no helper macro is used here. */
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
+    rv |= _run_fuzz(data, size, fuzz_builtin_float);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
+    rv |= _run_fuzz(data, size, fuzz_builtin_int);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
+    rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
+#endif
+    return rv;
+}
diff --git a/setup.py b/setup.py
@@ -715,6 +715,12 @@ def detect_modules(self):
         # syslog daemon interface
         exts.append( Extension('syslog', ['syslogmodule.c']) )
 
+        # Fuzz tests.
+        exts.append( Extension(
+            '_xxtestfuzz',
+            ['_xxtestfuzz/_xxtestfuzz.c', '_xxtestfuzz/fuzzer.c'])
+        )
+
         #
         # Here ends the simple stuff.  From here on, modules need certain
         # libraries, are platform-specific, or present other surprises.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Add fuzz tests for float(str), int(str), unicode(str); for oss-fuzz.