From 8316e8d79094b4b54156895ce5bddfb01b65e743 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Fri, 21 Jul 2017 00:25:51 -0700 Subject: [PATCH 01/23] Add basic fuzz tests for a few common builtin functions. This is an easy place to start, and these functions are probably safe. Please review with it in mind that I want to add more fuzz tests later. While the fuzz tests are included in CPython and compiled / tested on a very basic level inside CPython itself, the actual fuzzing happens as part of oss-fuzz (https://github.com/google/oss-fuzz). The reason to include the tests in CPython is to make sure that they're maintained as part of the CPython project, especially when (as some will) they use internal implementation details in the test. (This will be necessary sometimes because e.g. the fuzz test should never enter Python's interpreter loop, whereas some APIs only expose themselves publicly as Python functions.) This particular set of changes is part of testing Python's builtins, tracked internally at Google by b/37562550. 
--- Lib/test/test_fuzz.py | 15 ++++++ Modules/_fuzz/README.rst | 26 ++++++++++ Modules/_fuzz/_fuzzmodule.c | 70 ++++++++++++++++++++++++++ Modules/_fuzz/fuzz_builtin_float.inc | 12 +++++ Modules/_fuzz/fuzz_builtin_hash.inc | 4 ++ Modules/_fuzz/fuzz_builtin_int.inc | 29 +++++++++++ Modules/_fuzz/fuzz_builtin_unicode.inc | 9 ++++ setup.py | 3 ++ 8 files changed, 168 insertions(+) create mode 100644 Lib/test/test_fuzz.py create mode 100644 Modules/_fuzz/README.rst create mode 100644 Modules/_fuzz/_fuzzmodule.c create mode 100644 Modules/_fuzz/fuzz_builtin_float.inc create mode 100644 Modules/_fuzz/fuzz_builtin_hash.inc create mode 100644 Modules/_fuzz/fuzz_builtin_int.inc create mode 100644 Modules/_fuzz/fuzz_builtin_unicode.inc diff --git a/Lib/test/test_fuzz.py b/Lib/test/test_fuzz.py new file mode 100644 index 00000000000000..deccbf9d905ca5 --- /dev/null +++ b/Lib/test/test_fuzz.py @@ -0,0 +1,15 @@ +import unittest +from test import support +import _fuzz + +class TestFuzz(unittest.TestCase): + + def test_fuzz(self): + """Run the fuzz tests on blank input. + + This isn't meaningful and only checks it doesn't crash. + """ + _fuzz._fuzz_run_all() + +if __name__ == "__main__": + support.run_unittest(TestFuzz) diff --git a/Modules/_fuzz/README.rst b/Modules/_fuzz/README.rst new file mode 100644 index 00000000000000..01e9c3810e4692 --- /dev/null +++ b/Modules/_fuzz/README.rst @@ -0,0 +1,26 @@ +Fuzz Tests for CPython +====================== + +These fuzz tests are designed to be included in Google's `oss-fuzz`_ project. + +Adding a new fuzz test +---------------------- + +In a file named ``$test_name.inc``, add a function with the same name:: + + int $test_name (const char* data, size_t size) { + ... + return 0; + } + + +This function will be called from ``LLVMFuzzerTestOneInput`` for fuzz testing +when run in oss-fuzz. + +What makes a good fuzz test +--------------------------- + +Libraries written in C that might handle untrusted data are worthwhile. 
See the +existing examples for reference, and refer to the `oss-fuzz`_ docs. + +.. _oss-fuzz: https://github.com/google/oss-fuzz diff --git a/Modules/_fuzz/_fuzzmodule.c b/Modules/_fuzz/_fuzzmodule.c new file mode 100644 index 00000000000000..36fe24a3f118a1 --- /dev/null +++ b/Modules/_fuzz/_fuzzmodule.c @@ -0,0 +1,70 @@ +#include <Python.h> +#include <stdlib.h> +#include <inttypes.h> + +// List all fuzz functions here, and in the _fuzz_run_all function. +#include "fuzz_builtin_hash.inc" +#include "fuzz_builtin_int.inc" +#include "fuzz_builtin_float.inc" +#include "fuzz_builtin_unicode.inc" + +// Runs fuzzer and returns nonzero if an error occurred. +int _run_fuzz(int(*fuzzer)(const char* , size_t)) { + int rv = fuzzer("", 0); + if (PyErr_Occurred()) { + return 1; + } + if (rv != 0) { + PyErr_Format( + PyExc_RuntimeError, "Nonzero return code from fuzzer: %d", rv); + return 1; + } + return 0; +} + + +static PyObject* _fuzz_run_all(PyObject* self) { +#define _Py_FUZZ_STRINGIZE(x) _Py_FUZZ_STRINGIZE2(x) +#define _Py_FUZZ_STRINGIZE2(x) #x +#define _Py_FUZZ_RUN(f) \ + do {\ + printf("%s()\n", _Py_FUZZ_STRINGIZE(f));\ + if (_run_fuzz(f)) return NULL; \ + } while (0) + _Py_FUZZ_RUN(fuzz_builtin_hash); + _Py_FUZZ_RUN(fuzz_builtin_int); + _Py_FUZZ_RUN(fuzz_builtin_float); + _Py_FUZZ_RUN(fuzz_builtin_unicode); +#undef _Py_FUZZ_RUN +#undef _Py_FUZZ_STRINGIZE +#undef _Py_FUZZ_STRINGIZE2 + Py_RETURN_NONE; +} + +static PyMethodDef module_methods[] = { + {"_fuzz_run_all", (PyCFunction)_fuzz_run_all, METH_NOARGS, ""}, + {NULL}, +}; + +static struct PyModuleDef _fuzzmodule = { + PyModuleDef_HEAD_INIT, + "_fuzz", + NULL, + 0, + module_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__fuzz(void) +{ + PyObject *m = NULL; + + if ((m = PyModule_Create(&_fuzzmodule)) == NULL) { + return NULL; + } + return m; +} diff --git a/Modules/_fuzz/fuzz_builtin_float.inc b/Modules/_fuzz/fuzz_builtin_float.inc new file mode 100644 index 00000000000000..bd9a729c461d22 --- /dev/null +++ 
b/Modules/_fuzz/fuzz_builtin_float.inc @@ -0,0 +1,12 @@ +int fuzz_builtin_float(const char* data, size_t size) { + PyObject* s = PyBytes_FromStringAndSize(data, size); + if (s == NULL) return 0; + PyObject* f = PyFloat_FromString(s); + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + + Py_XDECREF(f); + Py_DECREF(s); + return 0; +} diff --git a/Modules/_fuzz/fuzz_builtin_hash.inc b/Modules/_fuzz/fuzz_builtin_hash.inc new file mode 100644 index 00000000000000..3d3da25a1b5b40 --- /dev/null +++ b/Modules/_fuzz/fuzz_builtin_hash.inc @@ -0,0 +1,4 @@ +int fuzz_builtin_hash(const char* data, size_t size) { + _Py_HashBytes(data, size); + return 0; +} diff --git a/Modules/_fuzz/fuzz_builtin_int.inc b/Modules/_fuzz/fuzz_builtin_int.inc new file mode 100644 index 00000000000000..8bd10c49604c88 --- /dev/null +++ b/Modules/_fuzz/fuzz_builtin_int.inc @@ -0,0 +1,29 @@ +// Fuzz PyLong_FromUnicodeObject as a proxy for int(). +int fuzz_builtin_int(const char* data, size_t size) { + int base = _Py_HashBytes(data, size) % 36; + if (base == 1) { + base = 0; + } + if (base == -1) { + return 0; // An error occurred, bail early. + } + if (base < 0) { + base = -base; + } + + PyObject* s = PyUnicode_FromStringAndSize(data, size); + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyErr_Clear(); + } + return 0; + } + PyObject* l = PyLong_FromUnicodeObject(s, base); + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + PyErr_Clear(); + Py_XDECREF(l); + Py_DECREF(s); + return 0; +} diff --git a/Modules/_fuzz/fuzz_builtin_unicode.inc b/Modules/_fuzz/fuzz_builtin_unicode.inc new file mode 100644 index 00000000000000..5281cc39b4e0cb --- /dev/null +++ b/Modules/_fuzz/fuzz_builtin_unicode.inc @@ -0,0 +1,9 @@ +// Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). 
+int fuzz_builtin_unicode(const char* data, size_t size) { + PyObject* s = PyUnicode_FromStringAndSize(data, size); + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyErr_Clear(); + } + Py_XDECREF(s); + return 0; +} diff --git a/setup.py b/setup.py index 4f0f522bf8f06b..05dd2fb71f87cc 100644 --- a/setup.py +++ b/setup.py @@ -715,6 +715,9 @@ def detect_modules(self): # syslog daemon interface exts.append( Extension('syslog', ['syslogmodule.c']) ) + # Fuzz tests. + exts.append( Extension('_fuzz', ['_fuzz/_fuzzmodule.c'], optional=False) ) + # # Here ends the simple stuff. From here on, modules need certain # libraries, are platform-specific, or present other surprises. From f47f875915c95f1fded9df244383534b410d81ec Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Tue, 25 Jul 2017 14:24:55 -0700 Subject: [PATCH 02/23] Remove fuzzing of hash() per comment by kcc / oss-fuzz-team. https://github.com/google/oss-fuzz/pull/731 --- Modules/_fuzz/fuzz_builtin_hash.inc | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 Modules/_fuzz/fuzz_builtin_hash.inc diff --git a/Modules/_fuzz/fuzz_builtin_hash.inc b/Modules/_fuzz/fuzz_builtin_hash.inc deleted file mode 100644 index 3d3da25a1b5b40..00000000000000 --- a/Modules/_fuzz/fuzz_builtin_hash.inc +++ /dev/null @@ -1,4 +0,0 @@ -int fuzz_builtin_hash(const char* data, size_t size) { - _Py_HashBytes(data, size); - return 0; -} From d112252d711763c22bd10214f379e68a7ea33fc8 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Tue, 25 Jul 2017 16:08:44 -0700 Subject: [PATCH 03/23] Move LLVMFuzzerTestOneInput into cpython and tweak how test discovery occurs. I'm still not happy with how many times I need to repeat the fuzz test name. This can go wrong way too easily. 
:/ --- Lib/test/test_fuzz.py | 2 +- Modules/_fuzz/README.rst | 20 +++-- Modules/_fuzz/_fuzzmodule.c | 37 ++------ Modules/_fuzz/fuzz_builtin_float.inc | 12 --- Modules/_fuzz/fuzz_builtin_int.inc | 29 ------- Modules/_fuzz/fuzz_builtin_unicode.inc | 9 -- Modules/_fuzz/fuzz_tests.txt | 3 + Modules/_fuzz/fuzzer.c | 115 +++++++++++++++++++++++++ setup.py | 6 +- 9 files changed, 146 insertions(+), 87 deletions(-) delete mode 100644 Modules/_fuzz/fuzz_builtin_float.inc delete mode 100644 Modules/_fuzz/fuzz_builtin_int.inc delete mode 100644 Modules/_fuzz/fuzz_builtin_unicode.inc create mode 100644 Modules/_fuzz/fuzz_tests.txt create mode 100644 Modules/_fuzz/fuzzer.c diff --git a/Lib/test/test_fuzz.py b/Lib/test/test_fuzz.py index deccbf9d905ca5..79b456ae2ba836 100644 --- a/Lib/test/test_fuzz.py +++ b/Lib/test/test_fuzz.py @@ -9,7 +9,7 @@ def test_fuzz(self): This isn't meaningful and only checks it doesn't crash. """ - _fuzz._fuzz_run_all() + _fuzz.run() if __name__ == "__main__": support.run_unittest(TestFuzz) diff --git a/Modules/_fuzz/README.rst b/Modules/_fuzz/README.rst index 01e9c3810e4692..313432f114290b 100644 --- a/Modules/_fuzz/README.rst +++ b/Modules/_fuzz/README.rst @@ -6,7 +6,9 @@ These fuzz tests are designed to be included in Google's `oss-fuzz`_ project. Adding a new fuzz test ---------------------- -In a file named ``$test_name.inc``, add a function with the same name:: +Add the test name on a new line in ``fuzz_tests.txt``. + +In ``fuzzer.c``, add a function to be run:: int $test_name (const char* data, size_t size) { ... @@ -14,13 +16,21 @@ In a file named ``$test_name.inc``, add a function with the same name:: } -This function will be called from ``LLVMFuzzerTestOneInput`` for fuzz testing -when run in oss-fuzz. 
+And invoke it from ``LLVMFuzzerTestOneInput``:: + + #if _Py_FUZZ_YES(fuzz_builtin_float) + rv |= _run_fuzz(data, size, fuzz_builtin_float); + #endif + +``LLVMFuzzerTestOneInput`` will run in oss-fuzz, with each test in +``fuzz_tests.txt`` run separately. What makes a good fuzz test --------------------------- -Libraries written in C that might handle untrusted data are worthwhile. See the -existing examples for reference, and refer to the `oss-fuzz`_ docs. +Libraries written in C that might handle untrusted data are worthwhile. The +more complex the logic (e.g. parsing), the more likely this is to be a useful +fuzz test. See the existing examples for reference, and refer to the +`oss-fuzz`_ docs. .. _oss-fuzz: https://github.com/google/oss-fuzz diff --git a/Modules/_fuzz/_fuzzmodule.c b/Modules/_fuzz/_fuzzmodule.c index 36fe24a3f118a1..0bcb20cc596855 100644 --- a/Modules/_fuzz/_fuzzmodule.c +++ b/Modules/_fuzz/_fuzzmodule.c @@ -2,47 +2,24 @@ #include <stdlib.h> #include <inttypes.h> -// List all fuzz functions here, and in the _fuzz_run_all function. -#include "fuzz_builtin_hash.inc" -#include "fuzz_builtin_int.inc" -#include "fuzz_builtin_float.inc" -#include "fuzz_builtin_unicode.inc" +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); -// Runs fuzzer and returns nonzero if an error occurred. -int _run_fuzz(int(*fuzzer)(const char* , size_t)) { - int rv = fuzzer("", 0); +static PyObject* _fuzz_run(PyObject* self) { + int rv = LLVMFuzzerTestOneInput((const uint8_t*)"", 0); if (PyErr_Occurred()) { - return 1; + return NULL; } if (rv != 0) { + // Nonzero return codes are reserved for future use. 
PyErr_Format( PyExc_RuntimeError, "Nonzero return code from fuzzer: %d", rv); - return 1; + return NULL; } - return 0; -} - - -static PyObject* _fuzz_run_all(PyObject* self) { -#define _Py_FUZZ_STRINGIZE(x) _Py_FUZZ_STRINGIZE2(x) -#define _Py_FUZZ_STRINGIZE2(x) #x -#define _Py_FUZZ_RUN(f) \ - do {\ - printf("%s()\n", _Py_FUZZ_STRINGIZE(f));\ - if (_run_fuzz(f)) return NULL; \ - } while (0) - _Py_FUZZ_RUN(fuzz_builtin_hash); - _Py_FUZZ_RUN(fuzz_builtin_int); - _Py_FUZZ_RUN(fuzz_builtin_float); - _Py_FUZZ_RUN(fuzz_builtin_unicode); -#undef _Py_FUZZ_RUN -#undef _Py_FUZZ_STRINGIZE -#undef _Py_FUZZ_STRINGIZE2 Py_RETURN_NONE; } static PyMethodDef module_methods[] = { - {"_fuzz_run_all", (PyCFunction)_fuzz_run_all, METH_NOARGS, ""}, + {"run", (PyCFunction)_fuzz_run, METH_NOARGS, ""}, {NULL}, }; diff --git a/Modules/_fuzz/fuzz_builtin_float.inc b/Modules/_fuzz/fuzz_builtin_float.inc deleted file mode 100644 index bd9a729c461d22..00000000000000 --- a/Modules/_fuzz/fuzz_builtin_float.inc +++ /dev/null @@ -1,12 +0,0 @@ -int fuzz_builtin_float(const char* data, size_t size) { - PyObject* s = PyBytes_FromStringAndSize(data, size); - if (s == NULL) return 0; - PyObject* f = PyFloat_FromString(s); - if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { - PyErr_Clear(); - } - - Py_XDECREF(f); - Py_DECREF(s); - return 0; -} diff --git a/Modules/_fuzz/fuzz_builtin_int.inc b/Modules/_fuzz/fuzz_builtin_int.inc deleted file mode 100644 index 8bd10c49604c88..00000000000000 --- a/Modules/_fuzz/fuzz_builtin_int.inc +++ /dev/null @@ -1,29 +0,0 @@ -// Fuzz PyLong_FromUnicodeObject as a proxy for int(). -int fuzz_builtin_int(const char* data, size_t size) { - int base = _Py_HashBytes(data, size) % 36; - if (base == 1) { - base = 0; - } - if (base == -1) { - return 0; // An error occurred, bail early. 
- } - if (base < 0) { - base = -base; - } - - PyObject* s = PyUnicode_FromStringAndSize(data, size); - if (PyErr_Occurred()) { - if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { - PyErr_Clear(); - } - return 0; - } - PyObject* l = PyLong_FromUnicodeObject(s, base); - if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { - PyErr_Clear(); - } - PyErr_Clear(); - Py_XDECREF(l); - Py_DECREF(s); - return 0; -} diff --git a/Modules/_fuzz/fuzz_builtin_unicode.inc b/Modules/_fuzz/fuzz_builtin_unicode.inc deleted file mode 100644 index 5281cc39b4e0cb..00000000000000 --- a/Modules/_fuzz/fuzz_builtin_unicode.inc +++ /dev/null @@ -1,9 +0,0 @@ -// Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). -int fuzz_builtin_unicode(const char* data, size_t size) { - PyObject* s = PyUnicode_FromStringAndSize(data, size); - if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { - PyErr_Clear(); - } - Py_XDECREF(s); - return 0; -} diff --git a/Modules/_fuzz/fuzz_tests.txt b/Modules/_fuzz/fuzz_tests.txt new file mode 100644 index 00000000000000..2e53bfdc71619c --- /dev/null +++ b/Modules/_fuzz/fuzz_tests.txt @@ -0,0 +1,3 @@ +fuzz_builtin_float +fuzz_builtin_int +fuzz_builtin_unicode diff --git a/Modules/_fuzz/fuzzer.c b/Modules/_fuzz/fuzzer.c new file mode 100644 index 00000000000000..ffcaf6411d77c2 --- /dev/null +++ b/Modules/_fuzz/fuzzer.c @@ -0,0 +1,115 @@ +// A fuzz test for CPython. +// +// The only exposed function is LLVMFuzzerTestOneInput, which is called by +// fuzzers and by the _fuzz module for smoke tests. +// +// To build exactly one fuzz test, as when running in oss-fuzz etc., +// build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build +// LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with +// -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float. +// +// See the source code for LLVMFuzzerTestOneInput for details. + +#include <Python.h> +#include <stdlib.h> +#include <inttypes.h> + +// Fuzz PyFloat_FromString as a proxy for float(str). 
+static int fuzz_builtin_float(const char* data, size_t size) { + PyObject* s = PyBytes_FromStringAndSize(data, size); + if (s == NULL) return 0; + PyObject* f = PyFloat_FromString(s); + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + + Py_XDECREF(f); + Py_DECREF(s); + return 0; +} + +// Fuzz PyLong_FromUnicodeObject as a proxy for int(str). +static int fuzz_builtin_int(const char* data, size_t size) { + int base = _Py_HashBytes(data, size) % 36; + if (base == 1) { + base = 0; + } + if (base == -1) { + return 0; // An error occurred, bail early. + } + if (base < 0) { + base = -base; + } + + PyObject* s = PyUnicode_FromStringAndSize(data, size); + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyErr_Clear(); + } + return 0; + } + PyObject* l = PyLong_FromUnicodeObject(s, base); + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + PyErr_Clear(); + Py_XDECREF(l); + Py_DECREF(s); + return 0; +} + +// Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). +static int fuzz_builtin_unicode(const char* data, size_t size) { + PyObject* s = PyUnicode_FromStringAndSize(data, size); + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyErr_Clear(); + } + Py_XDECREF(s); + return 0; +} + +// Run fuzzer and abort on failure. +static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { + int rv = fuzzer("", 0); + if (PyErr_Occurred()) { + // Fuzz tests should handle expected errors for themselves. + PyErr_Print(); + abort(); + } + // Someday the return value might mean something, propagate it. + return rv; +} + +// CPython generates a lot of leak warnings for whatever reason. +int __lsan_is_turned_off(void) { return 1; } + +// Fuzz test interface. +// This returns the bitwise or of all fuzz test's return values. 
+// +// All fuzz tests must return 0, as all nonzero return codes are reserved for +// future use -- we propagate the return values for that future case. +// (And we bitwise or when running multiple tests to verify that normally we +// only return 0.) +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (!Py_IsInitialized()) { + // LLVMFuzzerTestOneInput is called repeatedly from the same process, with + // no separate initialization phase, sadly, so we need to initialize CPython + // ourselves on the first run. + Py_InitializeEx(0); + } + + int rv = 0; + +#define _Py_FUZZ_YES(test_name) (defined(_Py_FUZZ_##test_name) || !defined(_Py_FUZZ_ONE)) +#if _Py_FUZZ_YES(fuzz_builtin_float) + rv |= _run_fuzz(data, size, fuzz_builtin_float); +#endif +#if _Py_FUZZ_YES(fuzz_builtin_int) + rv |= _run_fuzz(data, size, fuzz_builtin_int); +#endif +#if _Py_FUZZ_YES(fuzz_builtin_unicode) + rv |= _run_fuzz(data, size, fuzz_builtin_unicode); +#endif +#undef _Py_FUZZ_YES + return rv; +} \ No newline at end of file diff --git a/setup.py b/setup.py index 05dd2fb71f87cc..c795b1650cda13 100644 --- a/setup.py +++ b/setup.py @@ -716,7 +716,11 @@ def detect_modules(self): exts.append( Extension('syslog', ['syslogmodule.c']) ) # Fuzz tests. - exts.append( Extension('_fuzz', ['_fuzz/_fuzzmodule.c'], optional=False) ) + exts.append( Extension( + '_fuzz', + ['_fuzz/_fuzzmodule.c', '_fuzz/fuzzer.c'], + optional=False) + ) # # Here ends the simple stuff. From here on, modules need certain From cb9cdc08127d20c57f3d9e738ca7773230bf5851 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Tue, 25 Jul 2017 16:47:11 -0700 Subject: [PATCH 04/23] Move the fuzzer to C++ so that it builds. It's possible there's a way to compile this with clang and then link it with clang++, but I don't know how to do that. 
Specifically, compilation fails with this: $CC $CFLAGS \ -D _Py_FUZZ_ONE -D _Py_FUZZ_$fuzz_test \ -Wno-unused-function \ $($OUT/bin/python3-config --cflags) -g -O1 \ $FUZZ_DIR/fuzzer.c -o $OUT/$fuzz_test -lFuzzingEngine \ $($OUT/bin/python3-config --ldflags) With errors like: /usr/local/bin/../include/c++/v1/new:234: undefined reference to `operator delete(void*)' But it works just fine with this: $CXX $CXXFLAGS \ -D _Py_FUZZ_ONE -D _Py_FUZZ_$fuzz_test \ -Wno-unused-function \ $($OUT/bin/python3-config --cflags) -g -O1 \ $FUZZ_DIR/fuzzer.c -o $OUT/$fuzz_test -lFuzzingEngine \ $($OUT/bin/python3-config --ldflags) Presumably there are ways to do this in C, so for expediency I'm doing it in C++ right now, with the minimal possible C++ changes (extern "C"), so that if I need to change it back to C in code review, it won't be too hard. --- Modules/_fuzz/{fuzzer.c => fuzzer.cpp} | 9 ++++++--- setup.py | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) rename Modules/_fuzz/{fuzzer.c => fuzzer.cpp} (93%) diff --git a/Modules/_fuzz/fuzzer.c b/Modules/_fuzz/fuzzer.cpp similarity index 93% rename from Modules/_fuzz/fuzzer.c rename to Modules/_fuzz/fuzzer.cpp index ffcaf6411d77c2..2a9bf0787ba506 100644 --- a/Modules/_fuzz/fuzzer.c +++ b/Modules/_fuzz/fuzzer.cpp @@ -1,5 +1,8 @@ // A fuzz test for CPython. // +// Unusually for CPython, this is written in C++ for the benefit of linking with +// libFuzzer. +// // The only exposed function is LLVMFuzzerTestOneInput, which is called by // fuzzers and by the _fuzz module for smoke tests. // @@ -81,7 +84,7 @@ static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* } // CPython generates a lot of leak warnings for whatever reason. -int __lsan_is_turned_off(void) { return 1; } +extern "C" int __lsan_is_turned_off(void) { return 1; } // Fuzz test interface. // This returns the bitwise or of all fuzz test's return values. 
@@ -90,7 +93,7 @@ int __lsan_is_turned_off(void) { return 1; } // future use -- we propagate the return values for that future case. // (And we bitwise or when running multiple tests to verify that normally we // only return 0.) -int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (!Py_IsInitialized()) { // LLVMFuzzerTestOneInput is called repeatedly from the same process, with // no separate initialization phase, sadly, so we need to initialize CPython @@ -112,4 +115,4 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { #endif #undef _Py_FUZZ_YES return rv; -} \ No newline at end of file +} diff --git a/setup.py b/setup.py index c795b1650cda13..7d8d26968e5e73 100644 --- a/setup.py +++ b/setup.py @@ -718,7 +718,7 @@ def detect_modules(self): # Fuzz tests. exts.append( Extension( '_fuzz', - ['_fuzz/_fuzzmodule.c', '_fuzz/fuzzer.c'], + ['_fuzz/_fuzzmodule.c', '_fuzz/fuzzer.cpp'], optional=False) ) From 7028614379bf37a585c08b153088fc527fddb3fc Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Tue, 25 Jul 2017 17:11:33 -0700 Subject: [PATCH 05/23] Run the fuzz smoke tests on a little more input, just for kicks. (i.e. just to get increased confidence we won't immediately crash on fuzzing.) I'm using s# because I'd like to minimize the diff between Python 2 and 3. --- Lib/test/test_fuzz.py | 9 +++++++-- Modules/_fuzz/_fuzzmodule.c | 11 ++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_fuzz.py b/Lib/test/test_fuzz.py index 79b456ae2ba836..77ce455566ec63 100644 --- a/Lib/test/test_fuzz.py +++ b/Lib/test/test_fuzz.py @@ -5,11 +5,16 @@ class TestFuzz(unittest.TestCase): def test_fuzz(self): - """Run the fuzz tests on blank input. + """Run the fuzz tests on sample input. This isn't meaningful and only checks it doesn't crash. 
""" - _fuzz.run() + _fuzz.run(b"") + _fuzz.run(b"\0") + _fuzz.run(b"{") + _fuzz.run(b" ") + _fuzz.run(b"x") + _fuzz.run(b"1") if __name__ == "__main__": support.run_unittest(TestFuzz) diff --git a/Modules/_fuzz/_fuzzmodule.c b/Modules/_fuzz/_fuzzmodule.c index 0bcb20cc596855..397f1a57a684c2 100644 --- a/Modules/_fuzz/_fuzzmodule.c +++ b/Modules/_fuzz/_fuzzmodule.c @@ -4,8 +4,13 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); -static PyObject* _fuzz_run(PyObject* self) { - int rv = LLVMFuzzerTestOneInput((const uint8_t*)"", 0); +static PyObject* _fuzz_run(PyObject* self, PyObject* args) { + const char* buf; + size_t size; + if (!PyArg_ParseTuple(args, "s#", &buf, &size)) { + return NULL; + } + int rv = LLVMFuzzerTestOneInput((const uint8_t*)buf, size); if (PyErr_Occurred()) { return NULL; } @@ -19,7 +24,7 @@ static PyObject* _fuzz_run(PyObject* self) { } static PyMethodDef module_methods[] = { - {"run", (PyCFunction)_fuzz_run, METH_NOARGS, ""}, + {"run", (PyCFunction)_fuzz_run, METH_VARARGS, ""}, {NULL}, }; From 2b34f0744a1f5f61da3cb41d70799deca402d7a0 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Tue, 25 Jul 2017 17:24:35 -0700 Subject: [PATCH 06/23] Make the _fuzz module optional. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7d8d26968e5e73..3a49f18b95bb3b 100644 --- a/setup.py +++ b/setup.py @@ -719,7 +719,7 @@ def detect_modules(self): exts.append( Extension( '_fuzz', ['_fuzz/_fuzzmodule.c', '_fuzz/fuzzer.cpp'], - optional=False) + optional=True) ) # From f77be6500b45a884c0584e47195cecb8144eafc8 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Tue, 25 Jul 2017 17:38:57 -0700 Subject: [PATCH 07/23] Actually run the fuzz tests... (Screwed up my refactoring when I moved the fuzz definition to CPython.) 
--- Modules/_fuzz/fuzzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_fuzz/fuzzer.cpp b/Modules/_fuzz/fuzzer.cpp index 2a9bf0787ba506..cae6d94ceea54d 100644 --- a/Modules/_fuzz/fuzzer.cpp +++ b/Modules/_fuzz/fuzzer.cpp @@ -73,7 +73,7 @@ static int fuzz_builtin_unicode(const char* data, size_t size) { // Run fuzzer and abort on failure. static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { - int rv = fuzzer("", 0); + int rv = fuzzer(data, size); if (PyErr_Occurred()) { // Fuzz tests should handle expected errors for themselves. PyErr_Print(); From 778c8277a9a781fd5c0323d93c06941c184afb57 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Tue, 25 Jul 2017 21:17:43 -0700 Subject: [PATCH 08/23] Use unittest.main instead of test.support. --- Lib/test/test_fuzz.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/test/test_fuzz.py b/Lib/test/test_fuzz.py index 77ce455566ec63..aa947f9a84273d 100644 --- a/Lib/test/test_fuzz.py +++ b/Lib/test/test_fuzz.py @@ -1,5 +1,4 @@ import unittest -from test import support import _fuzz class TestFuzz(unittest.TestCase): @@ -17,4 +16,4 @@ def test_fuzz(self): _fuzz.run(b"1") if __name__ == "__main__": - support.run_unittest(TestFuzz) + unittest.main() From bc3d33f7dce84cd069092a46f07aa907903255e9 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Wed, 26 Jul 2017 17:49:52 -0700 Subject: [PATCH 09/23] Use C-style comments. (For porting to Python 2.7 and C89). --- Modules/_fuzz/fuzzer.cpp | 62 ++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/Modules/_fuzz/fuzzer.cpp b/Modules/_fuzz/fuzzer.cpp index cae6d94ceea54d..272215d94ec72d 100644 --- a/Modules/_fuzz/fuzzer.cpp +++ b/Modules/_fuzz/fuzzer.cpp @@ -1,23 +1,23 @@ -// A fuzz test for CPython. -// -// Unusually for CPython, this is written in C++ for the benefit of linking with -// libFuzzer. 
-// -// The only exposed function is LLVMFuzzerTestOneInput, which is called by -// fuzzers and by the _fuzz module for smoke tests. -// -// To build exactly one fuzz test, as when running in oss-fuzz etc., -// build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build -// LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with -// -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float. -// -// See the source code for LLVMFuzzerTestOneInput for details. +/* A fuzz test for CPython. + + Unusually for CPython, this is written in C++ for the benefit of linking with + libFuzzer. + + The only exposed function is LLVMFuzzerTestOneInput, which is called by + fuzzers and by the _fuzz module for smoke tests. + + To build exactly one fuzz test, as when running in oss-fuzz etc., + build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build + LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with + -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float. + + See the source code for LLVMFuzzerTestOneInput for details. */ #include <Python.h> #include <stdlib.h> #include <inttypes.h> -// Fuzz PyFloat_FromString as a proxy for float(str). +/* Fuzz PyFloat_FromString as a proxy for float(str). */ static int fuzz_builtin_float(const char* data, size_t size) { PyObject* s = PyBytes_FromStringAndSize(data, size); if (s == NULL) return 0; @@ -31,7 +31,7 @@ static int fuzz_builtin_float(const char* data, size_t size) { return 0; } -// Fuzz PyLong_FromUnicodeObject as a proxy for int(str). +/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */ static int fuzz_builtin_int(const char* data, size_t size) { int base = _Py_HashBytes(data, size) % 36; if (base == 1) { @@ -61,7 +61,7 @@ static int fuzz_builtin_int(const char* data, size_t size) { return 0; } -// Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). +/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). 
*/ static int fuzz_builtin_unicode(const char* data, size_t size) { PyObject* s = PyUnicode_FromStringAndSize(data, size); if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { @@ -71,33 +71,33 @@ static int fuzz_builtin_unicode(const char* data, size_t size) { return 0; } -// Run fuzzer and abort on failure. +/* Run fuzzer and abort on failure. */ static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { int rv = fuzzer(data, size); if (PyErr_Occurred()) { - // Fuzz tests should handle expected errors for themselves. + /* Fuzz tests should handle expected errors for themselves. */ PyErr_Print(); abort(); } - // Someday the return value might mean something, propagate it. + /* Someday the return value might mean something, propagate it. */ return rv; } -// CPython generates a lot of leak warnings for whatever reason. +/* CPython generates a lot of leak warnings for whatever reason. */ extern "C" int __lsan_is_turned_off(void) { return 1; } -// Fuzz test interface. -// This returns the bitwise or of all fuzz test's return values. -// -// All fuzz tests must return 0, as all nonzero return codes are reserved for -// future use -- we propagate the return values for that future case. -// (And we bitwise or when running multiple tests to verify that normally we -// only return 0.) +/* Fuzz test interface. + This returns the bitwise or of all fuzz test's return values. + + All fuzz tests must return 0, as all nonzero return codes are reserved for + future use -- we propagate the return values for that future case. + (And we bitwise or when running multiple tests to verify that normally we + only return 0.) */ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (!Py_IsInitialized()) { - // LLVMFuzzerTestOneInput is called repeatedly from the same process, with - // no separate initialization phase, sadly, so we need to initialize CPython - // ourselves on the first run. 
+ /* LLVMFuzzerTestOneInput is called repeatedly from the same process, + with no separate initialization phase, sadly, so we need to + initialize CPython ourselves on the first run. */ Py_InitializeEx(0); } From dee024fdebe19c7bac8c80004de479c92d3b7b59 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Wed, 26 Jul 2017 17:50:55 -0700 Subject: [PATCH 10/23] Fix build break I accidentally introduced in f77be65. Not sure how I didn't notice it. :/ --- Modules/_fuzz/fuzzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_fuzz/fuzzer.cpp b/Modules/_fuzz/fuzzer.cpp index 272215d94ec72d..f27aba410abb85 100644 --- a/Modules/_fuzz/fuzzer.cpp +++ b/Modules/_fuzz/fuzzer.cpp @@ -73,7 +73,7 @@ static int fuzz_builtin_unicode(const char* data, size_t size) { /* Run fuzzer and abort on failure. */ static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { - int rv = fuzzer(data, size); + int rv = fuzzer((const char*) data, size); if (PyErr_Occurred()) { /* Fuzz tests should handle expected errors for themselves. */ PyErr_Print(); From 9a00b2376845b210076f30e00a086cd71c72dc14 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Wed, 26 Jul 2017 17:55:31 -0700 Subject: [PATCH 11/23] Add a little detail on what the heck this stuff even is, in readme. --- Modules/_fuzz/README.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Modules/_fuzz/README.rst b/Modules/_fuzz/README.rst index 313432f114290b..e1c8a33006514b 100644 --- a/Modules/_fuzz/README.rst +++ b/Modules/_fuzz/README.rst @@ -3,6 +3,11 @@ Fuzz Tests for CPython These fuzz tests are designed to be included in Google's `oss-fuzz`_ project. +oss-fuzz works against a library exposing a function of the form +``int LLVMFuzzerTestOneInput(const uint8_t* data, size_t length)``. We provide +that library (``fuzzer.cpp``), and include a ``_fuzz`` module for testing with +some toy values -- no fuzzing occurs in Python's test suite. 
+ Adding a new fuzz test ---------------------- From d5421305280de7ee6620ffc4e686ba2e3eb5ef99 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Wed, 26 Jul 2017 17:56:48 -0700 Subject: [PATCH 12/23] s/fuzzer.c/fuzzer.cpp/ --- Modules/_fuzz/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_fuzz/README.rst b/Modules/_fuzz/README.rst index e1c8a33006514b..35fbb83d1c71fa 100644 --- a/Modules/_fuzz/README.rst +++ b/Modules/_fuzz/README.rst @@ -13,7 +13,7 @@ Adding a new fuzz test Add the test name on a new line in ``fuzz_tests.txt``. -In ``fuzzer.c``, add a function to be run:: +In ``fuzzer.cpp``, add a function to be run:: int $test_name (const char* data, size_t size) { ... From 9f16dd4c10e0f85ae918bbb4008a1cdc92d71067 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Thu, 3 Aug 2017 10:40:37 -0700 Subject: [PATCH 13/23] fuzzer.cpp -> fuzzer.c in faith that I can make it build. (Despite not understanding how to compile C code.) --- Modules/_fuzz/README.rst | 4 ++-- Modules/_fuzz/{fuzzer.cpp => fuzzer.c} | 4 ++-- setup.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename Modules/_fuzz/{fuzzer.cpp => fuzzer.c} (96%) diff --git a/Modules/_fuzz/README.rst b/Modules/_fuzz/README.rst index 35fbb83d1c71fa..5cf24a54debe1f 100644 --- a/Modules/_fuzz/README.rst +++ b/Modules/_fuzz/README.rst @@ -5,7 +5,7 @@ These fuzz tests are designed to be included in Google's `oss-fuzz`_ project. oss-fuzz works against a library exposing a function of the form ``int LLVMFuzzerTestOneInput(const uint8_t* data, size_t length)``. We provide -that library (``fuzzer.cpp``), and include a ``_fuzz`` module for testing with +that library (``fuzzer.c``), and include a ``_fuzz`` module for testing with some toy values -- no fuzzing occurs in Python's test suite. Adding a new fuzz test @@ -13,7 +13,7 @@ Adding a new fuzz test Add the test name on a new line in ``fuzz_tests.txt``. 
-In ``fuzzer.cpp``, add a function to be run:: +In ``fuzzer.c``, add a function to be run:: int $test_name (const char* data, size_t size) { ... diff --git a/Modules/_fuzz/fuzzer.cpp b/Modules/_fuzz/fuzzer.c similarity index 96% rename from Modules/_fuzz/fuzzer.cpp rename to Modules/_fuzz/fuzzer.c index f27aba410abb85..d16a667d3d1246 100644 --- a/Modules/_fuzz/fuzzer.cpp +++ b/Modules/_fuzz/fuzzer.c @@ -84,7 +84,7 @@ static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* } /* CPython generates a lot of leak warnings for whatever reason. */ -extern "C" int __lsan_is_turned_off(void) { return 1; } +int __lsan_is_turned_off(void) { return 1; } /* Fuzz test interface. This returns the bitwise or of all fuzz test's return values. @@ -93,7 +93,7 @@ extern "C" int __lsan_is_turned_off(void) { return 1; } future use -- we propagate the return values for that future case. (And we bitwise or when running multiple tests to verify that normally we only return 0.) */ -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (!Py_IsInitialized()) { /* LLVMFuzzerTestOneInput is called repeatedly from the same process, with no separate initialization phase, sadly, so we need to diff --git a/setup.py b/setup.py index 3a49f18b95bb3b..009bc4f1de8d3f 100644 --- a/setup.py +++ b/setup.py @@ -718,7 +718,7 @@ def detect_modules(self): # Fuzz tests. exts.append( Extension( '_fuzz', - ['_fuzz/_fuzzmodule.c', '_fuzz/fuzzer.cpp'], + ['_fuzz/_fuzzmodule.c', '_fuzz/fuzzer.c'], optional=True) ) From aa6d7846be147095729959db7169da03385df0be Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Wed, 23 Aug 2017 15:58:42 -0700 Subject: [PATCH 14/23] Make _fuzz required, so that tests never fail on a successful build. 
--- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 009bc4f1de8d3f..06a7078f43e634 100644 --- a/setup.py +++ b/setup.py @@ -718,8 +718,7 @@ def detect_modules(self): # Fuzz tests. exts.append( Extension( '_fuzz', - ['_fuzz/_fuzzmodule.c', '_fuzz/fuzzer.c'], - optional=True) + ['_fuzz/_fuzzmodule.c', '_fuzz/fuzzer.c']) ) # From fa0af7315ce183aa0f90c43845344f09591c3c71 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Wed, 23 Aug 2017 16:19:04 -0700 Subject: [PATCH 15/23] Fix the windows test failure by just not testing on windows. :) Since it's now required in setup.py, we know that the test will run on linux. On Windows we don't really care if it runs or not -- the fuzz tests are on a linux machine as far as I'm aware -- and I don't have a windows box to mess with Visual Studio files in. Big thanks to zware on #python-dev for walking me through this. --- Lib/test/test_fuzz.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_fuzz.py b/Lib/test/test_fuzz.py index aa947f9a84273d..3840820e82eeea 100644 --- a/Lib/test/test_fuzz.py +++ b/Lib/test/test_fuzz.py @@ -1,5 +1,7 @@ +import test.support import unittest -import _fuzz + +_fuzz = test.support.import_module('_fuzz') class TestFuzz(unittest.TestCase): From 4af5c1137dd16620f197ed98b1d53a810f70d154 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Wed, 23 Aug 2017 17:03:34 -0700 Subject: [PATCH 16/23] NEWS entry thingermajig. 
--- .../next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst diff --git a/Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst b/Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst new file mode 100644 index 00000000000000..9a0fb16f9ee635 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst @@ -0,0 +1 @@ +Add fuzz tests for float(str), int(str), unicode(str); for oss-fuzz. From fb017edb88c4557130530e3a22254c54e475db51 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Thu, 24 Aug 2017 11:15:30 -0700 Subject: [PATCH 17/23] Blacklist test_fuzz from coverage tests. It seems to fail with a MemoryError on some machines. (But not mine, so I can't debug. If anyone can debug, I'm sure this would be better off fixed than ignored...) See discussion on https://github.com/python/cpython/pull/2878 --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ba1e417c633af8..c895490fb1fec2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -62,7 +62,9 @@ matrix: ./venv/bin/python -m pip install -U coverage script: # Skip tests that re-run the entire test suite. - - ./venv/bin/python -m coverage run --pylib -m test -uall,-cpu -x test_multiprocessing_fork -x test_multiprocessing_forkserver -x test_multiprocessing_spawn + # Also skip test_fuzz, it seems to be broken sometimes, and the coverage + # results are not interesting. + - ./venv/bin/python -m coverage run --pylib -m test -uall,-cpu -x test_multiprocessing_fork -x test_multiprocessing_forkserver -x test_multiprocessing_spawn -x test_fuzz after_script: # Probably should be after_success once test suite updated to run under coverage.py. # Make the `coverage` command available to Codecov w/ a version of Python that can parse all source files. 
- source ./venv/bin/activate From c4eda5df13724096dc11a1bef77411f4204a641f Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Thu, 24 Aug 2017 16:59:10 -0700 Subject: [PATCH 18/23] Remove outdated comment. --- Modules/_fuzz/fuzzer.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/Modules/_fuzz/fuzzer.c b/Modules/_fuzz/fuzzer.c index d16a667d3d1246..747f48f46de861 100644 --- a/Modules/_fuzz/fuzzer.c +++ b/Modules/_fuzz/fuzzer.c @@ -1,8 +1,5 @@ /* A fuzz test for CPython. - Unusually for CPython, this is written in C++ for the benefit of linking with - libFuzzer. - The only exposed function is LLVMFuzzerTestOneInput, which is called by fuzzers and by the _fuzz module for smoke tests. From 83967f99324b3c0a5a7ec4794ce05db8a04e803e Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Thu, 24 Aug 2017 17:03:22 -0700 Subject: [PATCH 19/23] Fix incorrect mod for base (should be % 37), and document why we're taking it. --- Modules/_fuzz/fuzzer.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Modules/_fuzz/fuzzer.c b/Modules/_fuzz/fuzzer.c index 747f48f46de861..6760307a95b6e0 100644 --- a/Modules/_fuzz/fuzzer.c +++ b/Modules/_fuzz/fuzzer.c @@ -30,8 +30,12 @@ static int fuzz_builtin_float(const char* data, size_t size) { /* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */ static int fuzz_builtin_int(const char* data, size_t size) { - int base = _Py_HashBytes(data, size) % 36; + /* Pick a random valid base. (When the fuzzed function takes extra + parameters, it's somewhat normal to hash the input to generate those + parameters. We want to exercise all code paths, so we do so here.) */ + int base = _Py_HashBytes(data, size) % 37; if (base == 1) { + // 1 is the only number between 0 and 36 that is not a valid base. 
base = 0; } if (base == -1) { From 52dccc288b5fd0e3e2d64f649b8471b4ae5e9b53 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Thu, 24 Aug 2017 17:07:00 -0700 Subject: [PATCH 20/23] Use more idiomatic NULL checks rather than PyErr_Occurred(). --- Modules/_fuzz/fuzzer.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Modules/_fuzz/fuzzer.c b/Modules/_fuzz/fuzzer.c index 6760307a95b6e0..36f721ee626164 100644 --- a/Modules/_fuzz/fuzzer.c +++ b/Modules/_fuzz/fuzzer.c @@ -46,14 +46,14 @@ static int fuzz_builtin_int(const char* data, size_t size) { } PyObject* s = PyUnicode_FromStringAndSize(data, size); - if (PyErr_Occurred()) { + if (s == NULL) { if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { PyErr_Clear(); } return 0; } PyObject* l = PyLong_FromUnicodeObject(s, base); - if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { + if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { PyErr_Clear(); } PyErr_Clear(); @@ -65,7 +65,7 @@ static int fuzz_builtin_int(const char* data, size_t size) { /* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */ static int fuzz_builtin_unicode(const char* data, size_t size) { PyObject* s = PyUnicode_FromStringAndSize(data, size); - if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { PyErr_Clear(); } Py_XDECREF(s); @@ -76,7 +76,8 @@ static int fuzz_builtin_unicode(const char* data, size_t size) { static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { int rv = fuzzer((const char*) data, size); if (PyErr_Occurred()) { - /* Fuzz tests should handle expected errors for themselves. */ + /* Fuzz tests should handle expected errors for themselves. + This is last-ditch check in case they didn't. 
*/ PyErr_Print(); abort(); } From 4327fd9a430271db9ef8578110f5115cb1a6eb0a Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Thu, 24 Aug 2017 17:09:49 -0700 Subject: [PATCH 21/23] Attempt to explain a little more why these exist / what the relationship is with oss-fuzz. --- Modules/_fuzz/README.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Modules/_fuzz/README.rst b/Modules/_fuzz/README.rst index 5cf24a54debe1f..b48f3c89a42bb6 100644 --- a/Modules/_fuzz/README.rst +++ b/Modules/_fuzz/README.rst @@ -8,6 +8,11 @@ oss-fuzz works against a library exposing a function of the form that library (``fuzzer.c``), and include a ``_fuzz`` module for testing with some toy values -- no fuzzing occurs in Python's test suite. +oss-fuzz will regularly pull from CPython, discover all the tests in +``fuzz_tests.txt``, and run them -- so adding a new test here means it will +automatically be run in oss-fuzz, while also being smoke-tested as part of +CPython's test suite. + Adding a new fuzz test ---------------------- From 6da8e979475e5e1c9dc2ca58263338d0b33f1854 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 5 Sep 2017 23:03:30 -0700 Subject: [PATCH 22/23] Renamed _fuzz to _xxtestfuzz and cleanup. Testing it resulted in a few cleanups such as PY_SSIZE_T_CLEAN being required to avoid crashing in an opt build. 
--- Lib/test/test_fuzz.py | 21 ----------------- Lib/test/test_xxtestfuzz.py | 23 +++++++++++++++++++ Modules/{_fuzz => _xxtestfuzz}/README.rst | 0 .../_xxtestfuzz.c} | 5 ++-- Modules/{_fuzz => _xxtestfuzz}/fuzz_tests.txt | 0 Modules/{_fuzz => _xxtestfuzz}/fuzzer.c | 0 setup.py | 4 ++-- 7 files changed, 28 insertions(+), 25 deletions(-) delete mode 100644 Lib/test/test_fuzz.py create mode 100644 Lib/test/test_xxtestfuzz.py rename Modules/{_fuzz => _xxtestfuzz}/README.rst (100%) rename Modules/{_fuzz/_fuzzmodule.c => _xxtestfuzz/_xxtestfuzz.c} (93%) rename Modules/{_fuzz => _xxtestfuzz}/fuzz_tests.txt (100%) rename Modules/{_fuzz => _xxtestfuzz}/fuzzer.c (100%) diff --git a/Lib/test/test_fuzz.py b/Lib/test/test_fuzz.py deleted file mode 100644 index 3840820e82eeea..00000000000000 --- a/Lib/test/test_fuzz.py +++ /dev/null @@ -1,21 +0,0 @@ -import test.support -import unittest - -_fuzz = test.support.import_module('_fuzz') - -class TestFuzz(unittest.TestCase): - - def test_fuzz(self): - """Run the fuzz tests on sample input. - - This isn't meaningful and only checks it doesn't crash. 
- """ - _fuzz.run(b"") - _fuzz.run(b"\0") - _fuzz.run(b"{") - _fuzz.run(b" ") - _fuzz.run(b"x") - _fuzz.run(b"1") - -if __name__ == "__main__": - unittest.main() diff --git a/Lib/test/test_xxtestfuzz.py b/Lib/test/test_xxtestfuzz.py new file mode 100644 index 00000000000000..532f5fe72aa5f0 --- /dev/null +++ b/Lib/test/test_xxtestfuzz.py @@ -0,0 +1,23 @@ +import faulthandler +import test.support +import unittest + +_xxtestfuzz = test.support.import_module('_xxtestfuzz') + + +class TestFuzzer(unittest.TestCase): + """To keep our https://github.com/google/oss-fuzz API working.""" + + def test_sample_input_smoke_test(self): + """This is only a regression test: Check that it doesn't crash.""" + _xxtestfuzz.run(b"") + _xxtestfuzz.run(b"\0") + _xxtestfuzz.run(b"{") + _xxtestfuzz.run(b" ") + _xxtestfuzz.run(b"x") + _xxtestfuzz.run(b"1") + + +if __name__ == "__main__": + faulthandler.enable() + unittest.main() diff --git a/Modules/_fuzz/README.rst b/Modules/_xxtestfuzz/README.rst similarity index 100% rename from Modules/_fuzz/README.rst rename to Modules/_xxtestfuzz/README.rst diff --git a/Modules/_fuzz/_fuzzmodule.c b/Modules/_xxtestfuzz/_xxtestfuzz.c similarity index 93% rename from Modules/_fuzz/_fuzzmodule.c rename to Modules/_xxtestfuzz/_xxtestfuzz.c index 397f1a57a684c2..781dd23500a29e 100644 --- a/Modules/_fuzz/_fuzzmodule.c +++ b/Modules/_xxtestfuzz/_xxtestfuzz.c @@ -1,3 +1,4 @@ +#define PY_SSIZE_T_CLEAN #include #include #include @@ -6,7 +7,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); static PyObject* _fuzz_run(PyObject* self, PyObject* args) { const char* buf; - size_t size; + Py_ssize_t size; if (!PyArg_ParseTuple(args, "s#", &buf, &size)) { return NULL; } @@ -41,7 +42,7 @@ static struct PyModuleDef _fuzzmodule = { }; PyMODINIT_FUNC -PyInit__fuzz(void) +PyInit__xxtestfuzz(void) { PyObject *m = NULL; diff --git a/Modules/_fuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt similarity index 100% rename from Modules/_fuzz/fuzz_tests.txt 
rename to Modules/_xxtestfuzz/fuzz_tests.txt diff --git a/Modules/_fuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c similarity index 100% rename from Modules/_fuzz/fuzzer.c rename to Modules/_xxtestfuzz/fuzzer.c diff --git a/setup.py b/setup.py index 06a7078f43e634..16dfa231cb9b87 100644 --- a/setup.py +++ b/setup.py @@ -717,8 +717,8 @@ def detect_modules(self): # Fuzz tests. exts.append( Extension( - '_fuzz', - ['_fuzz/_fuzzmodule.c', '_fuzz/fuzzer.c']) + '_xxtestfuzz', + ['_xxtestfuzz/_xxtestfuzz.c', '_xxtestfuzz/fuzzer.c']) ) # From 43620ae3e2da3853909d65be5dec0c5bd678ce75 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 5 Sep 2017 23:25:24 -0700 Subject: [PATCH 23/23] don't skip test_fuzz (besides, it was renamed) That was just hiding the actual error in the first place that happened due to PY_SSIZE_T_CLEAN not being there. --- .travis.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index c895490fb1fec2..ba1e417c633af8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -62,9 +62,7 @@ matrix: ./venv/bin/python -m pip install -U coverage script: # Skip tests that re-run the entire test suite. - # Also skip test_fuzz, it seems to be broken sometimes, and the coverage - # results are not interesting. - - ./venv/bin/python -m coverage run --pylib -m test -uall,-cpu -x test_multiprocessing_fork -x test_multiprocessing_forkserver -x test_multiprocessing_spawn -x test_fuzz + - ./venv/bin/python -m coverage run --pylib -m test -uall,-cpu -x test_multiprocessing_fork -x test_multiprocessing_forkserver -x test_multiprocessing_spawn after_script: # Probably should be after_success once test suite updated to run under coverage.py. # Make the `coverage` command available to Codecov w/ a version of Python that can parse all source files. - source ./venv/bin/activate