From 04ac47b343b10f2182c4b3730d4be241b2397a4d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 16 Aug 2024 19:13:37 +0300 Subject: [PATCH 1/4] gh-123067: Fix quadratic complexity in parsing cookies with backslashes This fixes CVE-2024-7592. --- Lib/http/cookies.py | 34 ++++------------- Lib/test/test_http_cookies.py | 38 +++++++++++++++++++ ...-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst | 1 + 3 files changed, 47 insertions(+), 26 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index 351faf428a20cd..11a67e8a2e008b 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -184,8 +184,12 @@ def _quote(str): return '"' + str.translate(_Translator) + '"' -_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") -_QuotePatt = re.compile(r"[\\].") +_unquote_re = re.compile(r'\\(?:([0-3][0-7][0-7])|(["\\]))') +def _unquote_replace(m): + if m[1]: + return chr(int(m[1], 8)) + else: + return m[2] def _unquote(str): # If there aren't any doublequotes, @@ -205,30 +209,8 @@ def _unquote(str): # \012 --> \n # \" --> " # - i = 0 - n = len(str) - res = [] - while 0 <= i < n: - o_match = _OctalPatt.search(str, i) - q_match = _QuotePatt.search(str, i) - if not o_match and not q_match: # Neither matched - res.append(str[i:]) - break - # else: - j = k = -1 - if o_match: - j = o_match.start(0) - if q_match: - k = q_match.start(0) - if q_match and (not o_match or k < j): # QuotePatt matched - res.append(str[i:k]) - res.append(str[k+1]) - i = k + 2 - else: # OctalPatt matched - res.append(str[i:j]) - res.append(chr(int(str[j+1:j+4], 8))) - i = j + 4 - return _nulljoin(res) + + return _unquote_re.sub(_unquote_replace, str) # The _getdate() routine is used to set the expiration time in the cookie's HTTP # header. By default, _getdate() returns the current time in the appropriate diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py index 925c8697f60de6..13b526d49b0856 100644 --- a/Lib/test/test_http_cookies.py +++ b/Lib/test/test_http_cookies.py @@ -5,6 +5,7 @@ import doctest from http import cookies import pickle +from test import support class CookieTests(unittest.TestCase): @@ -58,6 +59,43 @@ def test_basic(self): for k, v in sorted(case['dict'].items()): self.assertEqual(C[k].value, v) + def test_unquote(self): + cases = [ + (r'a="b=\""', 'b="'), + (r'a="b=\\"', 'b=\\'), + (r'a="b=\="', 'b=\\='), + (r'a="b=\n"', 'b=\\n'), + (r'a="b=\042"', 'b="'), + (r'a="b=\134"', 'b=\\'), + (r'a="b=\377"', 'b=\xff'), + (r'a="b=\400"', 'b=\\400'), + (r'a="b=\42"', 'b=\\42'), + (r'a="b=\\042"', 'b=\\042'), + (r'a="b=\\134"', 'b=\\134'), + (r'a="b=\\\""', 'b=\\"'), + (r'a="b=\\\042"', 'b=\\"'), + (r'a="b=\134\""', 'b=\\"'), + (r'a="b=\134\042"', 'b=\\"'), + ] + for encoded, decoded in cases: + with self.subTest(encoded): + C = cookies.SimpleCookie() + C.load(encoded) + self.assertEqual(C['a'].value, decoded) + + @support.requires_resource('cpu') + def test_unquote_large(self): + n = 10**6 + for encoded in r'\\', r'\134': + with self.subTest(encoded): + data = 'a="b=' + encoded*n + ';"' + C = cookies.SimpleCookie() + C.load(data) + value = C['a'].value + self.assertEqual(value[:3], 'b=\\') + self.assertEqual(value[-2:], '\\;') + self.assertEqual(len(value), n + 3) + def test_load(self): C = cookies.SimpleCookie() C.load('Customer="WILE_E_COYOTE"; Version=1; Path=/acme') diff --git a/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst b/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst new file mode 100644 index 00000000000000..158b938a65a2d4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst @@ -0,0 +1 @@ +Fix quadratic complexity in parsing cookies with backslashes. From ab87c992c2d4cd28560178048915bc9636d6566e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 16 Aug 2024 19:38:20 +0300 Subject: [PATCH 2/4] Restore the current behavior for backslash-escaping. --- Lib/http/cookies.py | 2 +- Lib/test/test_http_cookies.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index 11a67e8a2e008b..464abeb0fb253a 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -184,7 +184,7 @@ def _quote(str): return '"' + str.translate(_Translator) + '"' -_unquote_re = re.compile(r'\\(?:([0-3][0-7][0-7])|(["\\]))') +_unquote_re = re.compile(r'\\(?:([0-3][0-7][0-7])|(.))') def _unquote_replace(m): if m[1]: return chr(int(m[1], 8)) diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py index 13b526d49b0856..8879902a6e2f41 100644 --- a/Lib/test/test_http_cookies.py +++ b/Lib/test/test_http_cookies.py @@ -63,13 +63,13 @@ def test_unquote(self): cases = [ (r'a="b=\""', 'b="'), (r'a="b=\\"', 'b=\\'), - (r'a="b=\="', 'b=\\='), - (r'a="b=\n"', 'b=\\n'), + (r'a="b=\="', 'b=='), + (r'a="b=\n"', 'b=n'), (r'a="b=\042"', 'b="'), (r'a="b=\134"', 'b=\\'), (r'a="b=\377"', 'b=\xff'), - (r'a="b=\400"', 'b=\\400'), - (r'a="b=\42"', 'b=\\42'), + (r'a="b=\400"', 'b=400'), + (r'a="b=\42"', 'b=42'), (r'a="b=\\042"', 'b=\\042'), (r'a="b=\\134"', 'b=\\134'), (r'a="b=\\\""', 'b=\\"'), From 1fe24921da4c6c547da82e11c9703f3588dc5fab Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 17 Aug 2024 12:40:11 +0300 Subject: [PATCH 3/4] Cache the sub() method, not the compiled pattern object. --- Lib/http/cookies.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index 464abeb0fb253a..6b9ed24ad8ec78 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -184,7 +184,8 @@ def _quote(str): return '"' + str.translate(_Translator) + '"' -_unquote_re = re.compile(r'\\(?:([0-3][0-7][0-7])|(.))') +_unquote_sub = re.compile(r'\\(?:([0-3][0-7][0-7])|(.))').sub + def _unquote_replace(m): if m[1]: return chr(int(m[1], 8)) @@ -209,8 +210,7 @@ def _unquote(str): # \012 --> \n # \" --> " # - - return _unquote_re.sub(_unquote_replace, str) + return _unquote_sub(_unquote_replace, str) # The _getdate() routine is used to set the expiration time in the cookie's HTTP # header. By default, _getdate() returns the current time in the appropriate From 8256ed2228137c87d4b20747db84a9cdf0fa1d34 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 17 Aug 2024 13:08:20 +0300 Subject: [PATCH 4/4] Add a reference to the module in NEWS. --- .../next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst b/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst index 158b938a65a2d4..6a234561fe31a3 100644 --- a/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst +++ b/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst @@ -1 +1 @@ -Fix quadratic complexity in parsing cookies with backslashes. +Fix quadratic complexity in parsing ``"``-quoted cookie values with backslashes by :mod:`http.cookies`.