Skip to content

Commit 18dfbd0

Browse files
tdwyergpshead
andauthored
gh-102988: Detect email address parsing errors and return empty tuple to indicate the parsing error (old API) (#105127)
Detect email address parsing errors and return empty tuple to indicate the parsing error (old API). This fixes or at least ameliorates CVE-2023-27043. --------- Co-authored-by: Gregory P. Smith <greg@krypto.org>
1 parent 6782fc0 commit 18dfbd0

File tree

5 files changed

+172
-10
lines changed

5 files changed

+172
-10
lines changed

Doc/library/email.utils.rst

+25-1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ of the new API.
6565
*email address* parts. Returns a tuple of that information, unless the parse
6666
fails, in which case a 2-tuple of ``('', '')`` is returned.
6767

68+
.. versionchanged:: 3.12
69+
For security reasons, addresses that were ambiguous and could parse into
70+
multiple different addresses now cause ``('', '')`` to be returned
71+
instead of only one of the *potential* addresses.
72+
6873

6974
.. function:: formataddr(pair, charset='utf-8')
7075

@@ -87,7 +92,7 @@ of the new API.
8792
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
8893
*fieldvalues* is a sequence of header field values as might be returned by
8994
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
90-
example that gets all the recipients of a message::
95+
example that gets all the recipients of a message:
9196

9297
from email.utils import getaddresses
9398

@@ -97,6 +102,25 @@ of the new API.
97102
resent_ccs = msg.get_all('resent-cc', [])
98103
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
99104

105+
When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
106+
is returned in its place. Other errors in parsing the list of
107+
addresses such as a fieldvalue seemingly parsing into multiple
108+
addresses may result in a list containing a single empty 2-tuple
109+
``[('', '')]`` being returned rather than returning potentially
110+
invalid output.
111+
112+
Example malformed input parsing:
113+
114+
.. doctest::
115+
116+
>>> from email.utils import getaddresses
117+
>>> getaddresses(['alice@example.com <bob@example.com>', 'me@example.com'])
118+
[('', '')]
119+
120+
.. versionchanged:: 3.12
121+
The 2-tuple of ``('', '')`` in the returned values when parsing
122+
fails were added as to address a security issue.
123+
100124

101125
.. function:: parsedate(date)
102126

Doc/whatsnew/3.12.rst

+8
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,14 @@ dis
570570
:data:`~dis.hasarg` collection instead.
571571
(Contributed by Irit Katriel in :gh:`94216`.)
572572

573+
email
574+
-----
575+
576+
* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
577+
``('', '')`` 2-tuples in more situations where invalid email addresses are
578+
encountered instead of potentially inaccurate values.
579+
(Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.)
580+
573581
fractions
574582
---------
575583

Lib/email/utils.py

+57-6
Original file line numberDiff line numberDiff line change
@@ -106,12 +106,54 @@ def formataddr(pair, charset='utf-8'):
106106
return address
107107

108108

109+
def _pre_parse_validation(email_header_fields):
110+
accepted_values = []
111+
for v in email_header_fields:
112+
s = v.replace('\\(', '').replace('\\)', '')
113+
if s.count('(') != s.count(')'):
114+
v = "('', '')"
115+
accepted_values.append(v)
116+
117+
return accepted_values
118+
119+
120+
def _post_parse_validation(parsed_email_header_tuples):
121+
accepted_values = []
122+
# The parser would have parsed a correctly formatted domain-literal
123+
# The existence of an [ after parsing indicates a parsing failure
124+
for v in parsed_email_header_tuples:
125+
if '[' in v[1]:
126+
v = ('', '')
127+
accepted_values.append(v)
128+
129+
return accepted_values
130+
109131

110132
def getaddresses(fieldvalues):
111-
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
112-
all = COMMASPACE.join(str(v) for v in fieldvalues)
133+
"""Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
134+
135+
When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
136+
its place.
137+
138+
If the resulting list of parsed address is not the same as the number of
139+
fieldvalues in the input list a parsing error has occurred. A list
140+
containing a single empty 2-tuple [('', '')] is returned in its place.
141+
This is done to avoid invalid output.
142+
"""
143+
fieldvalues = [str(v) for v in fieldvalues]
144+
fieldvalues = _pre_parse_validation(fieldvalues)
145+
all = COMMASPACE.join(v for v in fieldvalues)
113146
a = _AddressList(all)
114-
return a.addresslist
147+
result = _post_parse_validation(a.addresslist)
148+
149+
n = 0
150+
for v in fieldvalues:
151+
n += v.count(',') + 1
152+
153+
if len(result) != n:
154+
return [('', '')]
155+
156+
return result
115157

116158

117159
def _format_timetuple_and_zone(timetuple, zone):
@@ -212,9 +254,18 @@ def parseaddr(addr):
212254
Return a tuple of realname and email address, unless the parse fails, in
213255
which case return a 2-tuple of ('', '').
214256
"""
215-
addrs = _AddressList(addr).addresslist
216-
if not addrs:
217-
return '', ''
257+
if isinstance(addr, list):
258+
addr = addr[0]
259+
260+
if not isinstance(addr, str):
261+
return ('', '')
262+
263+
addr = _pre_parse_validation([addr])[0]
264+
addrs = _post_parse_validation(_AddressList(addr).addresslist)
265+
266+
if not addrs or len(addrs) > 1:
267+
return ('', '')
268+
218269
return addrs[0]
219270

220271

Lib/test/test_email/test_email.py

+78-3
Original file line numberDiff line numberDiff line change
@@ -3319,15 +3319,90 @@ def test_getaddresses(self):
33193319
[('Al Person', 'aperson@dom.ain'),
33203320
('Bud Person', 'bperson@dom.ain')])
33213321

3322+
def test_getaddresses_parsing_errors(self):
3323+
"""Test for parsing errors from CVE-2023-27043"""
3324+
eq = self.assertEqual
3325+
eq(utils.getaddresses(['alice@example.org(<bob@example.com>']),
3326+
[('', '')])
3327+
eq(utils.getaddresses(['alice@example.org)<bob@example.com>']),
3328+
[('', '')])
3329+
eq(utils.getaddresses(['alice@example.org<<bob@example.com>']),
3330+
[('', '')])
3331+
eq(utils.getaddresses(['alice@example.org><bob@example.com>']),
3332+
[('', '')])
3333+
eq(utils.getaddresses(['alice@example.org@<bob@example.com>']),
3334+
[('', '')])
3335+
eq(utils.getaddresses(['alice@example.org,<bob@example.com>']),
3336+
[('', 'alice@example.org'), ('', 'bob@example.com')])
3337+
eq(utils.getaddresses(['alice@example.org;<bob@example.com>']),
3338+
[('', '')])
3339+
eq(utils.getaddresses(['alice@example.org:<bob@example.com>']),
3340+
[('', '')])
3341+
eq(utils.getaddresses(['alice@example.org.<bob@example.com>']),
3342+
[('', '')])
3343+
eq(utils.getaddresses(['alice@example.org"<bob@example.com>']),
3344+
[('', '')])
3345+
eq(utils.getaddresses(['alice@example.org[<bob@example.com>']),
3346+
[('', '')])
3347+
eq(utils.getaddresses(['alice@example.org]<bob@example.com>']),
3348+
[('', '')])
3349+
3350+
def test_parseaddr_parsing_errors(self):
3351+
"""Test for parsing errors from CVE-2023-27043"""
3352+
eq = self.assertEqual
3353+
eq(utils.parseaddr(['alice@example.org(<bob@example.com>']),
3354+
('', ''))
3355+
eq(utils.parseaddr(['alice@example.org)<bob@example.com>']),
3356+
('', ''))
3357+
eq(utils.parseaddr(['alice@example.org<<bob@example.com>']),
3358+
('', ''))
3359+
eq(utils.parseaddr(['alice@example.org><bob@example.com>']),
3360+
('', ''))
3361+
eq(utils.parseaddr(['alice@example.org@<bob@example.com>']),
3362+
('', ''))
3363+
eq(utils.parseaddr(['alice@example.org,<bob@example.com>']),
3364+
('', ''))
3365+
eq(utils.parseaddr(['alice@example.org;<bob@example.com>']),
3366+
('', ''))
3367+
eq(utils.parseaddr(['alice@example.org:<bob@example.com>']),
3368+
('', ''))
3369+
eq(utils.parseaddr(['alice@example.org.<bob@example.com>']),
3370+
('', ''))
3371+
eq(utils.parseaddr(['alice@example.org"<bob@example.com>']),
3372+
('', ''))
3373+
eq(utils.parseaddr(['alice@example.org[<bob@example.com>']),
3374+
('', ''))
3375+
eq(utils.parseaddr(['alice@example.org]<bob@example.com>']),
3376+
('', ''))
3377+
33223378
def test_getaddresses_nasty(self):
33233379
eq = self.assertEqual
33243380
eq(utils.getaddresses(['foo: ;']), [('', '')])
3325-
eq(utils.getaddresses(
3326-
['[]*-- =~$']),
3327-
[('', ''), ('', ''), ('', '*--')])
3381+
eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
33283382
eq(utils.getaddresses(
33293383
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
33303384
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3385+
eq(utils.getaddresses(
3386+
[r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
3387+
[('Pete (A nice ) chap his account his host)', 'pete@silly.test')])
3388+
eq(utils.getaddresses(
3389+
['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
3390+
[('', '')])
3391+
eq(utils.getaddresses(
3392+
['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']),
3393+
[('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')])
3394+
eq(utils.getaddresses(
3395+
['John Doe <jdoe@machine(comment). example>']),
3396+
[('John Doe (comment)', 'jdoe@machine.example')])
3397+
eq(utils.getaddresses(
3398+
['"Mary Smith: Personal Account" <smith@home.example>']),
3399+
[('Mary Smith: Personal Account', 'smith@home.example')])
3400+
eq(utils.getaddresses(
3401+
['Undisclosed recipients:;']),
3402+
[('', '')])
3403+
eq(utils.getaddresses(
3404+
[r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>']),
3405+
[('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')])
33313406

33323407
def test_getaddresses_embedded_comment(self):
33333408
"""Test proper handling of a nested comment"""
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
CVE-2023-27043: Prevent :func:`email.utils.parseaddr`
2+
and :func:`email.utils.getaddresses` from returning the realname portion of an
3+
invalid RFC2822 email header in the email address portion of the 2-tuple
4+
returned after being parsed by :class:`email._parseaddr.AddressList`.

0 commit comments

Comments
 (0)