From 64a07216160910015e2f3afb91cae0201ff40bcc Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Tue, 1 Apr 2025 08:08:16 -0700 Subject: [PATCH 001/126] Remove broken badge from readme --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 38e4d8fa8c..e472bd9f6a 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,6 @@ A Python-3 (CPython >= 3.13.0) Interpreter written in Rust :snake: :scream: [![docs.rs](https://docs.rs/rustpython/badge.svg)](https://docs.rs/rustpython/) [![Crates.io](https://img.shields.io/crates/v/rustpython)](https://crates.io/crates/rustpython) [![dependency status](https://deps.rs/crate/rustpython/0.1.1/status.svg)](https://deps.rs/crate/rustpython/0.1.1) -[![WAPM package](https://wapm.io/package/rustpython/badge.svg?style=flat)](https://wapm.io/package/rustpython) [![Open in Gitpod](https://img.shields.io/static/v1?label=Open%20in&message=Gitpod&color=1aa6e4&logo=gitpod)](https://gitpod.io#https://github.com/RustPython/RustPython) ## Usage From 2bf233280684bddaa001f6e17b1bd48d7ebc4996 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Wed, 2 Apr 2025 00:31:25 -0700 Subject: [PATCH 002/126] Cleanup whats_left.py (#5654) * cleanup whats_left.py * add features flag --- whats_left.py | 45 +++++++++------------------------------------ 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/whats_left.py b/whats_left.py index 7c1c30ba6c..82df5cabe1 100755 --- a/whats_left.py +++ b/whats_left.py @@ -55,6 +55,12 @@ def parse_args(): action="store_true", help="print output as JSON (instead of line by line)", ) + parser.add_argument( + "--features", + action="store", + help="which features to enable when building RustPython (default: ssl)", + default="ssl", + ) args = parser.parse_args() return args @@ -62,46 +68,13 @@ def parse_args(): args = parse_args() - -# modules suggested for deprecation by PEP 594 (www.python.org/dev/peps/pep-0594/) -# some of these might be implemented, but they are not a priority -PEP_594_MODULES = { - "aifc", - "asynchat", - "asyncore", - "audioop", - "binhex", - "cgi", - "cgitb", - "chunk", - "crypt", - "formatter", - "fpectl", - "imghdr", - "imp", - "macpath", - "msilib", - "nntplib", - "nis", - "ossaudiodev", - "parser", - "pipes", - "smtpd", - "sndhdr", - "spwd", - "sunau", - "telnetlib", - "uu", - "xdrlib", -} - # CPython specific modules (mostly consisting of templates/tests) CPYTHON_SPECIFIC_MODS = { 'xxmodule', 'xxsubtype', 'xxlimited', '_xxtestfuzz', '_testbuffer', '_testcapi', '_testimportmultiple', '_testinternalcapi', '_testmultiphase', '_testlimitedcapi' } -IGNORED_MODULES = {"this", "antigravity"} | PEP_594_MODULES | CPYTHON_SPECIFIC_MODS +IGNORED_MODULES = {"this", "antigravity"} | CPYTHON_SPECIFIC_MODS sys.path = [ path @@ -446,9 +419,9 @@ def remove_one_indent(s): f.write(output + "\n") -subprocess.run(["cargo", "build", "--release", "--features=ssl"], check=True) +subprocess.run(["cargo", "build", "--release", f"--features={args.features}"], check=True) result = subprocess.run( - ["cargo", "run", "--release", "--features=ssl", "-q", "--", GENERATED_FILE], + ["cargo", "run", "--release", f"--features={args.features}", "-q", "--", GENERATED_FILE], env={**os.environ.copy(), "RUSTPYTHONPATH": "Lib"}, text=True, capture_output=True, From 8063148598be283b7f5aa0972d8e29217f47c9f8 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Thu, 3 Apr 2025 22:04:13 -0700 Subject: [PATCH 003/126] Fix clippy lints from rust 1.86 update (#5665) * handle rust 1.86 update * fix windows clippy lint * disable cspell 
under jit/instruction --------- Co-authored-by: Jeong YunWon --- common/src/fileutils.rs | 2 +- jit/src/instructions.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/common/src/fileutils.rs b/common/src/fileutils.rs index 67713c0148..5a0d380e20 100644 --- a/common/src/fileutils.rs +++ b/common/src/fileutils.rs @@ -78,7 +78,7 @@ pub mod windows { .encode_wide() .collect::>() .split(|&c| c == '.' as u16) - .last() + .next_back() .and_then(|s| String::from_utf16(s).ok()); if let Some(file_extension) = file_extension { diff --git a/jit/src/instructions.rs b/jit/src/instructions.rs index bf30e51d74..830a578562 100644 --- a/jit/src/instructions.rs +++ b/jit/src/instructions.rs @@ -1,3 +1,4 @@ +// cspell: disable use super::{JitCompileError, JitSig, JitType}; use cranelift::codegen::ir::FuncRef; use cranelift::prelude::*; @@ -559,7 +560,7 @@ impl<'a, 'b> FunctionCompiler<'a, 'b> { Ok(()) } - Instruction::SetupLoop { .. } => { + Instruction::SetupLoop => { let loop_head = self.builder.create_block(); self.builder.ins().jump(loop_head, &[]); self.builder.switch_to_block(loop_head); From 6620aa07af4b1c296d1e60c18b3f70463bec43ba Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Thu, 3 Apr 2025 09:53:45 -0700 Subject: [PATCH 004/126] update email to 3.13.2 --- Lib/email/__init__.py | 1 - Lib/email/_encoded_words.py | 60 +- Lib/email/_header_value_parser.py | 1086 ++++++++++++++++------------- Lib/email/_parseaddr.py | 26 +- Lib/email/_policybase.py | 22 +- Lib/email/architecture.rst | 2 +- Lib/email/base64mime.py | 6 +- Lib/email/charset.py | 20 +- Lib/email/contentmanager.py | 23 +- Lib/email/encoders.py | 4 - Lib/email/errors.py | 10 + Lib/email/feedparser.py | 23 +- Lib/email/generator.py | 28 +- Lib/email/header.py | 11 +- Lib/email/headerregistry.py | 76 +- Lib/email/iterators.py | 3 - Lib/email/message.py | 70 +- Lib/email/mime/application.py | 2 +- Lib/email/mime/audio.py | 87 ++- Lib/email/mime/base.py | 1 - Lib/email/mime/image.py | 125 +++- Lib/email/mime/message.py | 1 - Lib/email/mime/multipart.py | 1 - Lib/email/mime/nonmultipart.py | 1 - Lib/email/mime/text.py | 4 +- Lib/email/parser.py | 9 +- Lib/email/policy.py | 21 +- Lib/email/quoprimime.py | 3 +- Lib/email/utils.py | 250 +++++-- 29 files changed, 1197 insertions(+), 779 deletions(-) diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py index fae872439e..9fa4778300 100644 --- a/Lib/email/__init__.py +++ b/Lib/email/__init__.py @@ -25,7 +25,6 @@ ] - # Some convenience routines. Don't import Parser and Message as side-effects # of importing email since those cascadingly import most of the rest of the # email package. diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py index 5eaab36ed0..6795a606de 100644 --- a/Lib/email/_encoded_words.py +++ b/Lib/email/_encoded_words.py @@ -62,7 +62,7 @@ # regex based decoder. _q_byte_subber = functools.partial(re.compile(br'=([a-fA-F0-9]{2})').sub, - lambda m: bytes([int(m.group(1), 16)])) + lambda m: bytes.fromhex(m.group(1).decode())) def decode_q(encoded): encoded = encoded.replace(b'_', b' ') @@ -98,30 +98,42 @@ def len_q(bstring): # def decode_b(encoded): - defects = [] + # First try encoding with validate=True, fixing the padding if needed. + # This will succeed only if encoded includes no invalid characters. 
pad_err = len(encoded) % 4 - if pad_err: - defects.append(errors.InvalidBase64PaddingDefect()) - padded_encoded = encoded + b'==='[:4-pad_err] - else: - padded_encoded = encoded + missing_padding = b'==='[:4-pad_err] if pad_err else b'' try: - return base64.b64decode(padded_encoded, validate=True), defects + return ( + base64.b64decode(encoded + missing_padding, validate=True), + [errors.InvalidBase64PaddingDefect()] if pad_err else [], + ) except binascii.Error: - # Since we had correct padding, this must an invalid char error. - defects = [errors.InvalidBase64CharactersDefect()] + # Since we had correct padding, this is likely an invalid char error. + # # The non-alphabet characters are ignored as far as padding - # goes, but we don't know how many there are. So we'll just - # try various padding lengths until something works. - for i in 0, 1, 2, 3: + # goes, but we don't know how many there are. So try without adding + # padding to see if it works. + try: + return ( + base64.b64decode(encoded, validate=False), + [errors.InvalidBase64CharactersDefect()], + ) + except binascii.Error: + # Add as much padding as could possibly be necessary (extra padding + # is ignored). try: - return base64.b64decode(encoded+b'='*i, validate=False), defects + return ( + base64.b64decode(encoded + b'==', validate=False), + [errors.InvalidBase64CharactersDefect(), + errors.InvalidBase64PaddingDefect()], + ) except binascii.Error: - if i==0: - defects.append(errors.InvalidBase64PaddingDefect()) - else: - # This should never happen. - raise AssertionError("unexpected binascii.Error") + # This only happens when the encoded string's length is 1 more + # than a multiple of 4, which is invalid. + # + # bpo-27397: Just return the encoded string since there's no + # way to decode. + return encoded, [errors.InvalidBase64LengthDefect()] def encode_b(bstring): return base64.b64encode(bstring).decode('ascii') @@ -167,15 +179,15 @@ def decode(ew): # Turn the CTE decoded bytes into unicode. 
try: string = bstring.decode(charset) - except UnicodeError: + except UnicodeDecodeError: defects.append(errors.UndecodableBytesDefect("Encoded word " - "contains bytes not decodable using {} charset".format(charset))) + f"contains bytes not decodable using {charset!r} charset")) string = bstring.decode(charset, 'surrogateescape') - except LookupError: + except (LookupError, UnicodeEncodeError): string = bstring.decode('ascii', 'surrogateescape') if charset.lower() != 'unknown-8bit': - defects.append(errors.CharsetError("Unknown charset {} " - "in encoded word; decoded as unknown bytes".format(charset))) + defects.append(errors.CharsetError(f"Unknown charset {charset!r} " + f"in encoded word; decoded as unknown bytes")) return string, charset, lang, defects diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 57d01fbcb0..ec2215a5e5 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -68,9 +68,9 @@ """ import re +import sys import urllib # For urllib.parse.unquote from string import hexdigits -from collections import OrderedDict from operator import itemgetter from email import _encoded_words as _ew from email import errors @@ -92,93 +92,23 @@ ASPECIALS = TSPECIALS | set("*'%") ATTRIBUTE_ENDS = ASPECIALS | WSP EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') +NLSET = {'\n', '\r'} +SPECIALSNL = SPECIALS | NLSET def quote_string(value): return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' -# -# Accumulator for header folding -# - -class _Folded: - - def __init__(self, maxlen, policy): - self.maxlen = maxlen - self.policy = policy - self.lastlen = 0 - self.stickyspace = None - self.firstline = True - self.done = [] - self.current = [] +# Match a RFC 2047 word, looks like =?utf-8?q?someword?= +rfc2047_matcher = re.compile(r''' + =\? # literal =? + [^?]* # charset + \? # literal ? + [qQbB] # literal 'q' or 'b', case insensitive + \? # literal ? + .*? 
# encoded word + \?= # literal ?= +''', re.VERBOSE | re.MULTILINE) - def newline(self): - self.done.extend(self.current) - self.done.append(self.policy.linesep) - self.current.clear() - self.lastlen = 0 - - def finalize(self): - if self.current: - self.newline() - - def __str__(self): - return ''.join(self.done) - - def append(self, stoken): - self.current.append(stoken) - - def append_if_fits(self, token, stoken=None): - if stoken is None: - stoken = str(token) - l = len(stoken) - if self.stickyspace is not None: - stickyspace_len = len(self.stickyspace) - if self.lastlen + stickyspace_len + l <= self.maxlen: - self.current.append(self.stickyspace) - self.lastlen += stickyspace_len - self.current.append(stoken) - self.lastlen += l - self.stickyspace = None - self.firstline = False - return True - if token.has_fws: - ws = token.pop_leading_fws() - if ws is not None: - self.stickyspace += str(ws) - stickyspace_len += len(ws) - token._fold(self) - return True - if stickyspace_len and l + 1 <= self.maxlen: - margin = self.maxlen - l - if 0 < margin < stickyspace_len: - trim = stickyspace_len - margin - self.current.append(self.stickyspace[:trim]) - self.stickyspace = self.stickyspace[trim:] - stickyspace_len = trim - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.lastlen = l + stickyspace_len - self.stickyspace = None - self.firstline = False - return True - if not self.firstline: - self.newline() - self.current.append(self.stickyspace) - self.current.append(stoken) - self.stickyspace = None - self.firstline = False - return True - if self.lastlen + l <= self.maxlen: - self.current.append(stoken) - self.lastlen += l - return True - if l < self.maxlen: - self.newline() - self.current.append(stoken) - self.lastlen = l - return True - return False # # TokenList and its subclasses @@ -187,6 +117,8 @@ def append_if_fits(self, token, stoken=None): class TokenList(list): token_type = None + syntactic_break = True + ew_combine_allowed = True def __init__(self, *args, **kw): super().__init__(*args, **kw) @@ -207,84 +139,13 @@ def value(self): def all_defects(self): return sum((x.all_defects for x in self), self.defects) - # - # Folding API - # - # parts(): - # - # return a list of objects that constitute the "higher level syntactic - # objects" specified by the RFC as the best places to fold a header line. - # The returned objects must include leading folding white space, even if - # this means mutating the underlying parse tree of the object. Each object - # is only responsible for returning *its* parts, and should not drill down - # to any lower level except as required to meet the leading folding white - # space constraint. - # - # _fold(folded): - # - # folded: the result accumulator. This is an instance of _Folded. - # (XXX: I haven't finished factoring this out yet, the folding code - # pretty much uses this as a state object.) When the folded.current - # contains as much text as will fit, the _fold method should call - # folded.newline. - # folded.lastlen: the current length of the test stored in folded.current. - # folded.maxlen: The maximum number of characters that may appear on a - # folded line. Differs from the policy setting in that "no limit" is - # represented by +inf, which means it can be used in the trivially - # logical fashion in comparisons. - # - # Currently no subclasses implement parts, and I think this will remain - # true. A subclass only needs to implement _fold when the generic version - # isn't sufficient. 
_fold will need to be implemented primarily when it is - # possible for encoded words to appear in the specialized token-list, since - # there is no generic algorithm that can know where exactly the encoded - # words are allowed. A _fold implementation is responsible for filling - # lines in the same general way that the top level _fold does. It may, and - # should, call the _fold method of sub-objects in a similar fashion to that - # of the top level _fold. - # - # XXX: I'm hoping it will be possible to factor the existing code further - # to reduce redundancy and make the logic clearer. - - @property - def parts(self): - klass = self.__class__ - this = [] - for token in self: - if token.startswith_fws(): - if this: - yield this[0] if len(this)==1 else klass(this) - this.clear() - end_ws = token.pop_trailing_ws() - this.append(token) - if end_ws: - yield klass(this) - this = [end_ws] - if this: - yield this[0] if len(this)==1 else klass(this) - def startswith_fws(self): return self[0].startswith_fws() - def pop_leading_fws(self): - if self[0].token_type == 'fws': - return self.pop(0) - return self[0].pop_leading_fws() - - def pop_trailing_ws(self): - if self[-1].token_type == 'cfws': - return self.pop(-1) - return self[-1].pop_trailing_ws() - @property - def has_fws(self): - for part in self: - if part.has_fws: - return True - return False - - def has_leading_comment(self): - return self[0].has_leading_comment() + def as_ew_allowed(self): + """True if all top level tokens of this part may be RFC2047 encoded.""" + return all(part.as_ew_allowed for part in self) @property def comments(self): @@ -294,71 +155,13 @@ def comments(self): return comments def fold(self, *, policy): - # max_line_length 0/None means no limit, ie: infinitely long. - maxlen = policy.max_line_length or float("+inf") - folded = _Folded(maxlen, policy) - self._fold(folded) - folded.finalize() - return str(folded) - - def as_encoded_word(self, charset): - # This works only for things returned by 'parts', which include - # the leading fws, if any, that should be used. - res = [] - ws = self.pop_leading_fws() - if ws: - res.append(ws) - trailer = self.pop(-1) if self[-1].token_type=='fws' else '' - res.append(_ew.encode(str(self), charset)) - res.append(trailer) - return ''.join(res) - - def cte_encode(self, charset, policy): - res = [] - for part in self: - res.append(part.cte_encode(charset, policy)) - return ''.join(res) - - def _fold(self, folded): - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - # XXX: this should be a policy setting when utf8 is False. - charset = 'utf-8' - tstr = part.cte_encode(charset, folded.policy) - tlen = len(tstr) - if folded.append_if_fits(part, tstr): - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. - ws = part.pop_leading_fws() - if ws is not None: - # Peel off the leading whitespace and make it sticky, to - # avoid infinite recursion. - folded.stickyspace = str(part.pop(0)) - if folded.append_if_fits(part): - continue - if part.has_fws: - part._fold(folded) - continue - # There are no fold points in this one; it is too long for a single - # line and can't be split...we just have to put it on its own line. 
- folded.append(tstr) - folded.newline() + return _refold_parse_tree(self, policy=policy) def pprint(self, indent=''): - print('\n'.join(self._pp(indent=''))) + print(self.ppstr(indent=indent)) def ppstr(self, indent=''): - return '\n'.join(self._pp(indent='')) + return '\n'.join(self._pp(indent=indent)) def _pp(self, indent=''): yield '{}{}/{}('.format( @@ -390,213 +193,35 @@ def comments(self): class UnstructuredTokenList(TokenList): - token_type = 'unstructured' - def _fold(self, folded): - last_ew = None - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - is_ew = False - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None: - # We've already done an EW, combine this one with it - # if there's room. - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - oldlastlen = sum(len(x) for x in folded.current[:last_ew]) - schunk = str(chunk) - lchunk = len(schunk) - if oldlastlen + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = oldlastlen + lchunk - continue - tstr = part.as_encoded_word(charset) - is_ew = True - if folded.append_if_fits(part, tstr): - if is_ew: - last_ew = len(folded.current) - 1 - continue - if is_ew or last_ew: - # It's too big to fit on the line, but since we've - # got encoded words we can use encoded word folding. - part._fold_as_ew(folded) - continue - # Peel off the leading whitespace if any and make it sticky, to - # avoid infinite recursion. - ws = part.pop_leading_fws() - if ws is not None: - folded.stickyspace = str(ws) - if folded.append_if_fits(part): - continue - if part.has_fws: - part._fold(folded) - continue - # It can't be split...we just have to put it on its own line. - folded.append(tstr) - folded.newline() - last_ew = None - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - if last_ew is None: - res.append(part.cte_encode(charset, policy)) - last_ew = len(res) - else: - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res.append(tl.as_encoded_word(charset)) - return ''.join(res) - class Phrase(TokenList): - token_type = 'phrase' - def _fold(self, folded): - # As with Unstructured, we can have pure ASCII with or without - # surrogateescape encoded bytes, or we could have unicode. But this - # case is more complicated, since we have to deal with the various - # sub-token types and how they can be composed in the face of - # unicode-that-needs-CTE-encoding, and the fact that if a token a - # comment that becomes a barrier across which we can't compose encoded - # words. - last_ew = None - encoding = 'utf-8' if folded.policy.utf8 else 'ascii' - for part in self.parts: - tstr = str(part) - tlen = len(tstr) - has_ew = False - try: - str(part).encode(encoding) - except UnicodeEncodeError: - if any(isinstance(x, errors.UndecodableBytesDefect) - for x in part.all_defects): - charset = 'unknown-8bit' - else: - charset = 'utf-8' - if last_ew is not None and not part.has_leading_comment(): - # We've already done an EW, let's see if we can combine - # this one with it. The last_ew logic ensures that all we - # have at this point is atoms, no comments or quoted - # strings. 
So we can treat the text between the last - # encoded word and the content of this token as - # unstructured text, and things will work correctly. But - # we have to strip off any trailing comment on this token - # first, and if it is a quoted string we have to pull out - # the content (we're encoding it, so it no longer needs to - # be quoted). - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - chunk = get_unstructured( - ''.join(folded.current[last_ew:]+[tstr])).as_encoded_word(charset) - schunk = str(chunk) - lchunk = len(schunk) - if last_ew + lchunk <= folded.maxlen: - del folded.current[last_ew:] - folded.append(schunk) - folded.lastlen = sum(len(x) for x in folded.current) - continue - tstr = part.as_encoded_word(charset) - tlen = len(tstr) - has_ew = True - if folded.append_if_fits(part, tstr): - if has_ew and not part.comments: - last_ew = len(folded.current) - 1 - elif part.comments or part.token_type == 'quoted-string': - # If a comment is involved we can't combine EWs. And if a - # quoted string is involved, it's not worth the effort to - # try to combine them. - last_ew = None - continue - part._fold(folded) - - def cte_encode(self, charset, policy): - res = [] - last_ew = None - is_ew = False - for part in self: - spart = str(part) - try: - spart.encode('us-ascii') - res.append(spart) - except UnicodeEncodeError: - is_ew = True - if last_ew is None: - if not part.comments: - last_ew = len(res) - res.append(part.cte_encode(charset, policy)) - elif not part.has_leading_comment(): - if part[-1].token_type == 'cfws' and part.comments: - remainder = part.pop(-1) - else: - remainder = '' - for i, token in enumerate(part): - if token.token_type == 'bare-quoted-string': - part[i] = UnstructuredTokenList(token[:]) - tl = get_unstructured(''.join(res[last_ew:] + [spart])) - res[last_ew:] = [tl.as_encoded_word(charset)] - if part.comments or (not is_ew and part.token_type == 'quoted-string'): - last_ew = None - return ''.join(res) - class Word(TokenList): - token_type = 'word' class CFWSList(WhiteSpaceTokenList): - token_type = 'cfws' - def has_leading_comment(self): - return bool(self.comments) - class Atom(TokenList): - token_type = 'atom' class Token(TokenList): - token_type = 'token' + encode_as_ew = False class EncodedWord(TokenList): - token_type = 'encoded-word' cte = None charset = None lang = None - @property - def encoded(self): - if self.cte is not None: - return self.cte - _ew.encode(str(self), self.charset) - - class QuotedString(TokenList): @@ -812,7 +437,10 @@ def route(self): def addr_spec(self): for x in self: if x.token_type == 'addr-spec': - return x.addr_spec + if x.local_part: + return x.addr_spec + else: + return quote_string(x.local_part) + x.addr_spec else: return '<>' @@ -867,6 +495,7 @@ def display_name(self): class Domain(TokenList): token_type = 'domain' + as_ew_allowed = False @property def domain(self): @@ -874,18 +503,23 @@ def domain(self): class DotAtom(TokenList): - token_type = 'dot-atom' class DotAtomText(TokenList): - token_type = 'dot-atom-text' + as_ew_allowed = True + + +class NoFoldLiteral(TokenList): + token_type = 'no-fold-literal' + as_ew_allowed = False class AddrSpec(TokenList): token_type = 'addr-spec' + as_ew_allowed = False @property def local_part(self): @@ -918,24 +552,30 @@ def addr_spec(self): class ObsLocalPart(TokenList): token_type = 'obs-local-part' + 
as_ew_allowed = False class DisplayName(Phrase): token_type = 'display-name' + ew_combine_allowed = False @property def display_name(self): res = TokenList(self) + if len(res) == 0: + return res.value if res[0].token_type == 'cfws': res.pop(0) else: - if res[0][0].token_type == 'cfws': + if (isinstance(res[0], TokenList) and + res[0][0].token_type == 'cfws'): res[0] = TokenList(res[0][1:]) if res[-1].token_type == 'cfws': res.pop() else: - if res[-1][-1].token_type == 'cfws': + if (isinstance(res[-1], TokenList) and + res[-1][-1].token_type == 'cfws'): res[-1] = TokenList(res[-1][:-1]) return res.value @@ -948,11 +588,15 @@ def value(self): for x in self: if x.token_type == 'quoted-string': quote = True - if quote: + if len(self) != 0 and quote: pre = post = '' - if self[0].token_type=='cfws' or self[0][0].token_type=='cfws': + if (self[0].token_type == 'cfws' or + isinstance(self[0], TokenList) and + self[0][0].token_type == 'cfws'): pre = ' ' - if self[-1].token_type=='cfws' or self[-1][-1].token_type=='cfws': + if (self[-1].token_type == 'cfws' or + isinstance(self[-1], TokenList) and + self[-1][-1].token_type == 'cfws'): post = ' ' return pre+quote_string(self.display_name)+post else: @@ -962,6 +606,7 @@ def value(self): class LocalPart(TokenList): token_type = 'local-part' + as_ew_allowed = False @property def value(self): @@ -997,6 +642,7 @@ def local_part(self): class DomainLiteral(TokenList): token_type = 'domain-literal' + as_ew_allowed = False @property def domain(self): @@ -1083,6 +729,7 @@ def stripped_value(self): class MimeParameters(TokenList): token_type = 'mime-parameters' + syntactic_break = False @property def params(self): @@ -1091,7 +738,7 @@ def params(self): # to assume the RFC 2231 pieces can come in any order. However, we # output them in the order that we first see a given name, which gives # us a stable __str__. - params = OrderedDict() + params = {} # Using order preserving dict from Python 3.7+ for token in self: if not token.token_type.endswith('parameter'): continue @@ -1142,7 +789,7 @@ def params(self): else: try: value = value.decode(charset, 'surrogateescape') - except LookupError: + except (LookupError, UnicodeEncodeError): # XXX: there should really be a custom defect for # unknown character set to make it easy to find, # because otherwise unknown charset is a silent @@ -1167,6 +814,10 @@ def __str__(self): class ParameterizedHeaderValue(TokenList): + # Set this false so that the value doesn't wind up on a new line even + # if it and the parameters would fit there but not on the first line. + syntactic_break = False + @property def params(self): for token in reversed(self): @@ -1174,58 +825,50 @@ def params(self): return token.params return {} - @property - def parts(self): - if self and self[-1].token_type == 'mime-parameters': - # We don't want to start a new line if all of the params don't fit - # after the value, so unwrap the parameter list. 
- return TokenList(self[:-1] + self[-1]) - return TokenList(self).parts - class ContentType(ParameterizedHeaderValue): - token_type = 'content-type' + as_ew_allowed = False maintype = 'text' subtype = 'plain' class ContentDisposition(ParameterizedHeaderValue): - token_type = 'content-disposition' + as_ew_allowed = False content_disposition = None class ContentTransferEncoding(TokenList): - token_type = 'content-transfer-encoding' + as_ew_allowed = False cte = '7bit' class HeaderLabel(TokenList): - token_type = 'header-label' + as_ew_allowed = False -class Header(TokenList): +class MsgID(TokenList): + token_type = 'msg-id' + as_ew_allowed = False - token_type = 'header' + def fold(self, policy): + # message-id tokens may not be folded. + return str(self) + policy.linesep + + +class MessageID(MsgID): + token_type = 'message-id' - def _fold(self, folded): - folded.append(str(self.pop(0))) - folded.lastlen = len(folded.current[0]) - # The first line of the header is different from all others: we don't - # want to start a new object on a new line if it has any fold points in - # it that would allow part of it to be on the first header line. - # Further, if the first fold point would fit on the new line, we want - # to do that, but if it doesn't we want to put it on the first line. - # Folded supports this via the stickyspace attribute. If this - # attribute is not None, it does the special handling. - folded.stickyspace = str(self.pop(0)) if self[0].token_type == 'cfws' else '' - rest = self.pop(0) - if self: - raise ValueError("Malformed Header token list") - rest._fold(folded) + +class InvalidMessageID(MessageID): + token_type = 'invalid-message-id' + + +class Header(TokenList): + token_type = 'header' # @@ -1234,6 +877,10 @@ def _fold(self, folded): class Terminal(str): + as_ew_allowed = True + ew_combine_allowed = True + syntactic_break = True + def __new__(cls, value, token_type): self = super().__new__(cls, value) self.token_type = token_type @@ -1243,6 +890,9 @@ def __new__(cls, value, token_type): def __repr__(self): return "{}({})".format(self.__class__.__name__, super().__repr__()) + def pprint(self): + print(self.__class__.__name__ + '/' + self.token_type) + @property def all_defects(self): return list(self.defects) @@ -1256,29 +906,14 @@ def _pp(self, indent=''): '' if not self.defects else ' {}'.format(self.defects), )] - def cte_encode(self, charset, policy): - value = str(self) - try: - value.encode('us-ascii') - return value - except UnicodeEncodeError: - return _ew.encode(value, charset) - def pop_trailing_ws(self): # This terminates the recursion. return None - def pop_leading_fws(self): - # This terminates the recursion. 
- return None - @property def comments(self): return [] - def has_leading_comment(self): - return False - def __getnewargs__(self): return(str(self), self.token_type) @@ -1292,8 +927,6 @@ def value(self): def startswith_fws(self): return True - has_fws = True - class ValueTerminal(Terminal): @@ -1304,11 +937,6 @@ def value(self): def startswith_fws(self): return False - has_fws = False - - def as_encoded_word(self, charset): - return _ew.encode(str(self), charset) - class EWWhiteSpaceTerminal(WhiteSpaceTerminal): @@ -1316,14 +944,12 @@ class EWWhiteSpaceTerminal(WhiteSpaceTerminal): def value(self): return '' - @property - def encoded(self): - return self[:] - def __str__(self): return '' - has_fws = True + +class _InvalidEwError(errors.HeaderParseError): + """Invalid encoded word found while parsing headers.""" # XXX these need to become classes and used as instances so @@ -1331,6 +957,8 @@ def __str__(self): # up other parse trees. Maybe should have tests for that, too. DOT = ValueTerminal('.', 'dot') ListSeparator = ValueTerminal(',', 'list-separator') +ListSeparator.as_ew_allowed = False +ListSeparator.syntactic_break = False RouteComponentMarker = ValueTerminal('@', 'route-component-marker') # @@ -1356,15 +984,14 @@ def __str__(self): _wsp_splitter = re.compile(r'([{}]+)'.format(''.join(WSP))).split _non_atom_end_matcher = re.compile(r"[^{}]+".format( - ''.join(ATOM_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(ATOM_ENDS)))).match _non_printable_finder = re.compile(r"[\x00-\x20\x7F]").findall _non_token_end_matcher = re.compile(r"[^{}]+".format( - ''.join(TOKEN_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(TOKEN_ENDS)))).match _non_attribute_end_matcher = re.compile(r"[^{}]+".format( - ''.join(ATTRIBUTE_ENDS).replace('\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(ATTRIBUTE_ENDS)))).match _non_extended_attribute_end_matcher = re.compile(r"[^{}]+".format( - ''.join(EXTENDED_ATTRIBUTE_ENDS).replace( - '\\','\\\\').replace(']',r'\]'))).match + re.escape(''.join(EXTENDED_ATTRIBUTE_ENDS)))).match def _validate_xtext(xtext): """If input token contains ASCII non-printables, register a defect.""" @@ -1431,7 +1058,10 @@ def get_encoded_word(value): raise errors.HeaderParseError( "expected encoded word but found {}".format(value)) remstr = ''.join(remainder) - if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits: + if (len(remstr) > 1 and + remstr[0] in hexdigits and + remstr[1] in hexdigits and + tok.count('?') < 2): # The ? after the CTE was followed by an encoded word escape (=XX). rest, *remainder = remstr.split('?=', 1) tok = tok + '?=' + rest @@ -1442,8 +1072,8 @@ def get_encoded_word(value): value = ''.join(remainder) try: text, charset, lang, defects = _ew.decode('=?' 
+ tok + '?=') - except ValueError: - raise errors.HeaderParseError( + except (ValueError, KeyError): + raise _InvalidEwError( "encoded word format invalid: '{}'".format(ew.cte)) ew.charset = charset ew.lang = lang @@ -1458,6 +1088,10 @@ def get_encoded_word(value): _validate_xtext(vtext) ew.append(vtext) text = ''.join(remainder) + # Encoded words should be followed by a WS + if value and value[0] not in WSP: + ew.defects.append(errors.InvalidHeaderDefect( + "missing trailing whitespace after encoded-word")) return ew, value def get_unstructured(value): @@ -1489,9 +1123,12 @@ def get_unstructured(value): token, value = get_fws(value) unstructured.append(token) continue + valid_ew = True if value.startswith('=?'): try: token, value = get_encoded_word(value) + except _InvalidEwError: + valid_ew = False except errors.HeaderParseError: # XXX: Need to figure out how to register defects when # appropriate here. @@ -1510,6 +1147,14 @@ def get_unstructured(value): unstructured.append(token) continue tok, *remainder = _wsp_splitter(value, 1) + # Split in the middle of an atom if there is a rfc2047 encoded word + # which does not have WSP on both sides. The defect will be registered + # the next time through the loop. + # This needs to only be performed when the encoded word is valid; + # otherwise, performing it on an invalid encoded word can cause + # the parser to go in an infinite loop. + if valid_ew and rfc2047_matcher.search(tok): + tok, *remainder = value.partition('=?') vtext = ValueTerminal(tok, 'vtext') _validate_xtext(vtext) unstructured.append(vtext) @@ -1571,21 +1216,33 @@ def get_bare_quoted_string(value): value is the text between the quote marks, with whitespace preserved and quoted pairs decoded. """ - if value[0] != '"': + if not value or value[0] != '"': raise errors.HeaderParseError( "expected '\"' but found '{}'".format(value)) bare_quoted_string = BareQuotedString() value = value[1:] + if value and value[0] == '"': + token, value = get_qcontent(value) + bare_quoted_string.append(token) while value and value[0] != '"': if value[0] in WSP: token, value = get_fws(value) elif value[:2] == '=?': + valid_ew = False try: token, value = get_encoded_word(value) bare_quoted_string.defects.append(errors.InvalidHeaderDefect( "encoded word inside quoted string")) + valid_ew = True except errors.HeaderParseError: token, value = get_qcontent(value) + # Collapse the whitespace between two encoded words that occur in a + # bare-quoted-string. 
+ if valid_ew and len(bare_quoted_string) > 1: + if (bare_quoted_string[-1].token_type == 'fws' and + bare_quoted_string[-2].token_type == 'encoded-word'): + bare_quoted_string[-1] = EWWhiteSpaceTerminal( + bare_quoted_string[-1], 'fws') else: token, value = get_qcontent(value) bare_quoted_string.append(token) @@ -1742,6 +1399,9 @@ def get_word(value): leader, value = get_cfws(value) else: leader = None + if not value: + raise errors.HeaderParseError( + "Expected 'atom' or 'quoted-string' but found nothing.") if value[0]=='"': token, value = get_quoted_string(value) elif value[0] in SPECIALS: @@ -1797,7 +1457,7 @@ def get_local_part(value): """ local_part = LocalPart() leader = None - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: raise errors.HeaderParseError( @@ -1863,13 +1523,18 @@ def get_obs_local_part(value): raise token, value = get_cfws(value) obs_local_part.append(token) + if not obs_local_part: + raise errors.HeaderParseError( + "expected obs-local-part but found '{}'".format(value)) if (obs_local_part[0].token_type == 'dot' or obs_local_part[0].token_type=='cfws' and + len(obs_local_part) > 1 and obs_local_part[1].token_type=='dot'): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid leading '.' in local part")) if (obs_local_part[-1].token_type == 'dot' or obs_local_part[-1].token_type=='cfws' and + len(obs_local_part) > 1 and obs_local_part[-2].token_type=='dot'): obs_local_part.defects.append(errors.InvalidHeaderDefect( "Invalid trailing '.' in local part")) @@ -1951,7 +1616,7 @@ def get_domain(value): """ domain = Domain() leader = None - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: raise errors.HeaderParseError( @@ -1966,6 +1631,8 @@ def get_domain(value): token, value = get_dot_atom(value) except errors.HeaderParseError: token, value = get_atom(value) + if value and value[0] == '@': + raise errors.HeaderParseError('Invalid Domain') if leader is not None: token[:0] = [leader] domain.append(token) @@ -1989,7 +1656,7 @@ def get_addr_spec(value): addr_spec.append(token) if not value or value[0] != '@': addr_spec.defects.append(errors.InvalidHeaderDefect( - "add-spec local part with no domain")) + "addr-spec local part with no domain")) return addr_spec, value addr_spec.append(ValueTerminal('@', 'address-at-symbol')) token, value = get_domain(value[1:]) @@ -2025,6 +1692,8 @@ def get_obs_route(value): if value[0] in CFWS_LEADER: token, value = get_cfws(value) obs_route.append(token) + if not value: + break if value[0] == '@': obs_route.append(RouteComponentMarker) token, value = get_domain(value[1:]) @@ -2043,7 +1712,7 @@ def get_angle_addr(value): """ angle_addr = AngleAddr() - if value[0] in CFWS_LEADER: + if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) angle_addr.append(token) if not value or value[0] != '<': @@ -2053,7 +1722,7 @@ def get_angle_addr(value): value = value[1:] # Although it is not legal per RFC5322, SMTP uses '<>' in certain # circumstances. - if value[0] == '>': + if value and value[0] == '>': angle_addr.append(ValueTerminal('>', 'angle-addr-end')) angle_addr.defects.append(errors.InvalidHeaderDefect( "null addr-spec in angle-addr")) @@ -2105,6 +1774,9 @@ def get_name_addr(value): name_addr = NameAddr() # Both the optional display name and the angle-addr can start with cfws. 
leader = None + if not value: + raise errors.HeaderParseError( + "expected name-addr but found '{}'".format(value)) if value[0] in CFWS_LEADER: leader, value = get_cfws(value) if not value: @@ -2119,7 +1791,10 @@ def get_name_addr(value): raise errors.HeaderParseError( "expected name-addr but found '{}'".format(token)) if leader is not None: - token[0][:0] = [leader] + if isinstance(token[0], TokenList): + token[0][:0] = [leader] + else: + token[:0] = [leader] leader = None name_addr.append(token) token, value = get_angle_addr(value) @@ -2281,7 +1956,7 @@ def get_group(value): if not value: group.defects.append(errors.InvalidHeaderDefect( "end of header in group")) - if value[0] != ';': + elif value[0] != ';': raise errors.HeaderParseError( "expected ';' at end of group but found {}".format(value)) group.append(ValueTerminal(';', 'group-terminator')) @@ -2335,7 +2010,7 @@ def get_address_list(value): try: token, value = get_address(value) address_list.append(token) - except errors.HeaderParseError as err: + except errors.HeaderParseError: leader = None if value[0] in CFWS_LEADER: leader, value = get_cfws(value) @@ -2370,10 +2045,122 @@ def get_address_list(value): address_list.defects.append(errors.InvalidHeaderDefect( "invalid address in address-list")) if value: # Must be a , at this point. - address_list.append(ValueTerminal(',', 'list-separator')) + address_list.append(ListSeparator) value = value[1:] return address_list, value + +def get_no_fold_literal(value): + """ no-fold-literal = "[" *dtext "]" + """ + no_fold_literal = NoFoldLiteral() + if not value: + raise errors.HeaderParseError( + "expected no-fold-literal but found '{}'".format(value)) + if value[0] != '[': + raise errors.HeaderParseError( + "expected '[' at the start of no-fold-literal " + "but found '{}'".format(value)) + no_fold_literal.append(ValueTerminal('[', 'no-fold-literal-start')) + value = value[1:] + token, value = get_dtext(value) + no_fold_literal.append(token) + if not value or value[0] != ']': + raise errors.HeaderParseError( + "expected ']' at the end of no-fold-literal " + "but found '{}'".format(value)) + no_fold_literal.append(ValueTerminal(']', 'no-fold-literal-end')) + return no_fold_literal, value[1:] + +def get_msg_id(value): + """msg-id = [CFWS] "<" id-left '@' id-right ">" [CFWS] + id-left = dot-atom-text / obs-id-left + id-right = dot-atom-text / no-fold-literal / obs-id-right + no-fold-literal = "[" *dtext "]" + """ + msg_id = MsgID() + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + msg_id.append(token) + if not value or value[0] != '<': + raise errors.HeaderParseError( + "expected msg-id but found '{}'".format(value)) + msg_id.append(ValueTerminal('<', 'msg-id-start')) + value = value[1:] + # Parse id-left. + try: + token, value = get_dot_atom_text(value) + except errors.HeaderParseError: + try: + # obs-id-left is same as local-part of add-spec. + token, value = get_obs_local_part(value) + msg_id.defects.append(errors.ObsoleteHeaderDefect( + "obsolete id-left in msg-id")) + except errors.HeaderParseError: + raise errors.HeaderParseError( + "expected dot-atom-text or obs-id-left" + " but found '{}'".format(value)) + msg_id.append(token) + if not value or value[0] != '@': + msg_id.defects.append(errors.InvalidHeaderDefect( + "msg-id with no id-right")) + # Even though there is no id-right, if the local part + # ends with `>` let's just parse it too and return + # along with the defect. 
+ if value and value[0] == '>': + msg_id.append(ValueTerminal('>', 'msg-id-end')) + value = value[1:] + return msg_id, value + msg_id.append(ValueTerminal('@', 'address-at-symbol')) + value = value[1:] + # Parse id-right. + try: + token, value = get_dot_atom_text(value) + except errors.HeaderParseError: + try: + token, value = get_no_fold_literal(value) + except errors.HeaderParseError: + try: + token, value = get_domain(value) + msg_id.defects.append(errors.ObsoleteHeaderDefect( + "obsolete id-right in msg-id")) + except errors.HeaderParseError: + raise errors.HeaderParseError( + "expected dot-atom-text, no-fold-literal or obs-id-right" + " but found '{}'".format(value)) + msg_id.append(token) + if value and value[0] == '>': + value = value[1:] + else: + msg_id.defects.append(errors.InvalidHeaderDefect( + "missing trailing '>' on msg-id")) + msg_id.append(ValueTerminal('>', 'msg-id-end')) + if value and value[0] in CFWS_LEADER: + token, value = get_cfws(value) + msg_id.append(token) + return msg_id, value + + +def parse_message_id(value): + """message-id = "Message-ID:" msg-id CRLF + """ + message_id = MessageID() + try: + token, value = get_msg_id(value) + message_id.append(token) + except errors.HeaderParseError as ex: + token = get_unstructured(value) + message_id = InvalidMessageID(token) + message_id.defects.append( + errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex))) + else: + # Value after parsing a valid msg_id should be None. + if value: + message_id.defects.append(errors.InvalidHeaderDefect( + "Unexpected {!r}".format(value))) + + return message_id + # # XXX: As I begin to add additional header parsers, I'm realizing we probably # have two level of parser routines: the get_XXX methods that get a token in @@ -2615,8 +2402,8 @@ def get_section(value): digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': - section.defects.append(errors.InvalidHeaderError("section number" - "has an invalid leading 0")) + section.defects.append(errors.InvalidHeaderDefect( + "section number has an invalid leading 0")) section.number = int(digits) section.append(ValueTerminal(digits, 'digits')) return section, value @@ -2679,7 +2466,6 @@ def get_parameter(value): raise errors.HeaderParseError("Parameter not followed by '='") param.append(ValueTerminal('=', 'parameter-separator')) value = value[1:] - leader = None if value and value[0] in CFWS_LEADER: token, value = get_cfws(value) param.append(token) @@ -2754,7 +2540,7 @@ def get_parameter(value): if value[0] != "'": raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " "delimiter, but found {!r}".format(value)) - appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) + appendto.append(ValueTerminal("'", 'RFC2231-delimiter')) value = value[1:] if value and value[0] != "'": token, value = get_attrtext(value) @@ -2763,7 +2549,7 @@ def get_parameter(value): if not value or value[0] != "'": raise errors.HeaderParseError("Expected RFC2231 char/lang encoding " "delimiter, but found {}".format(value)) - appendto.append(ValueTerminal("'", 'RFC2231 delimiter')) + appendto.append(ValueTerminal("'", 'RFC2231-delimiter')) value = value[1:] if remainder is not None: # Treat the rest of value as bare quoted string content. 
@@ -2771,6 +2557,9 @@ def get_parameter(value): while value: if value[0] in WSP: token, value = get_fws(value) + elif value[0] == '"': + token = ValueTerminal('"', 'DQUOTE') + value = value[1:] else: token, value = get_qcontent(value) v.append(token) @@ -2791,7 +2580,7 @@ def parse_mime_parameters(value): the formal RFC grammar, but it is more convenient for us for the set of parameters to be treated as its own TokenList. - This is 'parse' routine because it consumes the reminaing value, but it + This is 'parse' routine because it consumes the remaining value, but it would never be called to parse a full header. Instead it is called to parse everything after the non-parameter value of a specific MIME header. @@ -2801,7 +2590,7 @@ def parse_mime_parameters(value): try: token, value = get_parameter(value) mime_parameters.append(token) - except errors.HeaderParseError as err: + except errors.HeaderParseError: leader = None if value[0] in CFWS_LEADER: leader, value = get_cfws(value) @@ -2859,7 +2648,6 @@ def parse_content_type_header(value): don't do that. """ ctype = ContentType() - recover = False if not value: ctype.defects.append(errors.HeaderMissingRequiredValue( "Missing content type specification")) @@ -2968,3 +2756,323 @@ def parse_content_transfer_encoding_header(value): token, value = get_phrase(value) cte_header.append(token) return cte_header + + +# +# Header folding +# +# Header folding is complex, with lots of rules and corner cases. The +# following code does its best to obey the rules and handle the corner +# cases, but you can be sure there are few bugs:) +# +# This folder generally canonicalizes as it goes, preferring the stringified +# version of each token. The tokens contain information that supports the +# folder, including which tokens can be encoded in which ways. +# +# Folded text is accumulated in a simple list of strings ('lines'), each +# one of which should be less than policy.max_line_length ('maxlen'). +# + +def _steal_trailing_WSP_if_exists(lines): + wsp = '' + if lines and lines[-1] and lines[-1][-1] in WSP: + wsp = lines[-1][-1] + lines[-1] = lines[-1][:-1] + return wsp + +def _refold_parse_tree(parse_tree, *, policy): + """Return string of contents of parse_tree folded according to RFC rules. + + """ + # max_line_length 0/None means no limit, ie: infinitely long. + maxlen = policy.max_line_length or sys.maxsize + encoding = 'utf-8' if policy.utf8 else 'us-ascii' + lines = [''] # Folded lines to be output + leading_whitespace = '' # When we have whitespace between two encoded + # words, we may need to encode the whitespace + # at the beginning of the second word. + last_ew = None # Points to the last encoded character if there's an ew on + # the line + last_charset = None + wrap_as_ew_blocked = 0 + want_encoding = False # This is set to True if we need to encode this part + end_ew_not_allowed = Terminal('', 'wrap_as_ew_blocked') + parts = list(parse_tree) + while parts: + part = parts.pop(0) + if part is end_ew_not_allowed: + wrap_as_ew_blocked -= 1 + continue + tstr = str(part) + if not want_encoding: + if part.token_type == 'ptext': + # Encode if tstr contains special characters. + want_encoding = not SPECIALSNL.isdisjoint(tstr) + else: + # Encode if tstr contains newlines. 
+ want_encoding = not NLSET.isdisjoint(tstr) + try: + tstr.encode(encoding) + charset = encoding + except UnicodeEncodeError: + if any(isinstance(x, errors.UndecodableBytesDefect) + for x in part.all_defects): + charset = 'unknown-8bit' + else: + # If policy.utf8 is false this should really be taken from a + # 'charset' property on the policy. + charset = 'utf-8' + want_encoding = True + + if part.token_type == 'mime-parameters': + # Mime parameter folding (using RFC2231) is extra special. + _fold_mime_parameters(part, lines, maxlen, encoding) + continue + + if want_encoding and not wrap_as_ew_blocked: + if not part.as_ew_allowed: + want_encoding = False + last_ew = None + if part.syntactic_break: + encoded_part = part.fold(policy=policy)[:-len(policy.linesep)] + if policy.linesep not in encoded_part: + # It fits on a single line + if len(encoded_part) > maxlen - len(lines[-1]): + # But not on this one, so start a new one. + newline = _steal_trailing_WSP_if_exists(lines) + # XXX what if encoded_part has no leading FWS? + lines.append(newline) + lines[-1] += encoded_part + continue + # Either this is not a major syntactic break, so we don't + # want it on a line by itself even if it fits, or it + # doesn't fit on a line by itself. Either way, fall through + # to unpacking the subparts and wrapping them. + if not hasattr(part, 'encode'): + # It's not a Terminal, do each piece individually. + parts = list(part) + parts + want_encoding = False + continue + elif part.as_ew_allowed: + # It's a terminal, wrap it as an encoded word, possibly + # combining it with previously encoded words if allowed. + if (last_ew is not None and + charset != last_charset and + (last_charset == 'unknown-8bit' or + last_charset == 'utf-8' and charset != 'us-ascii')): + last_ew = None + last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew, + part.ew_combine_allowed, charset, leading_whitespace) + # This whitespace has been added to the lines in _fold_as_ew() + # so clear it now. + leading_whitespace = '' + last_charset = charset + want_encoding = False + continue + else: + # It's a terminal which should be kept non-encoded + # (e.g. a ListSeparator). + last_ew = None + want_encoding = False + # fall through + + if len(tstr) <= maxlen - len(lines[-1]): + lines[-1] += tstr + continue + + # This part is too long to fit. The RFC wants us to break at + # "major syntactic breaks", so unless we don't consider this + # to be one, check if it will fit on the next line by itself. + leading_whitespace = '' + if (part.syntactic_break and + len(tstr) + 1 <= maxlen): + newline = _steal_trailing_WSP_if_exists(lines) + if newline or part.startswith_fws(): + # We're going to fold the data onto a new line here. Due to + # the way encoded strings handle continuation lines, we need to + # be prepared to encode any whitespace if the next line turns + # out to start with an encoded word. + lines.append(newline + tstr) + + whitespace_accumulator = [] + for char in lines[-1]: + if char not in WSP: + break + whitespace_accumulator.append(char) + leading_whitespace = ''.join(whitespace_accumulator) + last_ew = None + continue + if not hasattr(part, 'encode'): + # It's not a terminal, try folding the subparts. + newparts = list(part) + if not part.as_ew_allowed: + wrap_as_ew_blocked += 1 + newparts.append(end_ew_not_allowed) + parts = newparts + parts + continue + if part.as_ew_allowed and not wrap_as_ew_blocked: + # It doesn't need CTE encoding, but encode it anyway so we can + # wrap it. 
+ parts.insert(0, part) + want_encoding = True + continue + # We can't figure out how to wrap, it, so give up. + newline = _steal_trailing_WSP_if_exists(lines) + if newline or part.startswith_fws(): + lines.append(newline + tstr) + else: + # We can't fold it onto the next line either... + lines[-1] += tstr + + return policy.linesep.join(lines) + policy.linesep + +def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, leading_whitespace): + """Fold string to_encode into lines as encoded word, combining if allowed. + Return the new value for last_ew, or None if ew_combine_allowed is False. + + If there is already an encoded word in the last line of lines (indicated by + a non-None value for last_ew) and ew_combine_allowed is true, decode the + existing ew, combine it with to_encode, and re-encode. Otherwise, encode + to_encode. In either case, split to_encode as necessary so that the + encoded segments fit within maxlen. + + """ + if last_ew is not None and ew_combine_allowed: + to_encode = str( + get_unstructured(lines[-1][last_ew:] + to_encode)) + lines[-1] = lines[-1][:last_ew] + elif to_encode[0] in WSP: + # We're joining this to non-encoded text, so don't encode + # the leading blank. + leading_wsp = to_encode[0] + to_encode = to_encode[1:] + if (len(lines[-1]) == maxlen): + lines.append(_steal_trailing_WSP_if_exists(lines)) + lines[-1] += leading_wsp + + trailing_wsp = '' + if to_encode[-1] in WSP: + # Likewise for the trailing space. + trailing_wsp = to_encode[-1] + to_encode = to_encode[:-1] + new_last_ew = len(lines[-1]) if last_ew is None else last_ew + + encode_as = 'utf-8' if charset == 'us-ascii' else charset + + # The RFC2047 chrome takes up 7 characters plus the length + # of the charset name. + chrome_len = len(encode_as) + 7 + + if (chrome_len + 1) >= maxlen: + raise errors.HeaderParseError( + "max_line_length is too small to fit an encoded word") + + while to_encode: + remaining_space = maxlen - len(lines[-1]) + text_space = remaining_space - chrome_len - len(leading_whitespace) + if text_space <= 0: + lines.append(' ') + continue + + # If we are at the start of a continuation line, prepend whitespace + # (we only want to do this when the line starts with an encoded word + # but if we're folding in this helper function, then we know that we + # are going to be writing out an encoded word.) + if len(lines) > 1 and len(lines[-1]) == 1 and leading_whitespace: + encoded_word = _ew.encode(leading_whitespace, charset=encode_as) + lines[-1] += encoded_word + leading_whitespace = '' + + to_encode_word = to_encode[:text_space] + encoded_word = _ew.encode(to_encode_word, charset=encode_as) + excess = len(encoded_word) - remaining_space + while excess > 0: + # Since the chunk to encode is guaranteed to fit into less than 100 characters, + # shrinking it by one at a time shouldn't take long. + to_encode_word = to_encode_word[:-1] + encoded_word = _ew.encode(to_encode_word, charset=encode_as) + excess = len(encoded_word) - remaining_space + lines[-1] += encoded_word + to_encode = to_encode[len(to_encode_word):] + leading_whitespace = '' + + if to_encode: + lines.append(' ') + new_last_ew = len(lines[-1]) + lines[-1] += trailing_wsp + return new_last_ew if ew_combine_allowed else None + +def _fold_mime_parameters(part, lines, maxlen, encoding): + """Fold TokenList 'part' into the 'lines' list as mime parameters. 
+ + Using the decoded list of parameters and values, format them according to + the RFC rules, including using RFC2231 encoding if the value cannot be + expressed in 'encoding' and/or the parameter+value is too long to fit + within 'maxlen'. + + """ + # Special case for RFC2231 encoding: start from decoded values and use + # RFC2231 encoding iff needed. + # + # Note that the 1 and 2s being added to the length calculations are + # accounting for the possibly-needed spaces and semicolons we'll be adding. + # + for name, value in part.params: + # XXX What if this ';' puts us over maxlen the first time through the + # loop? We should split the header value onto a newline in that case, + # but to do that we need to recognize the need earlier or reparse the + # header, so I'm going to ignore that bug for now. It'll only put us + # one character over. + if not lines[-1].rstrip().endswith(';'): + lines[-1] += ';' + charset = encoding + error_handler = 'strict' + try: + value.encode(encoding) + encoding_required = False + except UnicodeEncodeError: + encoding_required = True + if utils._has_surrogates(value): + charset = 'unknown-8bit' + error_handler = 'surrogateescape' + else: + charset = 'utf-8' + if encoding_required: + encoded_value = urllib.parse.quote( + value, safe='', errors=error_handler) + tstr = "{}*={}''{}".format(name, charset, encoded_value) + else: + tstr = '{}={}'.format(name, quote_string(value)) + if len(lines[-1]) + len(tstr) + 1 < maxlen: + lines[-1] = lines[-1] + ' ' + tstr + continue + elif len(tstr) + 2 <= maxlen: + lines.append(' ' + tstr) + continue + # We need multiple sections. We are allowed to mix encoded and + # non-encoded sections, but we aren't going to. We'll encode them all. + section = 0 + extra_chrome = charset + "''" + while value: + chrome_len = len(name) + len(str(section)) + 3 + len(extra_chrome) + if maxlen <= chrome_len + 3: + # We need room for the leading blank, the trailing semicolon, + # and at least one character of the value. If we don't + # have that, we'd be stuck, so in that case fall back to + # the RFC standard width. + maxlen = 78 + splitpoint = maxchars = maxlen - chrome_len - 2 + while True: + partial = value[:splitpoint] + encoded_value = urllib.parse.quote( + partial, safe='', errors=error_handler) + if len(encoded_value) <= maxchars: + break + splitpoint -= 1 + lines.append(" {}*{}*={}{}".format( + name, section, extra_chrome, encoded_value)) + extra_chrome = '' + section += 1 + value = value[splitpoint:] + if value: + lines[-1] += ';' diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index cdfa3729ad..0f1bf8e425 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -13,7 +13,7 @@ 'quote', ] -import time, calendar +import time SPACE = ' ' EMPTYSTRING = '' @@ -65,8 +65,10 @@ def _parsedate_tz(data): """ if not data: - return + return None data = data.split() + if not data: # This happens for whitespace-only input. + return None # The FWS after the comma after the day-of-week is optional, so search and # adjust for this. 
if data[0].endswith(',') or data[0].lower() in _daynames: @@ -93,6 +95,8 @@ def _parsedate_tz(data): return None data = data[:5] [dd, mm, yy, tm, tz] = data + if not (dd and mm and yy): + return None mm = mm.lower() if mm not in _monthnames: dd, mm = mm, dd.lower() @@ -108,6 +112,8 @@ def _parsedate_tz(data): yy, tm = tm, yy if yy[-1] == ',': yy = yy[:-1] + if not yy: + return None if not yy[0].isdigit(): yy, tz = tz, yy if tm[-1] == ',': @@ -126,6 +132,8 @@ def _parsedate_tz(data): tss = 0 elif len(tm) == 3: [thh, tmm, tss] = tm + else: + return None else: return None try: @@ -186,6 +194,9 @@ def mktime_tz(data): # No zone info, so localtime is better assumption than GMT return time.mktime(data[:8] + (-1,)) else: + # Delay the import, since mktime_tz is rarely used + import calendar + t = calendar.timegm(data) return t - data[9] @@ -379,7 +390,12 @@ def getaddrspec(self): aslist.append('@') self.pos += 1 self.gotonext() - return EMPTYSTRING.join(aslist) + self.getdomain() + domain = self.getdomain() + if not domain: + # Invalid domain, return an empty address instead of returning a + # local part to denote failed parsing. + return EMPTYSTRING + return EMPTYSTRING.join(aslist) + domain def getdomain(self): """Get the complete domain name from an address.""" @@ -394,6 +410,10 @@ def getdomain(self): elif self.field[self.pos] == '.': self.pos += 1 sdlist.append('.') + elif self.field[self.pos] == '@': + # bpo-34155: Don't parse domains with two `@` like + # `a@malicious.org@important.com`. + return EMPTYSTRING elif self.field[self.pos] in self.atomends: break else: diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index df4649676a..c9f0d74309 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -152,11 +152,18 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): mangle_from_ -- a flag that, when True escapes From_ lines in the body of the message by putting a `>' in front of them. This is used when the message is being - serialized by a generator. Default: True. + serialized by a generator. Default: False. message_factory -- the class to use to create new message objects. If the value is None, the default is Message. + verify_generated_headers + -- if true, the generator verifies that each header + they are properly folded, so that a parser won't + treat it as multiple headers, start-of-body, or + part of another header. + This is a check against custom Header & fold() + implementations. """ raise_on_defect = False @@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): max_line_length = 78 mangle_from_ = False message_factory = None + verify_generated_headers = True def handle_defect(self, obj, defect): """Based on policy, either raise defect or call register_defect. @@ -294,12 +302,12 @@ def header_source_parse(self, sourcelines): """+ The name is parsed as everything up to the ':' and returned unmodified. The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and + remainder of the first line joined with all subsequent lines, and stripping any trailing carriage return or linefeed characters. """ name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n') return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): @@ -361,8 +369,12 @@ def _fold(self, name, value, sanitize): # Assume it is a Header-like object. 
h = value if h is not None: - parts.append(h.encode(linesep=self.linesep, - maxlinelen=self.max_line_length)) + # The Header class interprets a value of None for maxlinelen as the + # default value of 78, as recommended by RFC 2822. + maxlinelen = 0 + if self.max_line_length is not None: + maxlinelen = self.max_line_length + parts.append(h.encode(linesep=self.linesep, maxlinelen=maxlinelen)) parts.append(self.linesep) return ''.join(parts) diff --git a/Lib/email/architecture.rst b/Lib/email/architecture.rst index 78572ae63b..fcd10bde13 100644 --- a/Lib/email/architecture.rst +++ b/Lib/email/architecture.rst @@ -66,7 +66,7 @@ data payloads. Message Lifecycle ----------------- -The general lifecyle of a message is: +The general lifecycle of a message is: Creation A `Message` object can be created by a Parser, or it can be diff --git a/Lib/email/base64mime.py b/Lib/email/base64mime.py index 17f0818f6c..4cdf22666e 100644 --- a/Lib/email/base64mime.py +++ b/Lib/email/base64mime.py @@ -45,7 +45,6 @@ MISC_LEN = 7 - # Helpers def header_length(bytearray): """Return the length of s when it is encoded with base64.""" @@ -57,7 +56,6 @@ def header_length(bytearray): return n - def header_encode(header_bytes, charset='iso-8859-1'): """Encode a single header line with Base64 encoding in a given charset. @@ -72,7 +70,6 @@ def header_encode(header_bytes, charset='iso-8859-1'): return '=?%s?b?%s?=' % (charset, encoded) - def body_encode(s, maxlinelen=76, eol=NL): r"""Encode a string with base64. @@ -84,7 +81,7 @@ def body_encode(s, maxlinelen=76, eol=NL): in an email. """ if not s: - return s + return "" encvec = [] max_unencoded = maxlinelen * 3 // 4 @@ -98,7 +95,6 @@ def body_encode(s, maxlinelen=76, eol=NL): return EMPTYSTRING.join(encvec) - def decode(string): """Decode a raw base64 string, returning a bytes object. diff --git a/Lib/email/charset.py b/Lib/email/charset.py index ee564040c6..043801107b 100644 --- a/Lib/email/charset.py +++ b/Lib/email/charset.py @@ -18,7 +18,6 @@ from email.encoders import encode_7or8bit - # Flags for types of header encodings QP = 1 # Quoted-Printable BASE64 = 2 # Base64 @@ -32,7 +31,6 @@ EMPTYSTRING = '' - # Defaults CHARSETS = { # input header enc body enc output conv @@ -104,7 +102,6 @@ } - # Convenience functions for extending the above mappings def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): """Add character set properties to the global registry. @@ -112,8 +109,8 @@ def add_charset(charset, header_enc=None, body_enc=None, output_charset=None): charset is the input character set, and must be the canonical name of a character set. - Optional header_enc and body_enc is either Charset.QP for - quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for + Optional header_enc and body_enc is either charset.QP for + quoted-printable, charset.BASE64 for base64 encoding, charset.SHORTEST for the shortest of qp or base64 encoding, or None for no encoding. SHORTEST is only valid for header_enc. It describes how message headers and message bodies in the input charset are to be encoded. Default is no @@ -153,7 +150,6 @@ def add_codec(charset, codecname): CODEC_MAP[charset] = codecname - # Convenience function for encoding strings, taking into account # that they might be unknown-8bit (ie: have surrogate-escaped bytes) def _encode(string, codec): @@ -163,7 +159,6 @@ def _encode(string, codec): return string.encode(codec) - class Charset: """Map character sets to their email properties. 
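As a point of reference for the add_charset/Charset API whose documentation the charset.py hunks above adjust, here is a small illustrative sketch; it is not part of the patch, the 'x-example-charset' name is invented, and the exact encoded-word form shown is only a typical result.

    from email.charset import Charset, add_charset, QP, BASE64

    # Register email properties for a hypothetical charset: QP-encode
    # headers, base64-encode bodies, and convert to utf-8 on output.
    add_charset('x-example-charset', header_enc=QP, body_enc=BASE64,
                output_charset='utf-8')

    # The built-in utf-8 entry picks the shortest header encoding.
    cs = Charset('utf-8')
    print(cs.header_encode('Zürich'))   # an RFC 2047 encoded word,
                                        # e.g. =?utf-8?q?Z=C3=BCrich?=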
@@ -185,13 +180,13 @@ class Charset: header_encoding: If the character set must be encoded before it can be used in an email header, this attribute will be set to - Charset.QP (for quoted-printable), Charset.BASE64 (for - base64 encoding), or Charset.SHORTEST for the shortest of + charset.QP (for quoted-printable), charset.BASE64 (for + base64 encoding), or charset.SHORTEST for the shortest of QP or BASE64 encoding. Otherwise, it will be None. body_encoding: Same as header_encoding, but describes the encoding for the mail message's body, which indeed may be different than the - header encoding. Charset.SHORTEST is not allowed for + header encoding. charset.SHORTEST is not allowed for body_encoding. output_charset: Some character sets must be converted before they can be @@ -241,11 +236,9 @@ def __init__(self, input_charset=DEFAULT_CHARSET): self.output_codec = CODEC_MAP.get(self.output_charset, self.output_charset) - def __str__(self): + def __repr__(self): return self.input_charset.lower() - __repr__ = __str__ - def __eq__(self, other): return str(self) == str(other).lower() @@ -348,7 +341,6 @@ def header_encode_lines(self, string, maxlengths): if not lines and not current_line: lines.append(None) else: - separator = (' ' if lines else '') joined_line = EMPTYSTRING.join(current_line) header_bytes = _encode(joined_line, codec) lines.append(encoder(header_bytes)) diff --git a/Lib/email/contentmanager.py b/Lib/email/contentmanager.py index b904ded94c..b4f5830bea 100644 --- a/Lib/email/contentmanager.py +++ b/Lib/email/contentmanager.py @@ -72,12 +72,14 @@ def get_non_text_content(msg): return msg.get_payload(decode=True) for maintype in 'audio image video application'.split(): raw_data_manager.add_get_handler(maintype, get_non_text_content) +del maintype def get_message_content(msg): return msg.get_payload(0) for subtype in 'rfc822 external-body'.split(): raw_data_manager.add_get_handler('message/'+subtype, get_message_content) +del subtype def get_and_fixup_unknown_message_content(msg): @@ -144,15 +146,15 @@ def _encode_text(string, charset, cte, policy): linesep = policy.linesep.encode('ascii') def embedded_body(lines): return linesep.join(lines) + linesep def normal_body(lines): return b'\n'.join(lines) + b'\n' - if cte==None: + if cte is None: # Use heuristics to decide on the "best" encoding. - try: - return '7bit', normal_body(lines).decode('ascii') - except UnicodeDecodeError: - pass - if (policy.cte_type == '8bit' and - max(len(x) for x in lines) <= policy.max_line_length): - return '8bit', normal_body(lines).decode('ascii', 'surrogateescape') + if max((len(x) for x in lines), default=0) <= policy.max_line_length: + try: + return '7bit', normal_body(lines).decode('ascii') + except UnicodeDecodeError: + pass + if policy.cte_type == '8bit': + return '8bit', normal_body(lines).decode('ascii', 'surrogateescape') sniff = embedded_body(lines[:10]) sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), policy.max_line_length) @@ -238,9 +240,7 @@ def set_bytes_content(msg, data, maintype, subtype, cte='base64', data = binascii.b2a_qp(data, istext=False, header=False, quotetabs=True) data = data.decode('ascii') elif cte == '7bit': - # Make sure it really is only ASCII. The early warning here seems - # worth the overhead...if you care write your own content manager :). 
- data.encode('ascii') + data = data.decode('ascii') elif cte in ('8bit', 'binary'): data = data.decode('ascii', 'surrogateescape') msg.set_payload(data) @@ -248,3 +248,4 @@ def set_bytes_content(msg, data, maintype, subtype, cte='base64', _finalize_set(msg, disposition, filename, cid, params) for typ in (bytes, bytearray, memoryview): raw_data_manager.add_set_handler(typ, set_bytes_content) +del typ diff --git a/Lib/email/encoders.py b/Lib/email/encoders.py index 0a66acb624..17bd1ab7b1 100644 --- a/Lib/email/encoders.py +++ b/Lib/email/encoders.py @@ -16,7 +16,6 @@ from quopri import encodestring as _encodestring - def _qencode(s): enc = _encodestring(s, quotetabs=True) # Must encode spaces, which quopri.encodestring() doesn't do @@ -34,7 +33,6 @@ def encode_base64(msg): msg['Content-Transfer-Encoding'] = 'base64' - def encode_quopri(msg): """Encode the message's payload in quoted-printable. @@ -46,7 +44,6 @@ def encode_quopri(msg): msg['Content-Transfer-Encoding'] = 'quoted-printable' - def encode_7or8bit(msg): """Set the Content-Transfer-Encoding header to 7bit or 8bit.""" orig = msg.get_payload(decode=True) @@ -64,6 +61,5 @@ def encode_7or8bit(msg): msg['Content-Transfer-Encoding'] = '7bit' - def encode_noop(msg): """Do nothing.""" diff --git a/Lib/email/errors.py b/Lib/email/errors.py index 791239fa6a..02aa5eced6 100644 --- a/Lib/email/errors.py +++ b/Lib/email/errors.py @@ -29,6 +29,10 @@ class CharsetError(MessageError): """An illegal charset was given.""" +class HeaderWriteError(MessageError): + """Error while writing headers.""" + + # These are parsing defects which the parser was able to work around. class MessageDefect(ValueError): """Base class for a message defect.""" @@ -73,6 +77,9 @@ class InvalidBase64PaddingDefect(MessageDefect): class InvalidBase64CharactersDefect(MessageDefect): """base64 encoded sequence had characters not in base64 alphabet""" +class InvalidBase64LengthDefect(MessageDefect): + """base64 encoded sequence had invalid length (1 mod 4)""" + # These errors are specific to header parsing. class HeaderDefect(MessageDefect): @@ -105,3 +112,6 @@ class NonASCIILocalPartDefect(HeaderDefect): """local_part contains non-ASCII characters""" # This defect only occurs during unicode parsing, not when # parsing messages decoded from binary. + +class InvalidDateDefect(HeaderDefect): + """Header has unparsable or invalid date""" diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index 7c07ca8645..06d6b4a3af 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -37,11 +37,12 @@ headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])') EMPTYSTRING = '' NL = '\n' +boundaryendRE = re.compile( + r'(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') NeedMoreData = object() - class BufferedSubFile(object): """A file-ish object that can have new data loaded into it. 
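For context on the feed-style interface that the feedparser.py changes below operate on, a minimal usage sketch follows; the message text is invented and this snippet is not part of the patch.

    from email.feedparser import FeedParser
    from email import policy

    parser = FeedParser(policy=policy.default)
    # Data may arrive in arbitrary chunks; the parser buffers and resumes.
    parser.feed('Subject: hello\r\nContent-Type: text/plain\r\n')
    parser.feed('\r\nbody text\r\n')
    msg = parser.close()
    print(msg['Subject'], msg.get_content_type())   # hello text/plain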
@@ -132,7 +133,6 @@ def __next__(self): return line - class FeedParser: """A feed-style parser of email.""" @@ -189,7 +189,7 @@ def close(self): assert not self._msgstack # Look for final set of defects if root.get_content_maintype() == 'multipart' \ - and not root.is_multipart(): + and not root.is_multipart() and not self._headersonly: defect = errors.MultipartInvariantViolationDefect() self.policy.handle_defect(root, defect) return root @@ -266,7 +266,7 @@ def _parsegen(self): yield NeedMoreData continue break - msg = self._pop_message() + self._pop_message() # We need to pop the EOF matcher in order to tell if we're at # the end of the current file, not the end of the last block # of message headers. @@ -320,7 +320,7 @@ def _parsegen(self): self._cur.set_payload(EMPTYSTRING.join(lines)) return # Make sure a valid content type was specified per RFC 2045:6.4. - if (self._cur.get('content-transfer-encoding', '8bit').lower() + if (str(self._cur.get('content-transfer-encoding', '8bit')).lower() not in ('7bit', '8bit', 'binary')): defect = errors.InvalidMultipartContentTransferEncodingDefect() self.policy.handle_defect(self._cur, defect) @@ -329,9 +329,10 @@ def _parsegen(self): # this onto the input stream until we've scanned past the # preamble. separator = '--' + boundary - boundaryre = re.compile( - '(?P' + re.escape(separator) + - r')(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') + def boundarymatch(line): + if not line.startswith(separator): + return None + return boundaryendRE.match(line, len(separator)) capturing_preamble = True preamble = [] linesep = False @@ -343,7 +344,7 @@ def _parsegen(self): continue if line == '': break - mo = boundaryre.match(line) + mo = boundarymatch(line) if mo: # If we're looking at the end boundary, we're done with # this multipart. If there was a newline at the end of @@ -375,13 +376,13 @@ def _parsegen(self): if line is NeedMoreData: yield NeedMoreData continue - mo = boundaryre.match(line) + mo = boundarymatch(line) if not mo: self._input.unreadline(line) break # Recurse to parse this subpart; the input stream points # at the subpart's first line. - self._input.push_eof_matcher(boundaryre.match) + self._input.push_eof_matcher(boundarymatch) for retval in self._parsegen(): if retval is NeedMoreData: yield NeedMoreData diff --git a/Lib/email/generator.py b/Lib/email/generator.py index ae670c2353..47b9df8f4e 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -14,15 +14,16 @@ from copy import deepcopy from io import StringIO, BytesIO from email.utils import _has_surrogates +from email.errors import HeaderWriteError UNDERSCORE = '_' NL = '\n' # XXX: no longer used by the code below. NLCRE = re.compile(r'\r\n|\r|\n') fcre = re.compile(r'^From ', re.MULTILINE) +NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') - class Generator: """Generates output from a Message object tree. @@ -170,7 +171,7 @@ def _write(self, msg): # parameter. # # The way we do this, so as to make the _handle_*() methods simpler, - # is to cache any subpart writes into a buffer. The we write the + # is to cache any subpart writes into a buffer. Then we write the # headers and the buffer contents. That way, subpart handlers can # Do The Right Thing, and can still modify the Content-Type: header if # necessary. @@ -186,7 +187,11 @@ def _write(self, msg): # If we munged the cte, copy the message again and re-fix the CTE. 
if munge_cte: msg = deepcopy(msg) - msg.replace_header('content-transfer-encoding', munge_cte[0]) + # Preserve the header order if the CTE header already exists. + if msg.get('content-transfer-encoding') is None: + msg['Content-Transfer-Encoding'] = munge_cte[0] + else: + msg.replace_header('content-transfer-encoding', munge_cte[0]) msg.replace_header('content-type', munge_cte[1]) # Write the headers. First we see if the message object wants to # handle that itself. If not, we'll do it generically. @@ -219,7 +224,16 @@ def _dispatch(self, msg): def _write_headers(self, msg): for h, v in msg.raw_items(): - self.write(self.policy.fold(h, v)) + folded = self.policy.fold(h, v) + if self.policy.verify_generated_headers: + linesep = self.policy.linesep + if not folded.endswith(self.policy.linesep): + raise HeaderWriteError( + f'folded header does not end with {linesep!r}: {folded!r}') + if NEWLINE_WITHOUT_FWSP.search(folded.removesuffix(linesep)): + raise HeaderWriteError( + f'folded header contains newline: {folded!r}') + self.write(folded) # A blank line always separates headers from body self.write(self._NL) @@ -240,7 +254,7 @@ def _handle_text(self, msg): # existing message. msg = deepcopy(msg) del msg['content-transfer-encoding'] - msg.set_payload(payload, charset) + msg.set_payload(msg._payload, charset) payload = msg.get_payload() self._munge_cte = (msg['content-transfer-encoding'], msg['content-type']) @@ -388,7 +402,7 @@ def _make_boundary(cls, text=None): def _compile_re(cls, s, flags): return re.compile(s, flags) - + class BytesGenerator(Generator): """Generates a bytes version of a Message object tree. @@ -439,7 +453,6 @@ def _compile_re(cls, s, flags): return re.compile(s.encode('ascii'), flags) - _FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' class DecodedGenerator(Generator): @@ -499,7 +512,6 @@ def _dispatch(self, msg): }, file=self) - # Helper used by Generator._make_boundary _width = len(repr(sys.maxsize-1)) _fmt = '%%0%dd' % _width diff --git a/Lib/email/header.py b/Lib/email/header.py index c7b2dd9f31..984851a7d9 100644 --- a/Lib/email/header.py +++ b/Lib/email/header.py @@ -36,11 +36,11 @@ =\? # literal =? (?P[^?]*?) # non-greedy up to the next ? is the charset \? # literal ? - (?P[qb]) # either a "q" or a "b", case insensitive + (?P[qQbB]) # either a "q" or a "b", case insensitive \? # literal ? (?P.*?) # non-greedy up to the next ?= is the encoded string \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) + ''', re.VERBOSE | re.MULTILINE) # Field name regexp, including trailing colon, but not separating whitespace, # according to RFC 2822. Character range is from tilde to exclamation mark. @@ -52,12 +52,10 @@ _embedded_header = re.compile(r'\n[^ \t]+:') - # Helpers _max_append = email.quoprimime._max_append - def decode_header(header): """Decode a message header value without converting charset. 
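The header.py hunks below concern RFC 2047 decoding and folding; as a hedged illustration of the public API involved (the sample header value is invented, not taken from the patch):

    from email.header import decode_header, make_header

    raw = '=?utf-8?b?WsO8cmljaA==?='
    print(decode_header(raw))                      # [(b'Z\xc3\xbcrich', 'utf-8')]
    print(str(make_header(decode_header(raw))))    # Zürich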
@@ -152,7 +150,6 @@ def decode_header(header): return collapsed - def make_header(decoded_seq, maxlinelen=None, header_name=None, continuation_ws=' '): """Create a Header from a sequence of pairs as returned by decode_header() @@ -175,7 +172,6 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None, return h - class Header: def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None, @@ -409,7 +405,6 @@ def _normalize(self): self._chunks = chunks - class _ValueFormatter: def __init__(self, headerlen, maxlen, continuation_ws, splitchars): self._maxlen = maxlen @@ -431,7 +426,7 @@ def newline(self): if end_of_line != (' ', ''): self._current_line.push(*end_of_line) if len(self._current_line) > 0: - if self._current_line.is_onlyws(): + if self._current_line.is_onlyws() and self._lines: self._lines[-1] += str(self._current_line) else: self._lines.append(str(self._current_line)) diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py index 0fc2231e5c..543141dc42 100644 --- a/Lib/email/headerregistry.py +++ b/Lib/email/headerregistry.py @@ -2,10 +2,6 @@ This module provides an implementation of the HeaderRegistry API. The implementation is designed to flexibly follow RFC5322 rules. - -Eventually HeaderRegistry will be a public API, but it isn't yet, -and will probably change some before that happens. - """ from types import MappingProxyType @@ -31,6 +27,11 @@ def __init__(self, display_name='', username='', domain='', addr_spec=None): without any Content Transfer Encoding. """ + + inputs = ''.join(filter(None, (display_name, username, domain, addr_spec))) + if '\r' in inputs or '\n' in inputs: + raise ValueError("invalid arguments; address parts cannot contain CR or LF") + # This clause with its potential 'raise' may only happen when an # application program creates an Address object using an addr_spec # keyword. The email library code itself must always supply username @@ -69,11 +70,9 @@ def addr_spec(self): """The addr_spec (username@domain) portion of the address, quoted according to RFC 5322 rules, but with no Content Transfer Encoding. 
""" - nameset = set(self.username) - if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS): - lp = parser.quote_string(self.username) - else: - lp = self.username + lp = self.username + if not parser.DOT_ATOM_ENDS.isdisjoint(lp): + lp = parser.quote_string(lp) if self.domain: return lp + '@' + self.domain if not lp: @@ -86,19 +85,17 @@ def __repr__(self): self.display_name, self.username, self.domain) def __str__(self): - nameset = set(self.display_name) - if len(nameset) > len(nameset-parser.SPECIALS): - disp = parser.quote_string(self.display_name) - else: - disp = self.display_name + disp = self.display_name + if not parser.SPECIALS.isdisjoint(disp): + disp = parser.quote_string(disp) if disp: addr_spec = '' if self.addr_spec=='<>' else self.addr_spec return "{} <{}>".format(disp, addr_spec) return self.addr_spec def __eq__(self, other): - if type(other) != type(self): - return False + if not isinstance(other, Address): + return NotImplemented return (self.display_name == other.display_name and self.username == other.username and self.domain == other.domain) @@ -141,17 +138,15 @@ def __str__(self): if self.display_name is None and len(self.addresses)==1: return str(self.addresses[0]) disp = self.display_name - if disp is not None: - nameset = set(disp) - if len(nameset) > len(nameset-parser.SPECIALS): - disp = parser.quote_string(disp) + if disp is not None and not parser.SPECIALS.isdisjoint(disp): + disp = parser.quote_string(disp) adrstr = ", ".join(str(x) for x in self.addresses) adrstr = ' ' + adrstr if adrstr else adrstr return "{}:{};".format(disp, adrstr) def __eq__(self, other): - if type(other) != type(self): - return False + if not isinstance(other, Group): + return NotImplemented return (self.display_name == other.display_name and self.addresses == other.addresses) @@ -223,7 +218,7 @@ def __reduce__(self): self.__class__.__bases__, str(self), ), - self.__dict__) + self.__getstate__()) @classmethod def _reconstruct(cls, value): @@ -245,13 +240,16 @@ def fold(self, *, policy): the header name and the ': ' separator. """ - # At some point we need to only put fws here if it was in the source. + # At some point we need to put fws here if it was in the source. 
header = parser.Header([ parser.HeaderLabel([ parser.ValueTerminal(self.name, 'header-name'), parser.ValueTerminal(':', 'header-sep')]), - parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]), - self._parse_tree]) + ]) + if self._parse_tree: + header.append( + parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')])) + header.append(self._parse_tree) return header.fold(policy=policy) @@ -300,7 +298,14 @@ def parse(cls, value, kwds): kwds['parse_tree'] = parser.TokenList() return if isinstance(value, str): - value = utils.parsedate_to_datetime(value) + kwds['decoded'] = value + try: + value = utils.parsedate_to_datetime(value) + except ValueError: + kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format')) + kwds['datetime'] = None + kwds['parse_tree'] = parser.TokenList() + return kwds['datetime'] = value kwds['decoded'] = utils.format_datetime(kwds['datetime']) kwds['parse_tree'] = cls.value_parser(kwds['decoded']) @@ -369,8 +374,8 @@ def groups(self): @property def addresses(self): if self._addresses is None: - self._addresses = tuple([address for group in self._groups - for address in group.addresses]) + self._addresses = tuple(address for group in self._groups + for address in group.addresses) return self._addresses @@ -517,6 +522,18 @@ def cte(self): return self._cte +class MessageIDHeader: + + max_count = 1 + value_parser = staticmethod(parser.parse_message_id) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + + # The header factory # _default_header_map = { @@ -539,6 +556,7 @@ def cte(self): 'content-type': ContentTypeHeader, 'content-disposition': ContentDispositionHeader, 'content-transfer-encoding': ContentTransferEncodingHeader, + 'message-id': MessageIDHeader, } class HeaderRegistry: diff --git a/Lib/email/iterators.py b/Lib/email/iterators.py index b5502ee975..3410935e38 100644 --- a/Lib/email/iterators.py +++ b/Lib/email/iterators.py @@ -15,7 +15,6 @@ from io import StringIO - # This function will become a method of the Message class def walk(self): """Walk over the message tree, yielding each subpart. @@ -29,7 +28,6 @@ def walk(self): yield from subpart.walk() - # These two functions are imported into the Iterators.py interface module. def body_line_iterator(msg, decode=False): """Iterate over the parts, returning string payloads line-by-line. @@ -55,7 +53,6 @@ def typed_subpart_iterator(msg, maintype='text', subtype=None): yield subpart - def _structure(msg, fp=None, level=0, include_default=False): """A handy debugging aid""" if fp is None: diff --git a/Lib/email/message.py b/Lib/email/message.py index f932186875..46bb8c2194 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -6,6 +6,7 @@ __all__ = ['Message', 'EmailMessage'] +import binascii import re import quopri from io import BytesIO, StringIO @@ -13,7 +14,7 @@ # Intrapackage imports from email import utils from email import errors -from email._policybase import Policy, compat32 +from email._policybase import compat32 from email import charset as _charset from email._encoded_words import decode_b Charset = _charset.Charset @@ -34,7 +35,7 @@ def _splitparam(param): if not sep: return a.strip(), None return a.strip(), b.strip() - + def _formatparam(param, value=None, quote=True): """Convenience function to format and return a key=value pair. 
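As a reference point for the get_payload/set_payload behavior adjusted in the message.py hunks that follow, a small usage sketch (not part of the patch; the body text is invented):

    from email.message import EmailMessage

    msg = EmailMessage()
    msg.set_content('hello\n')                # plain ASCII body
    print(msg['Content-Transfer-Encoding'])   # 7bit
    print(msg.get_payload(decode=True))       # b'hello\n'
    print(msg.get_content())                  # hello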
@@ -129,7 +130,8 @@ def _decode_uu(encoded): decoded_lines.append(decoded_line) return b''.join(decoded_lines) - + + class Message: """Basic message object. @@ -169,7 +171,7 @@ def as_string(self, unixfrom=False, maxheaderlen=0, policy=None): header. For backward compatibility reasons, if maxheaderlen is not specified it defaults to 0, so you must override it explicitly if you want a different maxheaderlen. 'policy' is passed to the - Generator instance used to serialize the mesasge; if it is not + Generator instance used to serialize the message; if it is not specified the policy associated with the message instance is used. If the message object contains binary data that is not encoded @@ -287,25 +289,26 @@ def get_payload(self, i=None, decode=False): # cte might be a Header, so for now stringify it. cte = str(self.get('content-transfer-encoding', '')).lower() # payload may be bytes here. - if isinstance(payload, str): - if utils._has_surrogates(payload): - bpayload = payload.encode('ascii', 'surrogateescape') - if not decode: + if not decode: + if isinstance(payload, str) and utils._has_surrogates(payload): + try: + bpayload = payload.encode('ascii', 'surrogateescape') try: - payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') + payload = bpayload.decode(self.get_content_charset('ascii'), 'replace') except LookupError: payload = bpayload.decode('ascii', 'replace') - elif decode: - try: - bpayload = payload.encode('ascii') - except UnicodeError: - # This won't happen for RFC compliant messages (messages - # containing only ASCII code points in the unicode input). - # If it does happen, turn the string into bytes in a way - # guaranteed not to fail. - bpayload = payload.encode('raw-unicode-escape') - if not decode: + except UnicodeEncodeError: + pass return payload + if isinstance(payload, str): + try: + bpayload = payload.encode('ascii', 'surrogateescape') + except UnicodeEncodeError: + # This won't happen for RFC compliant messages (messages + # containing only ASCII code points in the unicode input). + # If it does happen, turn the string into bytes in a way + # guaranteed not to fail. + bpayload = payload.encode('raw-unicode-escape') if cte == 'quoted-printable': return quopri.decodestring(bpayload) elif cte == 'base64': @@ -337,7 +340,7 @@ def set_payload(self, payload, charset=None): return if not isinstance(charset, Charset): charset = Charset(charset) - payload = payload.encode(charset.output_charset) + payload = payload.encode(charset.output_charset, 'surrogateescape') if hasattr(payload, 'decode'): self._payload = payload.decode('ascii', 'surrogateescape') else: @@ -446,7 +449,11 @@ def __delitem__(self, name): self._headers = newheaders def __contains__(self, name): - return name.lower() in [k.lower() for k, v in self._headers] + name_lower = name.lower() + for k, v in self._headers: + if name_lower == k.lower(): + return True + return False def __iter__(self): for field, value in self._headers: @@ -973,7 +980,7 @@ def __init__(self, policy=None): if policy is None: from email.policy import default policy = default - Message.__init__(self, policy) + super().__init__(policy) def as_string(self, unixfrom=False, maxheaderlen=None, policy=None): @@ -983,14 +990,14 @@ def as_string(self, unixfrom=False, maxheaderlen=None, policy=None): header. maxheaderlen is retained for backward compatibility with the base Message class, but defaults to None, meaning that the policy value for max_line_length controls the header maximum length. 
'policy' is - passed to the Generator instance used to serialize the mesasge; if it + passed to the Generator instance used to serialize the message; if it is not specified the policy associated with the message instance is used. """ policy = self.policy if policy is None else policy if maxheaderlen is None: maxheaderlen = policy.max_line_length - return super().as_string(maxheaderlen=maxheaderlen, policy=policy) + return super().as_string(unixfrom, maxheaderlen, policy) def __str__(self): return self.as_string(policy=self.policy.clone(utf8=True)) @@ -1007,7 +1014,7 @@ def _find_body(self, part, preferencelist): if subtype in preferencelist: yield (preferencelist.index(subtype), part) return - if maintype != 'multipart': + if maintype != 'multipart' or not self.is_multipart(): return if subtype != 'related': for subpart in part.iter_parts(): @@ -1066,7 +1073,16 @@ def iter_attachments(self): maintype, subtype = self.get_content_type().split('/') if maintype != 'multipart' or subtype == 'alternative': return - parts = self.get_payload().copy() + payload = self.get_payload() + # Certain malformed messages can have content type set to `multipart/*` + # but still have single part body, in which case payload.copy() can + # fail with AttributeError. + try: + parts = payload.copy() + except AttributeError: + # payload is not a list, it is most probably a string. + return + if maintype == 'multipart' and subtype == 'related': # For related, we treat everything but the root as an attachment. # The root may be indicated by 'start'; if there's no start or we @@ -1103,7 +1119,7 @@ def iter_parts(self): Return an empty iterator for a non-multipart. """ - if self.get_content_maintype() == 'multipart': + if self.is_multipart(): yield from self.get_payload() def get_content(self, *args, content_manager=None, **kw): diff --git a/Lib/email/mime/application.py b/Lib/email/mime/application.py index 6877e554e1..f67cbad3f0 100644 --- a/Lib/email/mime/application.py +++ b/Lib/email/mime/application.py @@ -17,7 +17,7 @@ def __init__(self, _data, _subtype='octet-stream', _encoder=encoders.encode_base64, *, policy=None, **_params): """Create an application/* type MIME document. - _data is a string containing the raw application data. + _data contains the bytes for the raw application data. _subtype is the MIME content type subtype, defaulting to 'octet-stream'. diff --git a/Lib/email/mime/audio.py b/Lib/email/mime/audio.py index 4bcd7b224a..aa0c4905cb 100644 --- a/Lib/email/mime/audio.py +++ b/Lib/email/mime/audio.py @@ -6,39 +6,10 @@ __all__ = ['MIMEAudio'] -import sndhdr - -from io import BytesIO from email import encoders from email.mime.nonmultipart import MIMENonMultipart - -_sndhdr_MIMEmap = {'au' : 'basic', - 'wav' :'x-wav', - 'aiff':'x-aiff', - 'aifc':'x-aiff', - } - -# There are others in sndhdr that don't have MIME types. :( -# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? -def _whatsnd(data): - """Try to identify a sound file type. - - sndhdr.what() has a pretty cruddy interface, unfortunately. This is why - we re-do it here. It would be easier to reverse engineer the Unix 'file' - command and use the standard 'magic' file, as shipped with a modern Unix. 
- """ - hdr = data[:512] - fakefile = BytesIO(hdr) - for testfn in sndhdr.tests: - res = testfn(hdr, fakefile) - if res is not None: - return _sndhdr_MIMEmap.get(res[0]) - return None - - - class MIMEAudio(MIMENonMultipart): """Class for generating audio/* MIME documents.""" @@ -46,8 +17,8 @@ def __init__(self, _audiodata, _subtype=None, _encoder=encoders.encode_base64, *, policy=None, **_params): """Create an audio/* type MIME document. - _audiodata is a string containing the raw audio data. If this data - can be decoded by the standard Python `sndhdr' module, then the + _audiodata contains the bytes for the raw audio data. If this data + can be decoded as au, wav, aiff, or aifc, then the subtype will be automatically included in the Content-Type header. Otherwise, you can specify the specific audio subtype via the _subtype parameter. If _subtype is not given, and no subtype can be @@ -65,10 +36,62 @@ def __init__(self, _audiodata, _subtype=None, header. """ if _subtype is None: - _subtype = _whatsnd(_audiodata) + _subtype = _what(_audiodata) if _subtype is None: raise TypeError('Could not find audio MIME subtype') MIMENonMultipart.__init__(self, 'audio', _subtype, policy=policy, **_params) self.set_payload(_audiodata) _encoder(self) + + +_rules = [] + + +# Originally from the sndhdr module. +# +# There are others in sndhdr that don't have MIME types. :( +# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? +def _what(data): + # Try to identify a sound file type. + # + # sndhdr.what() had a pretty cruddy interface, unfortunately. This is why + # we re-do it here. It would be easier to reverse engineer the Unix 'file' + # command and use the standard 'magic' file, as shipped with a modern Unix. + for testfn in _rules: + if res := testfn(data): + return res + else: + return None + + +def rule(rulefunc): + _rules.append(rulefunc) + return rulefunc + + +@rule +def _aiff(h): + if not h.startswith(b'FORM'): + return None + if h[8:12] in {b'AIFC', b'AIFF'}: + return 'x-aiff' + else: + return None + + +@rule +def _au(h): + if h.startswith(b'.snd'): + return 'basic' + else: + return None + + +@rule +def _wav(h): + # 'RIFF' 'WAVE' 'fmt ' + if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ': + return None + else: + return "x-wav" diff --git a/Lib/email/mime/base.py b/Lib/email/mime/base.py index 1a3f9b51f6..f601f621ce 100644 --- a/Lib/email/mime/base.py +++ b/Lib/email/mime/base.py @@ -11,7 +11,6 @@ from email import message - class MIMEBase(message.Message): """Base class for MIME specializations.""" diff --git a/Lib/email/mime/image.py b/Lib/email/mime/image.py index 92724643cd..4b7f2f9cba 100644 --- a/Lib/email/mime/image.py +++ b/Lib/email/mime/image.py @@ -6,13 +6,10 @@ __all__ = ['MIMEImage'] -import imghdr - from email import encoders from email.mime.nonmultipart import MIMENonMultipart - class MIMEImage(MIMENonMultipart): """Class for generating image/* type MIME documents.""" @@ -20,11 +17,11 @@ def __init__(self, _imagedata, _subtype=None, _encoder=encoders.encode_base64, *, policy=None, **_params): """Create an image/* type MIME document. - _imagedata is a string containing the raw image data. If this data - can be decoded by the standard Python `imghdr' module, then the - subtype will be automatically included in the Content-Type header. - Otherwise, you can specify the specific image subtype via the _subtype - parameter. + _imagedata contains the bytes for the raw image data. 
If the data + type can be detected (jpeg, png, gif, tiff, rgb, pbm, pgm, ppm, + rast, xbm, bmp, webp, and exr attempted), then the subtype will be + automatically included in the Content-Type header. Otherwise, you can + specify the specific image subtype via the _subtype parameter. _encoder is a function which will perform the actual encoding for transport of the image data. It takes one argument, which is this @@ -37,11 +34,119 @@ def __init__(self, _imagedata, _subtype=None, constructor, which turns them into parameters on the Content-Type header. """ - if _subtype is None: - _subtype = imghdr.what(None, _imagedata) + _subtype = _what(_imagedata) if _subtype is None else _subtype if _subtype is None: raise TypeError('Could not guess image MIME subtype') MIMENonMultipart.__init__(self, 'image', _subtype, policy=policy, **_params) self.set_payload(_imagedata) _encoder(self) + + +_rules = [] + + +# Originally from the imghdr module. +def _what(data): + for rule in _rules: + if res := rule(data): + return res + else: + return None + + +def rule(rulefunc): + _rules.append(rulefunc) + return rulefunc + + +@rule +def _jpeg(h): + """JPEG data with JFIF or Exif markers; and raw JPEG""" + if h[6:10] in (b'JFIF', b'Exif'): + return 'jpeg' + elif h[:4] == b'\xff\xd8\xff\xdb': + return 'jpeg' + + +@rule +def _png(h): + if h.startswith(b'\211PNG\r\n\032\n'): + return 'png' + + +@rule +def _gif(h): + """GIF ('87 and '89 variants)""" + if h[:6] in (b'GIF87a', b'GIF89a'): + return 'gif' + + +@rule +def _tiff(h): + """TIFF (can be in Motorola or Intel byte order)""" + if h[:2] in (b'MM', b'II'): + return 'tiff' + + +@rule +def _rgb(h): + """SGI image library""" + if h.startswith(b'\001\332'): + return 'rgb' + + +@rule +def _pbm(h): + """PBM (portable bitmap)""" + if len(h) >= 3 and \ + h[0] == ord(b'P') and h[1] in b'14' and h[2] in b' \t\n\r': + return 'pbm' + + +@rule +def _pgm(h): + """PGM (portable graymap)""" + if len(h) >= 3 and \ + h[0] == ord(b'P') and h[1] in b'25' and h[2] in b' \t\n\r': + return 'pgm' + + +@rule +def _ppm(h): + """PPM (portable pixmap)""" + if len(h) >= 3 and \ + h[0] == ord(b'P') and h[1] in b'36' and h[2] in b' \t\n\r': + return 'ppm' + + +@rule +def _rast(h): + """Sun raster file""" + if h.startswith(b'\x59\xA6\x6A\x95'): + return 'rast' + + +@rule +def _xbm(h): + """X bitmap (X10 or X11)""" + if h.startswith(b'#define '): + return 'xbm' + + +@rule +def _bmp(h): + if h.startswith(b'BM'): + return 'bmp' + + +@rule +def _webp(h): + if h.startswith(b'RIFF') and h[8:12] == b'WEBP': + return 'webp' + + +@rule +def _exr(h): + if h.startswith(b'\x76\x2f\x31\x01'): + return 'exr' diff --git a/Lib/email/mime/message.py b/Lib/email/mime/message.py index 07e4f2d119..61836b5a78 100644 --- a/Lib/email/mime/message.py +++ b/Lib/email/mime/message.py @@ -10,7 +10,6 @@ from email.mime.nonmultipart import MIMENonMultipart - class MIMEMessage(MIMENonMultipart): """Class representing message/* MIME documents.""" diff --git a/Lib/email/mime/multipart.py b/Lib/email/mime/multipart.py index 2d3f288810..94d81c771a 100644 --- a/Lib/email/mime/multipart.py +++ b/Lib/email/mime/multipart.py @@ -9,7 +9,6 @@ from email.mime.base import MIMEBase - class MIMEMultipart(MIMEBase): """Base class for MIME multipart/* type messages.""" diff --git a/Lib/email/mime/nonmultipart.py b/Lib/email/mime/nonmultipart.py index e1f51968b5..a41386eb14 100644 --- a/Lib/email/mime/nonmultipart.py +++ b/Lib/email/mime/nonmultipart.py @@ -10,7 +10,6 @@ from email.mime.base import MIMEBase - class MIMENonMultipart(MIMEBase): 
"""Base class for MIME non-multipart type messages.""" diff --git a/Lib/email/mime/text.py b/Lib/email/mime/text.py index 35b4423830..7672b78913 100644 --- a/Lib/email/mime/text.py +++ b/Lib/email/mime/text.py @@ -6,11 +6,9 @@ __all__ = ['MIMEText'] -from email.charset import Charset from email.mime.nonmultipart import MIMENonMultipart - class MIMEText(MIMENonMultipart): """Class for generating text/* type MIME documents.""" @@ -37,6 +35,6 @@ def __init__(self, _text, _subtype='plain', _charset=None, *, policy=None): _charset = 'utf-8' MIMENonMultipart.__init__(self, 'text', _subtype, policy=policy, - **{'charset': str(_charset)}) + charset=str(_charset)) self.set_payload(_text, _charset) diff --git a/Lib/email/parser.py b/Lib/email/parser.py index 555b172560..06d99b17f2 100644 --- a/Lib/email/parser.py +++ b/Lib/email/parser.py @@ -13,7 +13,6 @@ from email._policybase import compat32 - class Parser: def __init__(self, _class=None, *, policy=compat32): """Parser of RFC 2822 and MIME email messages. @@ -50,10 +49,7 @@ def parse(self, fp, headersonly=False): feedparser = FeedParser(self._class, policy=self.policy) if headersonly: feedparser._set_headersonly() - while True: - data = fp.read(8192) - if not data: - break + while data := fp.read(8192): feedparser.feed(data) return feedparser.close() @@ -68,7 +64,6 @@ def parsestr(self, text, headersonly=False): return self.parse(StringIO(text), headersonly=headersonly) - class HeaderParser(Parser): def parse(self, fp, headersonly=True): return Parser.parse(self, fp, True) @@ -76,7 +71,7 @@ def parse(self, fp, headersonly=True): def parsestr(self, text, headersonly=True): return Parser.parsestr(self, text, True) - + class BytesParser: def __init__(self, *args, **kw): diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 5131311ac5..6e109b6501 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -3,6 +3,7 @@ """ import re +import sys from email._policybase import Policy, Compat32, compat32, _extend_docstrings from email.utils import _has_surrogates from email.headerregistry import HeaderRegistry as HeaderRegistry @@ -20,7 +21,7 @@ 'HTTP', ] -linesep_splitter = re.compile(r'\n|\r') +linesep_splitter = re.compile(r'\n|\r\n?') @_extend_docstrings class EmailPolicy(Policy): @@ -118,13 +119,13 @@ def header_source_parse(self, sourcelines): """+ The name is parsed as everything up to the ':' and returned unmodified. The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and + remainder of the first line joined with all subsequent lines, and stripping any trailing carriage return or linefeed characters. (This is the same as Compat32). """ name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n') return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): @@ -203,14 +204,22 @@ def fold_binary(self, name, value): def _fold(self, name, value, refold_binary=False): if hasattr(value, 'name'): return value.fold(policy=self) - maxlen = self.max_line_length if self.max_line_length else float('inf') - lines = value.splitlines() + maxlen = self.max_line_length if self.max_line_length else sys.maxsize + # We can't use splitlines here because it splits on more than \r and \n. 
+ lines = linesep_splitter.split(value) refold = (self.refold_source == 'all' or self.refold_source == 'long' and (lines and len(lines[0])+len(name)+2 > maxlen or any(len(x) > maxlen for x in lines[1:]))) - if refold or refold_binary and _has_surrogates(value): + + if not refold: + if not self.utf8: + refold = not value.isascii() + elif refold_binary: + refold = _has_surrogates(value) + if refold: return self.header_factory(name, ''.join(lines)).fold(policy=self) + return name + ': ' + self.linesep.join(lines) + self.linesep diff --git a/Lib/email/quoprimime.py b/Lib/email/quoprimime.py index c543eb59ae..27fcbb5a26 100644 --- a/Lib/email/quoprimime.py +++ b/Lib/email/quoprimime.py @@ -148,6 +148,7 @@ def header_encode(header_bytes, charset='iso-8859-1'): _QUOPRI_BODY_ENCODE_MAP = _QUOPRI_BODY_MAP[:] for c in b'\r\n': _QUOPRI_BODY_ENCODE_MAP[c] = chr(c) +del c def body_encode(body, maxlinelen=76, eol=NL): """Encode with quoted-printable, wrapping at maxlinelen characters. @@ -173,7 +174,7 @@ def body_encode(body, maxlinelen=76, eol=NL): if not body: return body - # quote speacial characters + # quote special characters body = body.translate(_QUOPRI_BODY_ENCODE_MAP) soft_break = '=' + eol diff --git a/Lib/email/utils.py b/Lib/email/utils.py index a759d23308..e42674fa4f 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -25,8 +25,6 @@ import os import re import time -import random -import socket import datetime import urllib.parse @@ -36,9 +34,6 @@ from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz -# Intrapackage imports -from email.charset import Charset - COMMASPACE = ', ' EMPTYSTRING = '' UEMPTYSTRING = '' @@ -48,11 +43,12 @@ specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[\\"]') + def _has_surrogates(s): - """Return True if s contains surrogate-escaped binary data.""" + """Return True if s may contain surrogate-escaped binary data.""" # This check is based on the fact that unless there are surrogates, utf8 # (Python's default encoding) can encode any string. This is the fastest - # way to check for surrogates, see issue 11454 for timings. + # way to check for surrogates, see bpo-11454 (moved to gh-55663) for timings. try: s.encode() return False @@ -81,7 +77,7 @@ def formataddr(pair, charset='utf-8'): If the first element of pair is false, then the second element is returned unmodified. - Optional charset if given is the character set that is used to encode + The optional charset is the character set that is used to encode realname in case realname is not ASCII safe. Can be an instance of str or a Charset-like object which has a header_encode method. Default is 'utf-8'. 
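A quick illustration of the formataddr/parseaddr pair documented in the hunk above; the addresses are invented and the snippet is not part of the patch.

    from email.utils import formataddr, parseaddr

    s = formataddr(('Jane Doe', 'jane@example.com'))
    print(s)              # Jane Doe <jane@example.com>
    print(parseaddr(s))   # ('Jane Doe', 'jane@example.com')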
@@ -94,6 +90,8 @@ def formataddr(pair, charset='utf-8'): name.encode('ascii') except UnicodeEncodeError: if isinstance(charset, str): + # lazy import to improve module import time + from email.charset import Charset charset = Charset(charset) encoded_name = charset.header_encode(name) return "%s <%s>" % (encoded_name, address) @@ -106,24 +104,127 @@ def formataddr(pair, charset='utf-8'): return address +def _iter_escaped_chars(addr): + pos = 0 + escape = False + for pos, ch in enumerate(addr): + if escape: + yield (pos, '\\' + ch) + escape = False + elif ch == '\\': + escape = True + else: + yield (pos, ch) + if escape: + yield (pos, '\\') + + +def _strip_quoted_realnames(addr): + """Strip real names between quotes.""" + if '"' not in addr: + # Fast path + return addr + + start = 0 + open_pos = None + result = [] + for pos, ch in _iter_escaped_chars(addr): + if ch == '"': + if open_pos is None: + open_pos = pos + else: + if start != open_pos: + result.append(addr[start:open_pos]) + start = pos + 1 + open_pos = None + + if start < len(addr): + result.append(addr[start:]) + + return ''.join(result) -def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" - all = COMMASPACE.join(fieldvalues) - a = _AddressList(all) - return a.addresslist +supports_strict_parsing = True +def getaddresses(fieldvalues, *, strict=True): + """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. -ecre = re.compile(r''' - =\? # literal =? - (?P[^?]*?) # non-greedy up to the next ? is the charset - \? # literal ? - (?P[qb]) # either a "q" or a "b", case insensitive - \? # literal ? - (?P.*?) # non-greedy up to the next ?= is the atom - \?= # literal ?= - ''', re.VERBOSE | re.IGNORECASE) + When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in + its place. + + If strict is true, use a strict parser which rejects malformed inputs. + """ + + # If strict is true, if the resulting list of parsed addresses is greater + # than the number of fieldvalues in the input list, a parsing error has + # occurred and consequently a list containing a single empty 2-tuple [('', + # '')] is returned in its place. This is done to avoid invalid output. + # + # Malformed input: getaddresses(['alice@example.com ']) + # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] + # Safe output: [('', '')] + + if not strict: + all = COMMASPACE.join(str(v) for v in fieldvalues) + a = _AddressList(all) + return a.addresslist + + fieldvalues = [str(v) for v in fieldvalues] + fieldvalues = _pre_parse_validation(fieldvalues) + addr = COMMASPACE.join(fieldvalues) + a = _AddressList(addr) + result = _post_parse_validation(a.addresslist) + + # Treat output as invalid if the number of addresses is not equal to the + # expected number of addresses. + n = 0 + for v in fieldvalues: + # When a comma is used in the Real Name part it is not a deliminator. + # So strip those out before counting the commas. + v = _strip_quoted_realnames(v) + # Expected number of addresses: 1 + number of commas + n += 1 + v.count(',') + if len(result) != n: + return [('', '')] + + return result + + +def _check_parenthesis(addr): + # Ignore parenthesis in quoted real names. 
+ addr = _strip_quoted_realnames(addr) + + opens = 0 + for pos, ch in _iter_escaped_chars(addr): + if ch == '(': + opens += 1 + elif ch == ')': + opens -= 1 + if opens < 0: + return False + return (opens == 0) + + +def _pre_parse_validation(email_header_fields): + accepted_values = [] + for v in email_header_fields: + if not _check_parenthesis(v): + v = "('', '')" + accepted_values.append(v) + + return accepted_values + + +def _post_parse_validation(parsed_email_header_tuples): + accepted_values = [] + # The parser would have parsed a correctly formatted domain-literal + # The existence of an [ after parsing indicates a parsing failure + for v in parsed_email_header_tuples: + if '[' in v[1]: + v = ('', '') + accepted_values.append(v) + + return accepted_values def _format_timetuple_and_zone(timetuple, zone): @@ -140,7 +241,7 @@ def formatdate(timeval=None, localtime=False, usegmt=False): Fri, 09 Nov 2001 01:08:47 -0000 - Optional timeval if given is a floating point time value as accepted by + Optional timeval if given is a floating-point time value as accepted by gmtime() and localtime(), otherwise the current time is used. Optional localtime is a flag that when True, interprets timeval, and @@ -155,13 +256,13 @@ def formatdate(timeval=None, localtime=False, usegmt=False): # 2822 requires that day and month names be the English abbreviations. if timeval is None: timeval = time.time() - if localtime or usegmt: - dt = datetime.datetime.fromtimestamp(timeval, datetime.timezone.utc) - else: - dt = datetime.datetime.utcfromtimestamp(timeval) + dt = datetime.datetime.fromtimestamp(timeval, datetime.timezone.utc) + if localtime: dt = dt.astimezone() usegmt = False + elif not usegmt: + dt = dt.replace(tzinfo=None) return format_datetime(dt, usegmt) def format_datetime(dt, usegmt=False): @@ -193,6 +294,11 @@ def make_msgid(idstring=None, domain=None): portion of the message id after the '@'. It defaults to the locally defined hostname. """ + # Lazy imports to speedup module import time + # (no other functions in email.utils need these modules) + import random + import socket + timeval = int(time.time()*100) pid = os.getpid() randint = random.getrandbits(64) @@ -207,17 +313,43 @@ def make_msgid(idstring=None, domain=None): def parsedate_to_datetime(data): - *dtuple, tz = _parsedate_tz(data) + parsed_date_tz = _parsedate_tz(data) + if parsed_date_tz is None: + raise ValueError('Invalid date value or format "%s"' % str(data)) + *dtuple, tz = parsed_date_tz if tz is None: return datetime.datetime(*dtuple[:6]) return datetime.datetime(*dtuple[:6], tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) -def parseaddr(addr): - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' +def parseaddr(addr, *, strict=True): + """ + Parse addr into its constituent realname and email address parts. + + Return a tuple of realname and email address, unless the parse fails, in + which case return a 2-tuple of ('', ''). + + If strict is True, use a strict parser which rejects malformed inputs. 
+ """ + if not strict: + addrs = _AddressList(addr).addresslist + if not addrs: + return ('', '') + return addrs[0] + + if isinstance(addr, list): + addr = addr[0] + + if not isinstance(addr, str): + return ('', '') + + addr = _pre_parse_validation([addr])[0] + addrs = _post_parse_validation(_AddressList(addr).addresslist) + + if not addrs or len(addrs) > 1: + return ('', '') + return addrs[0] @@ -265,21 +397,13 @@ def decode_params(params): params is a sequence of 2-tuples containing (param name, string value). """ - # Copy params so we don't mess with the original - params = params[:] - new_params = [] + new_params = [params[0]] # Map parameter's name to a list of continuations. The values are a # 3-tuple of the continuation number, the string value, and a flag # specifying whether a particular segment is %-encoded. rfc2231_params = {} - name, value = params.pop(0) - new_params.append((name, value)) - while params: - name, value = params.pop(0) - if name.endswith('*'): - encoded = True - else: - encoded = False + for name, value in params[1:]: + encoded = name.endswith('*') value = unquote(value) mo = rfc2231_continuation.match(name) if mo: @@ -342,41 +466,23 @@ def collapse_rfc2231_value(value, errors='replace', # better than not having it. # -def localtime(dt=None, isdst=-1): +def localtime(dt=None, isdst=None): """Return local time as an aware datetime object. If called without arguments, return current time. Otherwise *dt* argument should be a datetime instance, and it is converted to the local time zone according to the system time zone database. If *dt* is naive (that is, dt.tzinfo is None), it is assumed to be in local time. - In this case, a positive or zero value for *isdst* causes localtime to - presume initially that summer time (for example, Daylight Saving Time) - is or is not (respectively) in effect for the specified time. A - negative value for *isdst* causes the localtime() function to attempt - to divine whether summer time is in effect for the specified time. + The isdst parameter is ignored. """ + if isdst is not None: + import warnings + warnings._deprecated( + "The 'isdst' parameter to 'localtime'", + message='{name} is deprecated and slated for removal in Python {remove}', + remove=(3, 14), + ) if dt is None: - return datetime.datetime.now(datetime.timezone.utc).astimezone() - if dt.tzinfo is not None: - return dt.astimezone() - # We have a naive datetime. Convert to a (localtime) timetuple and pass to - # system mktime together with the isdst hint. System mktime will return - # seconds since epoch. - tm = dt.timetuple()[:-1] + (isdst,) - seconds = time.mktime(tm) - localtm = time.localtime(seconds) - try: - delta = datetime.timedelta(seconds=localtm.tm_gmtoff) - tz = datetime.timezone(delta, localtm.tm_zone) - except AttributeError: - # Compute UTC offset and compare with the value implied by tm_isdst. - # If the values match, use the zone name implied by tm_isdst. 
- delta = dt - datetime.datetime(*time.gmtime(seconds)[:6]) - dst = time.daylight and localtm.tm_isdst > 0 - gmtoff = -(time.altzone if dst else time.timezone) - if delta == datetime.timedelta(seconds=gmtoff): - tz = datetime.timezone(delta, time.tzname[dst]) - else: - tz = datetime.timezone(delta) - return dt.replace(tzinfo=tz) + dt = datetime.datetime.now() + return dt.astimezone() From f1d45ee5a7a0f3b6c9f91d572e770ddf1a1fb106 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Thu, 3 Apr 2025 10:05:38 -0700 Subject: [PATCH 005/126] remove unused deprecated libraries --- Lib/aifc.py | 984 -------------------------------------------------- Lib/imghdr.py | 175 --------- Lib/sndhdr.py | 257 ------------- Lib/sunau.py | 531 --------------------------- 4 files changed, 1947 deletions(-) delete mode 100644 Lib/aifc.py delete mode 100644 Lib/imghdr.py delete mode 100644 Lib/sndhdr.py delete mode 100644 Lib/sunau.py diff --git a/Lib/aifc.py b/Lib/aifc.py deleted file mode 100644 index 5254987e22..0000000000 --- a/Lib/aifc.py +++ /dev/null @@ -1,984 +0,0 @@ -"""Stuff to parse AIFF-C and AIFF files. - -Unless explicitly stated otherwise, the description below is true -both for AIFF-C files and AIFF files. - -An AIFF-C file has the following structure. - - +-----------------+ - | FORM | - +-----------------+ - | | - +----+------------+ - | | AIFC | - | +------------+ - | | | - | | . | - | | . | - | | . | - +----+------------+ - -An AIFF file has the string "AIFF" instead of "AIFC". - -A chunk consists of an identifier (4 bytes) followed by a size (4 bytes, -big endian order), followed by the data. The size field does not include -the size of the 8 byte header. - -The following chunk types are recognized. - - FVER - (AIFF-C only). - MARK - <# of markers> (2 bytes) - list of markers: - (2 bytes, must be > 0) - (4 bytes) - ("pstring") - COMM - <# of channels> (2 bytes) - <# of sound frames> (4 bytes) - (2 bytes) - (10 bytes, IEEE 80-bit extended - floating point) - in AIFF-C files only: - (4 bytes) - ("pstring") - SSND - (4 bytes, not used by this program) - (4 bytes, not used by this program) - - -A pstring consists of 1 byte length, a string of characters, and 0 or 1 -byte pad to make the total length even. - -Usage. - -Reading AIFF files: - f = aifc.open(file, 'r') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods read(), seek(), and close(). -In some types of audio files, if the setpos() method is not used, -the seek() method is not necessary. 
- -This returns an instance of a class with the following public methods: - getnchannels() -- returns number of audio channels (1 for - mono, 2 for stereo) - getsampwidth() -- returns sample width in bytes - getframerate() -- returns sampling frequency - getnframes() -- returns number of audio frames - getcomptype() -- returns compression type ('NONE' for AIFF files) - getcompname() -- returns human-readable version of - compression type ('not compressed' for AIFF files) - getparams() -- returns a namedtuple consisting of all of the - above in the above order - getmarkers() -- get the list of marks in the audio file or None - if there are no marks - getmark(id) -- get mark with the specified id (raises an error - if the mark does not exist) - readframes(n) -- returns at most n frames of audio - rewind() -- rewind to the beginning of the audio stream - setpos(pos) -- seek to the specified position - tell() -- return the current position - close() -- close the instance (make it unusable) -The position returned by tell(), the position given to setpos() and -the position of marks are all compatible and have nothing to do with -the actual position in the file. -The close() method is called automatically when the class instance -is destroyed. - -Writing AIFF files: - f = aifc.open(file, 'w') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods write(), tell(), seek(), and -close(). - -This returns an instance of a class with the following public methods: - aiff() -- create an AIFF file (AIFF-C default) - aifc() -- create an AIFF-C file - setnchannels(n) -- set the number of channels - setsampwidth(n) -- set the sample width - setframerate(n) -- set the frame rate - setnframes(n) -- set the number of frames - setcomptype(type, name) - -- set the compression type and the - human-readable compression type - setparams(tuple) - -- set all parameters at once - setmark(id, pos, name) - -- add specified mark to the list of marks - tell() -- return current position in output file (useful - in combination with setmark()) - writeframesraw(data) - -- write audio frames without pathing up the - file header - writeframes(data) - -- write audio frames and patch up the file header - close() -- patch up the file header and close the - output file -You should set the parameters before the first writeframesraw or -writeframes. The total number of frames does not need to be set, -but when it is set to the correct value, the header does not have to -be patched up. -It is best to first set all parameters, perhaps possibly the -compression type, and then write audio frames using writeframesraw. -When all frames have been written, either call writeframes(b'') or -close() to patch up the sizes in the header. -Marks can be added anytime. If there are any marks, you must call -close() after all frames have been written. -The close() method is called automatically when the class instance -is destroyed. - -When a file is opened with the extension '.aiff', an AIFF file is -written, otherwise an AIFF-C file is written. This default can be -changed by calling aiff() or aifc() before the first writeframes or -writeframesraw. 
-""" - -import struct -import builtins -import warnings - -__all__ = ["Error", "open"] - - -warnings._deprecated(__name__, remove=(3, 13)) - - -class Error(Exception): - pass - -_AIFC_version = 0xA2805140 # Version 1 of AIFF-C - -def _read_long(file): - try: - return struct.unpack('>l', file.read(4))[0] - except struct.error: - raise EOFError from None - -def _read_ulong(file): - try: - return struct.unpack('>L', file.read(4))[0] - except struct.error: - raise EOFError from None - -def _read_short(file): - try: - return struct.unpack('>h', file.read(2))[0] - except struct.error: - raise EOFError from None - -def _read_ushort(file): - try: - return struct.unpack('>H', file.read(2))[0] - except struct.error: - raise EOFError from None - -def _read_string(file): - length = ord(file.read(1)) - if length == 0: - data = b'' - else: - data = file.read(length) - if length & 1 == 0: - dummy = file.read(1) - return data - -_HUGE_VAL = 1.79769313486231e+308 # See - -def _read_float(f): # 10 bytes - expon = _read_short(f) # 2 bytes - sign = 1 - if expon < 0: - sign = -1 - expon = expon + 0x8000 - himant = _read_ulong(f) # 4 bytes - lomant = _read_ulong(f) # 4 bytes - if expon == himant == lomant == 0: - f = 0.0 - elif expon == 0x7FFF: - f = _HUGE_VAL - else: - expon = expon - 16383 - f = (himant * 0x100000000 + lomant) * pow(2.0, expon - 63) - return sign * f - -def _write_short(f, x): - f.write(struct.pack('>h', x)) - -def _write_ushort(f, x): - f.write(struct.pack('>H', x)) - -def _write_long(f, x): - f.write(struct.pack('>l', x)) - -def _write_ulong(f, x): - f.write(struct.pack('>L', x)) - -def _write_string(f, s): - if len(s) > 255: - raise ValueError("string exceeds maximum pstring length") - f.write(struct.pack('B', len(s))) - f.write(s) - if len(s) & 1 == 0: - f.write(b'\x00') - -def _write_float(f, x): - import math - if x < 0: - sign = 0x8000 - x = x * -1 - else: - sign = 0 - if x == 0: - expon = 0 - himant = 0 - lomant = 0 - else: - fmant, expon = math.frexp(x) - if expon > 16384 or fmant >= 1 or fmant != fmant: # Infinity or NaN - expon = sign|0x7FFF - himant = 0 - lomant = 0 - else: # Finite - expon = expon + 16382 - if expon < 0: # denormalized - fmant = math.ldexp(fmant, expon) - expon = 0 - expon = expon | sign - fmant = math.ldexp(fmant, 32) - fsmant = math.floor(fmant) - himant = int(fsmant) - fmant = math.ldexp(fmant - fsmant, 32) - fsmant = math.floor(fmant) - lomant = int(fsmant) - _write_ushort(f, expon) - _write_ulong(f, himant) - _write_ulong(f, lomant) - -with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - from chunk import Chunk -from collections import namedtuple - -_aifc_params = namedtuple('_aifc_params', - 'nchannels sampwidth framerate nframes comptype compname') - -_aifc_params.nchannels.__doc__ = 'Number of audio channels (1 for mono, 2 for stereo)' -_aifc_params.sampwidth.__doc__ = 'Sample width in bytes' -_aifc_params.framerate.__doc__ = 'Sampling frequency' -_aifc_params.nframes.__doc__ = 'Number of audio frames' -_aifc_params.comptype.__doc__ = 'Compression type ("NONE" for AIFF files)' -_aifc_params.compname.__doc__ = ("""\ -A human-readable version of the compression type -('not compressed' for AIFF files)""") - - -class Aifc_read: - # Variables used in this class: - # - # These variables are available to the user though appropriate - # methods of this class: - # _file -- the open file with methods read(), close(), and seek() - # set through the __init__() method - # _nchannels -- the number of audio channels - # available 
through the getnchannels() method - # _nframes -- the number of audio frames - # available through the getnframes() method - # _sampwidth -- the number of bytes per audio sample - # available through the getsampwidth() method - # _framerate -- the sampling frequency - # available through the getframerate() method - # _comptype -- the AIFF-C compression type ('NONE' if AIFF) - # available through the getcomptype() method - # _compname -- the human-readable AIFF-C compression type - # available through the getcomptype() method - # _markers -- the marks in the audio file - # available through the getmarkers() and getmark() - # methods - # _soundpos -- the position in the audio stream - # available through the tell() method, set through the - # setpos() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _decomp -- the decompressor from builtin module cl - # _comm_chunk_read -- 1 iff the COMM chunk has been read - # _aifc -- 1 iff reading an AIFF-C file - # _ssnd_seek_needed -- 1 iff positioned correctly in audio - # file for readframes() - # _ssnd_chunk -- instantiation of a chunk class for the SSND chunk - # _framesize -- size of one frame in the file - - _file = None # Set here since __del__ checks it - - def initfp(self, file): - self._version = 0 - self._convert = None - self._markers = [] - self._soundpos = 0 - self._file = file - chunk = Chunk(file) - if chunk.getname() != b'FORM': - raise Error('file does not start with FORM id') - formdata = chunk.read(4) - if formdata == b'AIFF': - self._aifc = 0 - elif formdata == b'AIFC': - self._aifc = 1 - else: - raise Error('not an AIFF or AIFF-C file') - self._comm_chunk_read = 0 - self._ssnd_chunk = None - while 1: - self._ssnd_seek_needed = 1 - try: - chunk = Chunk(self._file) - except EOFError: - break - chunkname = chunk.getname() - if chunkname == b'COMM': - self._read_comm_chunk(chunk) - self._comm_chunk_read = 1 - elif chunkname == b'SSND': - self._ssnd_chunk = chunk - dummy = chunk.read(8) - self._ssnd_seek_needed = 0 - elif chunkname == b'FVER': - self._version = _read_ulong(chunk) - elif chunkname == b'MARK': - self._readmark(chunk) - chunk.skip() - if not self._comm_chunk_read or not self._ssnd_chunk: - raise Error('COMM chunk and/or SSND chunk missing') - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'rb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - else: - # assume it is an open file object already - self.initfp(f) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. 
- # - def getfp(self): - return self._file - - def rewind(self): - self._ssnd_seek_needed = 1 - self._soundpos = 0 - - def close(self): - file = self._file - if file is not None: - self._file = None - file.close() - - def tell(self): - return self._soundpos - - def getnchannels(self): - return self._nchannels - - def getnframes(self): - return self._nframes - - def getsampwidth(self): - return self._sampwidth - - def getframerate(self): - return self._framerate - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def getversion(self): -## return self._version - - def getparams(self): - return _aifc_params(self.getnchannels(), self.getsampwidth(), - self.getframerate(), self.getnframes(), - self.getcomptype(), self.getcompname()) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def setpos(self, pos): - if pos < 0 or pos > self._nframes: - raise Error('position not in range') - self._soundpos = pos - self._ssnd_seek_needed = 1 - - def readframes(self, nframes): - if self._ssnd_seek_needed: - self._ssnd_chunk.seek(0) - dummy = self._ssnd_chunk.read(8) - pos = self._soundpos * self._framesize - if pos: - self._ssnd_chunk.seek(pos + 8) - self._ssnd_seek_needed = 0 - if nframes == 0: - return b'' - data = self._ssnd_chunk.read(nframes * self._framesize) - if self._convert and data: - data = self._convert(data) - self._soundpos = self._soundpos + len(data) // (self._nchannels - * self._sampwidth) - return data - - # - # Internal methods. - # - - def _alaw2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.alaw2lin(data, 2) - - def _ulaw2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.ulaw2lin(data, 2) - - def _adpcm2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - if not hasattr(self, '_adpcmstate'): - # first time - self._adpcmstate = None - data, self._adpcmstate = audioop.adpcm2lin(data, 2, self._adpcmstate) - return data - - def _sowt2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.byteswap(data, 2) - - def _read_comm_chunk(self, chunk): - self._nchannels = _read_short(chunk) - self._nframes = _read_long(chunk) - self._sampwidth = (_read_short(chunk) + 7) // 8 - self._framerate = int(_read_float(chunk)) - if self._sampwidth <= 0: - raise Error('bad sample width') - if self._nchannels <= 0: - raise Error('bad # of channels') - self._framesize = self._nchannels * self._sampwidth - if self._aifc: - #DEBUG: SGI's soundeditor produces a bad size :-( - kludge = 0 - if chunk.chunksize == 18: - kludge = 1 - warnings.warn('Warning: bad COMM chunk size') - chunk.chunksize = 23 - #DEBUG end - self._comptype = chunk.read(4) - #DEBUG start - if kludge: - length = ord(chunk.file.read(1)) - if length & 1 == 0: - length = length + 1 - chunk.chunksize = chunk.chunksize + length - chunk.file.seek(-1, 1) - #DEBUG end - self._compname = _read_string(chunk) - if self._comptype != b'NONE': - if self._comptype == b'G722': - self._convert = self._adpcm2lin - elif self._comptype in (b'ulaw', 
b'ULAW'): - self._convert = self._ulaw2lin - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._alaw2lin - elif self._comptype in (b'sowt', b'SOWT'): - self._convert = self._sowt2lin - else: - raise Error('unsupported compression type') - self._sampwidth = 2 - else: - self._comptype = b'NONE' - self._compname = b'not compressed' - - def _readmark(self, chunk): - nmarkers = _read_short(chunk) - # Some files appear to contain invalid counts. - # Cope with this by testing for EOF. - try: - for i in range(nmarkers): - id = _read_short(chunk) - pos = _read_long(chunk) - name = _read_string(chunk) - if pos or name: - # some files appear to have - # dummy markers consisting of - # a position 0 and name '' - self._markers.append((id, pos, name)) - except EOFError: - w = ('Warning: MARK chunk contains only %s marker%s instead of %s' % - (len(self._markers), '' if len(self._markers) == 1 else 's', - nmarkers)) - warnings.warn(w) - -class Aifc_write: - # Variables used in this class: - # - # These variables are user settable through appropriate methods - # of this class: - # _file -- the open file with methods write(), close(), tell(), seek() - # set through the __init__() method - # _comptype -- the AIFF-C compression type ('NONE' in AIFF) - # set through the setcomptype() or setparams() method - # _compname -- the human-readable AIFF-C compression type - # set through the setcomptype() or setparams() method - # _nchannels -- the number of audio channels - # set through the setnchannels() or setparams() method - # _sampwidth -- the number of bytes per audio sample - # set through the setsampwidth() or setparams() method - # _framerate -- the sampling frequency - # set through the setframerate() or setparams() method - # _nframes -- the number of audio frames written to the header - # set through the setnframes() or setparams() method - # _aifc -- whether we're writing an AIFF-C file or an AIFF file - # set through the aifc() method, reset through the - # aiff() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _comp -- the compressor from builtin module cl - # _nframeswritten -- the number of audio frames actually written - # _datalength -- the size of the audio samples written to the header - # _datawritten -- the size of the audio samples actually written - - _file = None # Set here since __del__ checks it - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'wb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - - # treat .aiff file extensions as non-compressed audio - if f.endswith('.aiff'): - self._aifc = 0 - else: - # assume it is an open file object already - self.initfp(f) - - def initfp(self, file): - self._file = file - self._version = _AIFC_version - self._comptype = b'NONE' - self._compname = b'not compressed' - self._convert = None - self._nchannels = 0 - self._sampwidth = 0 - self._framerate = 0 - self._nframes = 0 - self._nframeswritten = 0 - self._datawritten = 0 - self._datalength = 0 - self._markers = [] - self._marklength = 0 - self._aifc = 1 # AIFF-C is default - - def __del__(self): - self.close() - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. 
- # - def aiff(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 0 - - def aifc(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 1 - - def setnchannels(self, nchannels): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if nchannels < 1: - raise Error('bad # of channels') - self._nchannels = nchannels - - def getnchannels(self): - if not self._nchannels: - raise Error('number of channels not set') - return self._nchannels - - def setsampwidth(self, sampwidth): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if sampwidth < 1 or sampwidth > 4: - raise Error('bad sample width') - self._sampwidth = sampwidth - - def getsampwidth(self): - if not self._sampwidth: - raise Error('sample width not set') - return self._sampwidth - - def setframerate(self, framerate): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if framerate <= 0: - raise Error('bad frame rate') - self._framerate = framerate - - def getframerate(self): - if not self._framerate: - raise Error('frame rate not set') - return self._framerate - - def setnframes(self, nframes): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._nframes = nframes - - def getnframes(self): - return self._nframeswritten - - def setcomptype(self, comptype, compname): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722', b'sowt', b'SOWT'): - raise Error('unsupported compression type') - self._comptype = comptype - self._compname = compname - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def setversion(self, version): -## if self._nframeswritten: -## raise Error, 'cannot change parameters after starting to write' -## self._version = version - - def setparams(self, params): - nchannels, sampwidth, framerate, nframes, comptype, compname = params - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722', b'sowt', b'SOWT'): - raise Error('unsupported compression type') - self.setnchannels(nchannels) - self.setsampwidth(sampwidth) - self.setframerate(framerate) - self.setnframes(nframes) - self.setcomptype(comptype, compname) - - def getparams(self): - if not self._nchannels or not self._sampwidth or not self._framerate: - raise Error('not all parameters set') - return _aifc_params(self._nchannels, self._sampwidth, self._framerate, - self._nframes, self._comptype, self._compname) - - def setmark(self, id, pos, name): - if id <= 0: - raise Error('marker ID must be > 0') - if pos < 0: - raise Error('marker position must be >= 0') - if not isinstance(name, bytes): - raise Error('marker name must be bytes') - for i in range(len(self._markers)): - if id == self._markers[i][0]: - self._markers[i] = id, pos, name - return - self._markers.append((id, pos, name)) - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def tell(self): - return self._nframeswritten - - def 
writeframesraw(self, data): - if not isinstance(data, (bytes, bytearray)): - data = memoryview(data).cast('B') - self._ensure_header_written(len(data)) - nframes = len(data) // (self._sampwidth * self._nchannels) - if self._convert: - data = self._convert(data) - self._file.write(data) - self._nframeswritten = self._nframeswritten + nframes - self._datawritten = self._datawritten + len(data) - - def writeframes(self, data): - self.writeframesraw(data) - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - - def close(self): - if self._file is None: - return - try: - self._ensure_header_written(0) - if self._datawritten & 1: - # quick pad to even size - self._file.write(b'\x00') - self._datawritten = self._datawritten + 1 - self._writemarkers() - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten or \ - self._marklength: - self._patchheader() - finally: - # Prevent ref cycles - self._convert = None - f = self._file - self._file = None - f.close() - - # - # Internal methods. - # - - def _lin2alaw(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.lin2alaw(data, 2) - - def _lin2ulaw(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.lin2ulaw(data, 2) - - def _lin2adpcm(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - if not hasattr(self, '_adpcmstate'): - self._adpcmstate = None - data, self._adpcmstate = audioop.lin2adpcm(data, 2, self._adpcmstate) - return data - - def _lin2sowt(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.byteswap(data, 2) - - def _ensure_header_written(self, datasize): - if not self._nframeswritten: - if self._comptype in (b'ULAW', b'ulaw', - b'ALAW', b'alaw', b'G722', - b'sowt', b'SOWT'): - if not self._sampwidth: - self._sampwidth = 2 - if self._sampwidth != 2: - raise Error('sample width must be 2 when compressing ' - 'with ulaw/ULAW, alaw/ALAW, sowt/SOWT ' - 'or G7.22 (ADPCM)') - if not self._nchannels: - raise Error('# channels not specified') - if not self._sampwidth: - raise Error('sample width not specified') - if not self._framerate: - raise Error('sampling rate not specified') - self._write_header(datasize) - - def _init_compression(self): - if self._comptype == b'G722': - self._convert = self._lin2adpcm - elif self._comptype in (b'ulaw', b'ULAW'): - self._convert = self._lin2ulaw - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._lin2alaw - elif self._comptype in (b'sowt', b'SOWT'): - self._convert = self._lin2sowt - - def _write_header(self, initlength): - if self._aifc and self._comptype != b'NONE': - self._init_compression() - self._file.write(b'FORM') - if not self._nframes: - self._nframes = initlength // (self._nchannels * self._sampwidth) - self._datalength = self._nframes * self._nchannels * self._sampwidth - if self._datalength & 1: - self._datalength = self._datalength + 1 - if self._aifc: - if self._comptype in (b'ulaw', b'ULAW', b'alaw', b'ALAW'): - self._datalength = self._datalength // 2 - if self._datalength & 1: - self._datalength = self._datalength + 1 - elif self._comptype == b'G722': - self._datalength = (self._datalength + 3) // 4 - if self._datalength & 1: - 
self._datalength = self._datalength + 1 - try: - self._form_length_pos = self._file.tell() - except (AttributeError, OSError): - self._form_length_pos = None - commlength = self._write_form_length(self._datalength) - if self._aifc: - self._file.write(b'AIFC') - self._file.write(b'FVER') - _write_ulong(self._file, 4) - _write_ulong(self._file, self._version) - else: - self._file.write(b'AIFF') - self._file.write(b'COMM') - _write_ulong(self._file, commlength) - _write_short(self._file, self._nchannels) - if self._form_length_pos is not None: - self._nframes_pos = self._file.tell() - _write_ulong(self._file, self._nframes) - if self._comptype in (b'ULAW', b'ulaw', b'ALAW', b'alaw', b'G722'): - _write_short(self._file, 8) - else: - _write_short(self._file, self._sampwidth * 8) - _write_float(self._file, self._framerate) - if self._aifc: - self._file.write(self._comptype) - _write_string(self._file, self._compname) - self._file.write(b'SSND') - if self._form_length_pos is not None: - self._ssnd_length_pos = self._file.tell() - _write_ulong(self._file, self._datalength + 8) - _write_ulong(self._file, 0) - _write_ulong(self._file, 0) - - def _write_form_length(self, datalength): - if self._aifc: - commlength = 18 + 5 + len(self._compname) - if commlength & 1: - commlength = commlength + 1 - verslength = 12 - else: - commlength = 18 - verslength = 0 - _write_ulong(self._file, 4 + verslength + self._marklength + \ - 8 + commlength + 16 + datalength) - return commlength - - def _patchheader(self): - curpos = self._file.tell() - if self._datawritten & 1: - datalength = self._datawritten + 1 - self._file.write(b'\x00') - else: - datalength = self._datawritten - if datalength == self._datalength and \ - self._nframes == self._nframeswritten and \ - self._marklength == 0: - self._file.seek(curpos, 0) - return - self._file.seek(self._form_length_pos, 0) - dummy = self._write_form_length(datalength) - self._file.seek(self._nframes_pos, 0) - _write_ulong(self._file, self._nframeswritten) - self._file.seek(self._ssnd_length_pos, 0) - _write_ulong(self._file, datalength + 8) - self._file.seek(curpos, 0) - self._nframes = self._nframeswritten - self._datalength = datalength - - def _writemarkers(self): - if len(self._markers) == 0: - return - self._file.write(b'MARK') - length = 2 - for marker in self._markers: - id, pos, name = marker - length = length + len(name) + 1 + 6 - if len(name) & 1 == 0: - length = length + 1 - _write_ulong(self._file, length) - self._marklength = length + 8 - _write_short(self._file, len(self._markers)) - for marker in self._markers: - id, pos, name = marker - _write_short(self._file, id) - _write_ulong(self._file, pos) - _write_string(self._file, name) - -def open(f, mode=None): - if mode is None: - if hasattr(f, 'mode'): - mode = f.mode - else: - mode = 'rb' - if mode in ('r', 'rb'): - return Aifc_read(f) - elif mode in ('w', 'wb'): - return Aifc_write(f) - else: - raise Error("mode must be 'r', 'rb', 'w', or 'wb'") - - -if __name__ == '__main__': - import sys - if not sys.argv[1:]: - sys.argv.append('/usr/demos/data/audio/bach.aiff') - fn = sys.argv[1] - with open(fn, 'r') as f: - print("Reading", fn) - print("nchannels =", f.getnchannels()) - print("nframes =", f.getnframes()) - print("sampwidth =", f.getsampwidth()) - print("framerate =", f.getframerate()) - print("comptype =", f.getcomptype()) - print("compname =", f.getcompname()) - if sys.argv[2:]: - gn = sys.argv[2] - print("Writing", gn) - with open(gn, 'w') as g: - g.setparams(f.getparams()) - while 1: - data = 
f.readframes(1024) - if not data: - break - g.writeframes(data) - print("Done.") diff --git a/Lib/imghdr.py b/Lib/imghdr.py deleted file mode 100644 index 6a372e66c7..0000000000 --- a/Lib/imghdr.py +++ /dev/null @@ -1,175 +0,0 @@ -"""Recognize image file formats based on their first few bytes.""" - -from os import PathLike -import warnings - -__all__ = ["what"] - - -warnings._deprecated(__name__, remove=(3, 13)) - - -#-------------------------# -# Recognize image headers # -#-------------------------# - -def what(file, h=None): - f = None - try: - if h is None: - if isinstance(file, (str, PathLike)): - f = open(file, 'rb') - h = f.read(32) - else: - location = file.tell() - h = file.read(32) - file.seek(location) - for tf in tests: - res = tf(h, f) - if res: - return res - finally: - if f: f.close() - return None - - -#---------------------------------# -# Subroutines per image file type # -#---------------------------------# - -tests = [] - -def test_jpeg(h, f): - """JPEG data with JFIF or Exif markers; and raw JPEG""" - if h[6:10] in (b'JFIF', b'Exif'): - return 'jpeg' - elif h[:4] == b'\xff\xd8\xff\xdb': - return 'jpeg' - -tests.append(test_jpeg) - -def test_png(h, f): - if h.startswith(b'\211PNG\r\n\032\n'): - return 'png' - -tests.append(test_png) - -def test_gif(h, f): - """GIF ('87 and '89 variants)""" - if h[:6] in (b'GIF87a', b'GIF89a'): - return 'gif' - -tests.append(test_gif) - -def test_tiff(h, f): - """TIFF (can be in Motorola or Intel byte order)""" - if h[:2] in (b'MM', b'II'): - return 'tiff' - -tests.append(test_tiff) - -def test_rgb(h, f): - """SGI image library""" - if h.startswith(b'\001\332'): - return 'rgb' - -tests.append(test_rgb) - -def test_pbm(h, f): - """PBM (portable bitmap)""" - if len(h) >= 3 and \ - h[0] == ord(b'P') and h[1] in b'14' and h[2] in b' \t\n\r': - return 'pbm' - -tests.append(test_pbm) - -def test_pgm(h, f): - """PGM (portable graymap)""" - if len(h) >= 3 and \ - h[0] == ord(b'P') and h[1] in b'25' and h[2] in b' \t\n\r': - return 'pgm' - -tests.append(test_pgm) - -def test_ppm(h, f): - """PPM (portable pixmap)""" - if len(h) >= 3 and \ - h[0] == ord(b'P') and h[1] in b'36' and h[2] in b' \t\n\r': - return 'ppm' - -tests.append(test_ppm) - -def test_rast(h, f): - """Sun raster file""" - if h.startswith(b'\x59\xA6\x6A\x95'): - return 'rast' - -tests.append(test_rast) - -def test_xbm(h, f): - """X bitmap (X10 or X11)""" - if h.startswith(b'#define '): - return 'xbm' - -tests.append(test_xbm) - -def test_bmp(h, f): - if h.startswith(b'BM'): - return 'bmp' - -tests.append(test_bmp) - -def test_webp(h, f): - if h.startswith(b'RIFF') and h[8:12] == b'WEBP': - return 'webp' - -tests.append(test_webp) - -def test_exr(h, f): - if h.startswith(b'\x76\x2f\x31\x01'): - return 'exr' - -tests.append(test_exr) - -#--------------------# -# Small test program # -#--------------------# - -def test(): - import sys - recursive = 0 - if sys.argv[1:] and sys.argv[1] == '-r': - del sys.argv[1:2] - recursive = 1 - try: - if sys.argv[1:]: - testall(sys.argv[1:], recursive, 1) - else: - testall(['.'], recursive, 1) - except KeyboardInterrupt: - sys.stderr.write('\n[Interrupted]\n') - sys.exit(1) - -def testall(list, recursive, toplevel): - import sys - import os - for filename in list: - if os.path.isdir(filename): - print(filename + '/:', end=' ') - if recursive or toplevel: - print('recursing down:') - import glob - names = glob.glob(os.path.join(glob.escape(filename), '*')) - testall(names, recursive, 0) - else: - print('*** directory (use -r) ***') - else: - 
print(filename + ':', end=' ') - sys.stdout.flush() - try: - print(what(filename)) - except OSError: - print('*** not found ***') - -if __name__ == '__main__': - test() diff --git a/Lib/sndhdr.py b/Lib/sndhdr.py deleted file mode 100644 index 594353136f..0000000000 --- a/Lib/sndhdr.py +++ /dev/null @@ -1,257 +0,0 @@ -"""Routines to help recognizing sound files. - -Function whathdr() recognizes various types of sound file headers. -It understands almost all headers that SOX can decode. - -The return tuple contains the following items, in this order: -- file type (as SOX understands it) -- sampling rate (0 if unknown or hard to decode) -- number of channels (0 if unknown or hard to decode) -- number of frames in the file (-1 if unknown or hard to decode) -- number of bits/sample, or 'U' for U-LAW, or 'A' for A-LAW - -If the file doesn't have a recognizable type, it returns None. -If the file can't be opened, OSError is raised. - -To compute the total time, divide the number of frames by the -sampling rate (a frame contains a sample for each channel). - -Function what() calls whathdr(). (It used to also use some -heuristics for raw data, but this doesn't work very well.) - -Finally, the function test() is a simple main program that calls -what() for all files mentioned on the argument list. For directory -arguments it calls what() for all files in that directory. Default -argument is "." (testing all files in the current directory). The -option -r tells it to recurse down directories found inside -explicitly given directories. -""" - -# The file structure is top-down except that the test program and its -# subroutine come last. - -__all__ = ['what', 'whathdr'] - -from collections import namedtuple - -SndHeaders = namedtuple('SndHeaders', - 'filetype framerate nchannels nframes sampwidth') - -SndHeaders.filetype.__doc__ = ("""The value for type indicates the data type -and will be one of the strings 'aifc', 'aiff', 'au','hcom', -'sndr', 'sndt', 'voc', 'wav', '8svx', 'sb', 'ub', or 'ul'.""") -SndHeaders.framerate.__doc__ = ("""The sampling_rate will be either the actual -value or 0 if unknown or difficult to decode.""") -SndHeaders.nchannels.__doc__ = ("""The number of channels or 0 if it cannot be -determined or if the value is difficult to decode.""") -SndHeaders.nframes.__doc__ = ("""The value for frames will be either the number -of frames or -1.""") -SndHeaders.sampwidth.__doc__ = ("""Either the sample size in bits or -'A' for A-LAW or 'U' for u-LAW.""") - -def what(filename): - """Guess the type of a sound file.""" - res = whathdr(filename) - return res - - -def whathdr(filename): - """Recognize sound headers.""" - with open(filename, 'rb') as f: - h = f.read(512) - for tf in tests: - res = tf(h, f) - if res: - return SndHeaders(*res) - return None - - -#-----------------------------------# -# Subroutines per sound header type # -#-----------------------------------# - -tests = [] - -def test_aifc(h, f): - import aifc - if not h.startswith(b'FORM'): - return None - if h[8:12] == b'AIFC': - fmt = 'aifc' - elif h[8:12] == b'AIFF': - fmt = 'aiff' - else: - return None - f.seek(0) - try: - a = aifc.open(f, 'r') - except (EOFError, aifc.Error): - return None - return (fmt, a.getframerate(), a.getnchannels(), - a.getnframes(), 8 * a.getsampwidth()) - -tests.append(test_aifc) - - -def test_au(h, f): - if h.startswith(b'.snd'): - func = get_long_be - elif h[:4] in (b'\0ds.', b'dns.'): - func = get_long_le - else: - return None - filetype = 'au' - hdr_size = func(h[4:8]) - data_size = 
func(h[8:12]) - encoding = func(h[12:16]) - rate = func(h[16:20]) - nchannels = func(h[20:24]) - sample_size = 1 # default - if encoding == 1: - sample_bits = 'U' - elif encoding == 2: - sample_bits = 8 - elif encoding == 3: - sample_bits = 16 - sample_size = 2 - else: - sample_bits = '?' - frame_size = sample_size * nchannels - if frame_size: - nframe = data_size / frame_size - else: - nframe = -1 - return filetype, rate, nchannels, nframe, sample_bits - -tests.append(test_au) - - -def test_hcom(h, f): - if h[65:69] != b'FSSD' or h[128:132] != b'HCOM': - return None - divisor = get_long_be(h[144:148]) - if divisor: - rate = 22050 / divisor - else: - rate = 0 - return 'hcom', rate, 1, -1, 8 - -tests.append(test_hcom) - - -def test_voc(h, f): - if not h.startswith(b'Creative Voice File\032'): - return None - sbseek = get_short_le(h[20:22]) - rate = 0 - if 0 <= sbseek < 500 and h[sbseek] == 1: - ratecode = 256 - h[sbseek+4] - if ratecode: - rate = int(1000000.0 / ratecode) - return 'voc', rate, 1, -1, 8 - -tests.append(test_voc) - - -def test_wav(h, f): - import wave - # 'RIFF' 'WAVE' 'fmt ' - if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ': - return None - f.seek(0) - try: - w = wave.open(f, 'r') - except (EOFError, wave.Error): - return None - return ('wav', w.getframerate(), w.getnchannels(), - w.getnframes(), 8*w.getsampwidth()) - -tests.append(test_wav) - - -def test_8svx(h, f): - if not h.startswith(b'FORM') or h[8:12] != b'8SVX': - return None - # Should decode it to get #channels -- assume always 1 - return '8svx', 0, 1, 0, 8 - -tests.append(test_8svx) - - -def test_sndt(h, f): - if h.startswith(b'SOUND'): - nsamples = get_long_le(h[8:12]) - rate = get_short_le(h[20:22]) - return 'sndt', rate, 1, nsamples, 8 - -tests.append(test_sndt) - - -def test_sndr(h, f): - if h.startswith(b'\0\0'): - rate = get_short_le(h[2:4]) - if 4000 <= rate <= 25000: - return 'sndr', rate, 1, -1, 8 - -tests.append(test_sndr) - - -#-------------------------------------------# -# Subroutines to extract numbers from bytes # -#-------------------------------------------# - -def get_long_be(b): - return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3] - -def get_long_le(b): - return (b[3] << 24) | (b[2] << 16) | (b[1] << 8) | b[0] - -def get_short_be(b): - return (b[0] << 8) | b[1] - -def get_short_le(b): - return (b[1] << 8) | b[0] - - -#--------------------# -# Small test program # -#--------------------# - -def test(): - import sys - recursive = 0 - if sys.argv[1:] and sys.argv[1] == '-r': - del sys.argv[1:2] - recursive = 1 - try: - if sys.argv[1:]: - testall(sys.argv[1:], recursive, 1) - else: - testall(['.'], recursive, 1) - except KeyboardInterrupt: - sys.stderr.write('\n[Interrupted]\n') - sys.exit(1) - -def testall(list, recursive, toplevel): - import sys - import os - for filename in list: - if os.path.isdir(filename): - print(filename + '/:', end=' ') - if recursive or toplevel: - print('recursing down:') - import glob - names = glob.glob(os.path.join(filename, '*')) - testall(names, recursive, 0) - else: - print('*** directory (use -r) ***') - else: - print(filename + ':', end=' ') - sys.stdout.flush() - try: - print(what(filename)) - except OSError: - print('*** not found ***') - -if __name__ == '__main__': - test() diff --git a/Lib/sunau.py b/Lib/sunau.py deleted file mode 100644 index 129502b0b4..0000000000 --- a/Lib/sunau.py +++ /dev/null @@ -1,531 +0,0 @@ -"""Stuff to parse Sun and NeXT audio files. - -An audio file consists of a header followed by the data. 
The structure -of the header is as follows. - - +---------------+ - | magic word | - +---------------+ - | header size | - +---------------+ - | data size | - +---------------+ - | encoding | - +---------------+ - | sample rate | - +---------------+ - | # of channels | - +---------------+ - | info | - | | - +---------------+ - -The magic word consists of the 4 characters '.snd'. Apart from the -info field, all header fields are 4 bytes in size. They are all -32-bit unsigned integers encoded in big-endian byte order. - -The header size really gives the start of the data. -The data size is the physical size of the data. From the other -parameters the number of frames can be calculated. -The encoding gives the way in which audio samples are encoded. -Possible values are listed below. -The info field currently consists of an ASCII string giving a -human-readable description of the audio file. The info field is -padded with NUL bytes to the header size. - -Usage. - -Reading audio files: - f = sunau.open(file, 'r') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods read(), seek(), and close(). -When the setpos() and rewind() methods are not used, the seek() -method is not necessary. - -This returns an instance of a class with the following public methods: - getnchannels() -- returns number of audio channels (1 for - mono, 2 for stereo) - getsampwidth() -- returns sample width in bytes - getframerate() -- returns sampling frequency - getnframes() -- returns number of audio frames - getcomptype() -- returns compression type ('NONE' or 'ULAW') - getcompname() -- returns human-readable version of - compression type ('not compressed' matches 'NONE') - getparams() -- returns a namedtuple consisting of all of the - above in the above order - getmarkers() -- returns None (for compatibility with the - aifc module) - getmark(id) -- raises an error since the mark does not - exist (for compatibility with the aifc module) - readframes(n) -- returns at most n frames of audio - rewind() -- rewind to the beginning of the audio stream - setpos(pos) -- seek to the specified position - tell() -- return the current position - close() -- close the instance (make it unusable) -The position returned by tell() and the position given to setpos() -are compatible and have nothing to do with the actual position in the -file. -The close() method is called automatically when the class instance -is destroyed. - -Writing audio files: - f = sunau.open(file, 'w') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods write(), tell(), seek(), and -close(). - -This returns an instance of a class with the following public methods: - setnchannels(n) -- set the number of channels - setsampwidth(n) -- set the sample width - setframerate(n) -- set the frame rate - setnframes(n) -- set the number of frames - setcomptype(type, name) - -- set the compression type and the - human-readable compression type - setparams(tuple)-- set all parameters at once - tell() -- return current position in output file - writeframesraw(data) - -- write audio frames without pathing up the - file header - writeframes(data) - -- write audio frames and patch up the file header - close() -- patch up the file header and close the - output file -You should set the parameters before the first writeframesraw or -writeframes. 
The total number of frames does not need to be set, -but when it is set to the correct value, the header does not have to -be patched up. -It is best to first set all parameters, perhaps possibly the -compression type, and then write audio frames using writeframesraw. -When all frames have been written, either call writeframes(b'') or -close() to patch up the sizes in the header. -The close() method is called automatically when the class instance -is destroyed. -""" - -from collections import namedtuple -import warnings - -_sunau_params = namedtuple('_sunau_params', - 'nchannels sampwidth framerate nframes comptype compname') - -# from -AUDIO_FILE_MAGIC = 0x2e736e64 -AUDIO_FILE_ENCODING_MULAW_8 = 1 -AUDIO_FILE_ENCODING_LINEAR_8 = 2 -AUDIO_FILE_ENCODING_LINEAR_16 = 3 -AUDIO_FILE_ENCODING_LINEAR_24 = 4 -AUDIO_FILE_ENCODING_LINEAR_32 = 5 -AUDIO_FILE_ENCODING_FLOAT = 6 -AUDIO_FILE_ENCODING_DOUBLE = 7 -AUDIO_FILE_ENCODING_ADPCM_G721 = 23 -AUDIO_FILE_ENCODING_ADPCM_G722 = 24 -AUDIO_FILE_ENCODING_ADPCM_G723_3 = 25 -AUDIO_FILE_ENCODING_ADPCM_G723_5 = 26 -AUDIO_FILE_ENCODING_ALAW_8 = 27 - -# from -AUDIO_UNKNOWN_SIZE = 0xFFFFFFFF # ((unsigned)(~0)) - -_simple_encodings = [AUDIO_FILE_ENCODING_MULAW_8, - AUDIO_FILE_ENCODING_LINEAR_8, - AUDIO_FILE_ENCODING_LINEAR_16, - AUDIO_FILE_ENCODING_LINEAR_24, - AUDIO_FILE_ENCODING_LINEAR_32, - AUDIO_FILE_ENCODING_ALAW_8] - -class Error(Exception): - pass - -def _read_u32(file): - x = 0 - for i in range(4): - byte = file.read(1) - if not byte: - raise EOFError - x = x*256 + ord(byte) - return x - -def _write_u32(file, x): - data = [] - for i in range(4): - d, m = divmod(x, 256) - data.insert(0, int(m)) - x = d - file.write(bytes(data)) - -class Au_read: - - def __init__(self, f): - if type(f) == type(''): - import builtins - f = builtins.open(f, 'rb') - self._opened = True - else: - self._opened = False - self.initfp(f) - - def __del__(self): - if self._file: - self.close() - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - def initfp(self, file): - self._file = file - self._soundpos = 0 - magic = int(_read_u32(file)) - if magic != AUDIO_FILE_MAGIC: - raise Error('bad magic number') - self._hdr_size = int(_read_u32(file)) - if self._hdr_size < 24: - raise Error('header size too small') - if self._hdr_size > 100: - raise Error('header size ridiculously large') - self._data_size = _read_u32(file) - if self._data_size != AUDIO_UNKNOWN_SIZE: - self._data_size = int(self._data_size) - self._encoding = int(_read_u32(file)) - if self._encoding not in _simple_encodings: - raise Error('encoding not (yet) supported') - if self._encoding in (AUDIO_FILE_ENCODING_MULAW_8, - AUDIO_FILE_ENCODING_ALAW_8): - self._sampwidth = 2 - self._framesize = 1 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_8: - self._framesize = self._sampwidth = 1 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_16: - self._framesize = self._sampwidth = 2 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_24: - self._framesize = self._sampwidth = 3 - elif self._encoding == AUDIO_FILE_ENCODING_LINEAR_32: - self._framesize = self._sampwidth = 4 - else: - raise Error('unknown encoding') - self._framerate = int(_read_u32(file)) - self._nchannels = int(_read_u32(file)) - if not self._nchannels: - raise Error('bad # of channels') - self._framesize = self._framesize * self._nchannels - if self._hdr_size > 24: - self._info = file.read(self._hdr_size - 24) - self._info, _, _ = self._info.partition(b'\0') - else: - self._info = b'' - try: - self._data_pos = file.tell() 
- except (AttributeError, OSError): - self._data_pos = None - - def getfp(self): - return self._file - - def getnchannels(self): - return self._nchannels - - def getsampwidth(self): - return self._sampwidth - - def getframerate(self): - return self._framerate - - def getnframes(self): - if self._data_size == AUDIO_UNKNOWN_SIZE: - return AUDIO_UNKNOWN_SIZE - if self._encoding in _simple_encodings: - return self._data_size // self._framesize - return 0 # XXX--must do some arithmetic here - - def getcomptype(self): - if self._encoding == AUDIO_FILE_ENCODING_MULAW_8: - return 'ULAW' - elif self._encoding == AUDIO_FILE_ENCODING_ALAW_8: - return 'ALAW' - else: - return 'NONE' - - def getcompname(self): - if self._encoding == AUDIO_FILE_ENCODING_MULAW_8: - return 'CCITT G.711 u-law' - elif self._encoding == AUDIO_FILE_ENCODING_ALAW_8: - return 'CCITT G.711 A-law' - else: - return 'not compressed' - - def getparams(self): - return _sunau_params(self.getnchannels(), self.getsampwidth(), - self.getframerate(), self.getnframes(), - self.getcomptype(), self.getcompname()) - - def getmarkers(self): - return None - - def getmark(self, id): - raise Error('no marks') - - def readframes(self, nframes): - if self._encoding in _simple_encodings: - if nframes == AUDIO_UNKNOWN_SIZE: - data = self._file.read() - else: - data = self._file.read(nframes * self._framesize) - self._soundpos += len(data) // self._framesize - if self._encoding == AUDIO_FILE_ENCODING_MULAW_8: - import audioop - data = audioop.ulaw2lin(data, self._sampwidth) - return data - return None # XXX--not implemented yet - - def rewind(self): - if self._data_pos is None: - raise OSError('cannot seek') - self._file.seek(self._data_pos) - self._soundpos = 0 - - def tell(self): - return self._soundpos - - def setpos(self, pos): - if pos < 0 or pos > self.getnframes(): - raise Error('position not in range') - if self._data_pos is None: - raise OSError('cannot seek') - self._file.seek(self._data_pos + pos * self._framesize) - self._soundpos = pos - - def close(self): - file = self._file - if file: - self._file = None - if self._opened: - file.close() - -class Au_write: - - def __init__(self, f): - if type(f) == type(''): - import builtins - f = builtins.open(f, 'wb') - self._opened = True - else: - self._opened = False - self.initfp(f) - - def __del__(self): - if self._file: - self.close() - self._file = None - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - def initfp(self, file): - self._file = file - self._framerate = 0 - self._nchannels = 0 - self._sampwidth = 0 - self._framesize = 0 - self._nframes = AUDIO_UNKNOWN_SIZE - self._nframeswritten = 0 - self._datawritten = 0 - self._datalength = 0 - self._info = b'' - self._comptype = 'ULAW' # default is U-law - - def setnchannels(self, nchannels): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if nchannels not in (1, 2, 4): - raise Error('only 1, 2, or 4 channels supported') - self._nchannels = nchannels - - def getnchannels(self): - if not self._nchannels: - raise Error('number of channels not set') - return self._nchannels - - def setsampwidth(self, sampwidth): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if sampwidth not in (1, 2, 3, 4): - raise Error('bad sample width') - self._sampwidth = sampwidth - - def getsampwidth(self): - if not self._framerate: - raise Error('sample width not specified') - return self._sampwidth - - def setframerate(self, framerate): - 
if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._framerate = framerate - - def getframerate(self): - if not self._framerate: - raise Error('frame rate not set') - return self._framerate - - def setnframes(self, nframes): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if nframes < 0: - raise Error('# of frames cannot be negative') - self._nframes = nframes - - def getnframes(self): - return self._nframeswritten - - def setcomptype(self, type, name): - if type in ('NONE', 'ULAW'): - self._comptype = type - else: - raise Error('unknown compression type') - - def getcomptype(self): - return self._comptype - - def getcompname(self): - if self._comptype == 'ULAW': - return 'CCITT G.711 u-law' - elif self._comptype == 'ALAW': - return 'CCITT G.711 A-law' - else: - return 'not compressed' - - def setparams(self, params): - nchannels, sampwidth, framerate, nframes, comptype, compname = params - self.setnchannels(nchannels) - self.setsampwidth(sampwidth) - self.setframerate(framerate) - self.setnframes(nframes) - self.setcomptype(comptype, compname) - - def getparams(self): - return _sunau_params(self.getnchannels(), self.getsampwidth(), - self.getframerate(), self.getnframes(), - self.getcomptype(), self.getcompname()) - - def tell(self): - return self._nframeswritten - - def writeframesraw(self, data): - if not isinstance(data, (bytes, bytearray)): - data = memoryview(data).cast('B') - self._ensure_header_written() - if self._comptype == 'ULAW': - import audioop - data = audioop.lin2ulaw(data, self._sampwidth) - nframes = len(data) // self._framesize - self._file.write(data) - self._nframeswritten = self._nframeswritten + nframes - self._datawritten = self._datawritten + len(data) - - def writeframes(self, data): - self.writeframesraw(data) - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - - def close(self): - if self._file: - try: - self._ensure_header_written() - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - self._file.flush() - finally: - file = self._file - self._file = None - if self._opened: - file.close() - - # - # private methods - # - - def _ensure_header_written(self): - if not self._nframeswritten: - if not self._nchannels: - raise Error('# of channels not specified') - if not self._sampwidth: - raise Error('sample width not specified') - if not self._framerate: - raise Error('frame rate not specified') - self._write_header() - - def _write_header(self): - if self._comptype == 'NONE': - if self._sampwidth == 1: - encoding = AUDIO_FILE_ENCODING_LINEAR_8 - self._framesize = 1 - elif self._sampwidth == 2: - encoding = AUDIO_FILE_ENCODING_LINEAR_16 - self._framesize = 2 - elif self._sampwidth == 3: - encoding = AUDIO_FILE_ENCODING_LINEAR_24 - self._framesize = 3 - elif self._sampwidth == 4: - encoding = AUDIO_FILE_ENCODING_LINEAR_32 - self._framesize = 4 - else: - raise Error('internal error') - elif self._comptype == 'ULAW': - encoding = AUDIO_FILE_ENCODING_MULAW_8 - self._framesize = 1 - else: - raise Error('internal error') - self._framesize = self._framesize * self._nchannels - _write_u32(self._file, AUDIO_FILE_MAGIC) - header_size = 25 + len(self._info) - header_size = (header_size + 7) & ~7 - _write_u32(self._file, header_size) - if self._nframes == AUDIO_UNKNOWN_SIZE: - length = AUDIO_UNKNOWN_SIZE - else: - length = self._nframes * self._framesize - try: - 
self._form_length_pos = self._file.tell() - except (AttributeError, OSError): - self._form_length_pos = None - _write_u32(self._file, length) - self._datalength = length - _write_u32(self._file, encoding) - _write_u32(self._file, self._framerate) - _write_u32(self._file, self._nchannels) - self._file.write(self._info) - self._file.write(b'\0'*(header_size - len(self._info) - 24)) - - def _patchheader(self): - if self._form_length_pos is None: - raise OSError('cannot seek') - self._file.seek(self._form_length_pos) - _write_u32(self._file, self._datawritten) - self._datalength = self._datawritten - self._file.seek(0, 2) - -def open(f, mode=None): - if mode is None: - if hasattr(f, 'mode'): - mode = f.mode - else: - mode = 'rb' - if mode in ('r', 'rb'): - return Au_read(f) - elif mode in ('w', 'wb'): - return Au_write(f) - else: - raise Error("mode must be 'r', 'rb', 'w', or 'wb'") - -def openfp(f, mode=None): - warnings.warn("sunau.openfp is deprecated since Python 3.7. " - "Use sunau.open instead.", DeprecationWarning, stacklevel=2) - return open(f, mode=mode) From fd4ad3e4d183b8d8faf73a34f5b13de1680ab58d Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Thu, 3 Apr 2025 11:25:41 -0700 Subject: [PATCH 006/126] Remove smtpd --- Lib/smtpd.py | 979 -------------------------------------- Lib/test/test_smtpd.py | 1018 ---------------------------------------- 2 files changed, 1997 deletions(-) delete mode 100755 Lib/smtpd.py delete mode 100644 Lib/test/test_smtpd.py diff --git a/Lib/smtpd.py b/Lib/smtpd.py deleted file mode 100755 index 963e0a7689..0000000000 --- a/Lib/smtpd.py +++ /dev/null @@ -1,979 +0,0 @@ -#! /usr/bin/env python3 -"""An RFC 5321 smtp proxy with optional RFC 1870 and RFC 6531 extensions. - -Usage: %(program)s [options] [localhost:localport [remotehost:remoteport]] - -Options: - - --nosetuid - -n - This program generally tries to setuid `nobody', unless this flag is - set. The setuid call will fail if this program is not run as root (in - which case, use this flag). - - --version - -V - Print the version number and exit. - - --class classname - -c classname - Use `classname' as the concrete SMTP proxy class. Uses `PureProxy' by - default. - - --size limit - -s limit - Restrict the total size of the incoming message to "limit" number of - bytes via the RFC 1870 SIZE extension. Defaults to 33554432 bytes. - - --smtputf8 - -u - Enable the SMTPUTF8 extension and behave as an RFC 6531 smtp proxy. - - --debug - -d - Turn on debugging prints. - - --help - -h - Print this message and exit. - -Version: %(__version__)s - -If localhost is not given then `localhost' is used, and if localport is not -given then 8025 is used. If remotehost is not given then `localhost' is used, -and if remoteport is not given, then 25 is used. -""" - -# Overview: -# -# This file implements the minimal SMTP protocol as defined in RFC 5321. It -# has a hierarchy of classes which implement the backend functionality for the -# smtpd. A number of classes are provided: -# -# SMTPServer - the base class for the backend. Raises NotImplementedError -# if you try to use it. -# -# DebuggingServer - simply prints each message it receives on stdout. -# -# PureProxy - Proxies all messages to a real smtpd which does final -# delivery. One known problem with this class is that it doesn't handle -# SMTP errors from the backend server at all. This should be fixed -# (contributions are welcome!). -# -# MailmanProxy - An experimental hack to work with GNU Mailman -# . 
Using this server as your real incoming smtpd, your -# mailhost will automatically recognize and accept mail destined to Mailman -# lists when those lists are created. Every message not destined for a list -# gets forwarded to a real backend smtpd, as with PureProxy. Again, errors -# are not handled correctly yet. -# -# -# Author: Barry Warsaw -# -# TODO: -# -# - support mailbox delivery -# - alias files -# - Handle more ESMTP extensions -# - handle error codes from the backend smtpd - -import sys -import os -import errno -import getopt -import time -import socket -import collections -from warnings import warn -from email._header_value_parser import get_addr_spec, get_angle_addr - -__all__ = [ - "SMTPChannel", "SMTPServer", "DebuggingServer", "PureProxy", - "MailmanProxy", -] - -warn( - 'The smtpd module is deprecated and unmaintained and will be removed ' - 'in Python 3.12. Please see aiosmtpd ' - '(https://aiosmtpd.readthedocs.io/) for the recommended replacement.', - DeprecationWarning, - stacklevel=2) - - -# These are imported after the above warning so that users get the correct -# deprecation warning. -import asyncore -import asynchat - - -program = sys.argv[0] -__version__ = 'Python SMTP proxy version 0.3' - - -class Devnull: - def write(self, msg): pass - def flush(self): pass - - -DEBUGSTREAM = Devnull() -NEWLINE = '\n' -COMMASPACE = ', ' -DATA_SIZE_DEFAULT = 33554432 - - -def usage(code, msg=''): - print(__doc__ % globals(), file=sys.stderr) - if msg: - print(msg, file=sys.stderr) - sys.exit(code) - - -class SMTPChannel(asynchat.async_chat): - COMMAND = 0 - DATA = 1 - - command_size_limit = 512 - command_size_limits = collections.defaultdict(lambda x=command_size_limit: x) - - @property - def max_command_size_limit(self): - try: - return max(self.command_size_limits.values()) - except ValueError: - return self.command_size_limit - - def __init__(self, server, conn, addr, data_size_limit=DATA_SIZE_DEFAULT, - map=None, enable_SMTPUTF8=False, decode_data=False): - asynchat.async_chat.__init__(self, conn, map=map) - self.smtp_server = server - self.conn = conn - self.addr = addr - self.data_size_limit = data_size_limit - self.enable_SMTPUTF8 = enable_SMTPUTF8 - self._decode_data = decode_data - if enable_SMTPUTF8 and decode_data: - raise ValueError("decode_data and enable_SMTPUTF8 cannot" - " be set to True at the same time") - if decode_data: - self._emptystring = '' - self._linesep = '\r\n' - self._dotsep = '.' 
- self._newline = NEWLINE - else: - self._emptystring = b'' - self._linesep = b'\r\n' - self._dotsep = ord(b'.') - self._newline = b'\n' - self._set_rset_state() - self.seen_greeting = '' - self.extended_smtp = False - self.command_size_limits.clear() - self.fqdn = socket.getfqdn() - try: - self.peer = conn.getpeername() - except OSError as err: - # a race condition may occur if the other end is closing - # before we can get the peername - self.close() - if err.errno != errno.ENOTCONN: - raise - return - print('Peer:', repr(self.peer), file=DEBUGSTREAM) - self.push('220 %s %s' % (self.fqdn, __version__)) - - def _set_post_data_state(self): - """Reset state variables to their post-DATA state.""" - self.smtp_state = self.COMMAND - self.mailfrom = None - self.rcpttos = [] - self.require_SMTPUTF8 = False - self.num_bytes = 0 - self.set_terminator(b'\r\n') - - def _set_rset_state(self): - """Reset all state variables except the greeting.""" - self._set_post_data_state() - self.received_data = '' - self.received_lines = [] - - - # properties for backwards-compatibility - @property - def __server(self): - warn("Access to __server attribute on SMTPChannel is deprecated, " - "use 'smtp_server' instead", DeprecationWarning, 2) - return self.smtp_server - @__server.setter - def __server(self, value): - warn("Setting __server attribute on SMTPChannel is deprecated, " - "set 'smtp_server' instead", DeprecationWarning, 2) - self.smtp_server = value - - @property - def __line(self): - warn("Access to __line attribute on SMTPChannel is deprecated, " - "use 'received_lines' instead", DeprecationWarning, 2) - return self.received_lines - @__line.setter - def __line(self, value): - warn("Setting __line attribute on SMTPChannel is deprecated, " - "set 'received_lines' instead", DeprecationWarning, 2) - self.received_lines = value - - @property - def __state(self): - warn("Access to __state attribute on SMTPChannel is deprecated, " - "use 'smtp_state' instead", DeprecationWarning, 2) - return self.smtp_state - @__state.setter - def __state(self, value): - warn("Setting __state attribute on SMTPChannel is deprecated, " - "set 'smtp_state' instead", DeprecationWarning, 2) - self.smtp_state = value - - @property - def __greeting(self): - warn("Access to __greeting attribute on SMTPChannel is deprecated, " - "use 'seen_greeting' instead", DeprecationWarning, 2) - return self.seen_greeting - @__greeting.setter - def __greeting(self, value): - warn("Setting __greeting attribute on SMTPChannel is deprecated, " - "set 'seen_greeting' instead", DeprecationWarning, 2) - self.seen_greeting = value - - @property - def __mailfrom(self): - warn("Access to __mailfrom attribute on SMTPChannel is deprecated, " - "use 'mailfrom' instead", DeprecationWarning, 2) - return self.mailfrom - @__mailfrom.setter - def __mailfrom(self, value): - warn("Setting __mailfrom attribute on SMTPChannel is deprecated, " - "set 'mailfrom' instead", DeprecationWarning, 2) - self.mailfrom = value - - @property - def __rcpttos(self): - warn("Access to __rcpttos attribute on SMTPChannel is deprecated, " - "use 'rcpttos' instead", DeprecationWarning, 2) - return self.rcpttos - @__rcpttos.setter - def __rcpttos(self, value): - warn("Setting __rcpttos attribute on SMTPChannel is deprecated, " - "set 'rcpttos' instead", DeprecationWarning, 2) - self.rcpttos = value - - @property - def __data(self): - warn("Access to __data attribute on SMTPChannel is deprecated, " - "use 'received_data' instead", DeprecationWarning, 2) - return self.received_data - 
@__data.setter - def __data(self, value): - warn("Setting __data attribute on SMTPChannel is deprecated, " - "set 'received_data' instead", DeprecationWarning, 2) - self.received_data = value - - @property - def __fqdn(self): - warn("Access to __fqdn attribute on SMTPChannel is deprecated, " - "use 'fqdn' instead", DeprecationWarning, 2) - return self.fqdn - @__fqdn.setter - def __fqdn(self, value): - warn("Setting __fqdn attribute on SMTPChannel is deprecated, " - "set 'fqdn' instead", DeprecationWarning, 2) - self.fqdn = value - - @property - def __peer(self): - warn("Access to __peer attribute on SMTPChannel is deprecated, " - "use 'peer' instead", DeprecationWarning, 2) - return self.peer - @__peer.setter - def __peer(self, value): - warn("Setting __peer attribute on SMTPChannel is deprecated, " - "set 'peer' instead", DeprecationWarning, 2) - self.peer = value - - @property - def __conn(self): - warn("Access to __conn attribute on SMTPChannel is deprecated, " - "use 'conn' instead", DeprecationWarning, 2) - return self.conn - @__conn.setter - def __conn(self, value): - warn("Setting __conn attribute on SMTPChannel is deprecated, " - "set 'conn' instead", DeprecationWarning, 2) - self.conn = value - - @property - def __addr(self): - warn("Access to __addr attribute on SMTPChannel is deprecated, " - "use 'addr' instead", DeprecationWarning, 2) - return self.addr - @__addr.setter - def __addr(self, value): - warn("Setting __addr attribute on SMTPChannel is deprecated, " - "set 'addr' instead", DeprecationWarning, 2) - self.addr = value - - # Overrides base class for convenience. - def push(self, msg): - asynchat.async_chat.push(self, bytes( - msg + '\r\n', 'utf-8' if self.require_SMTPUTF8 else 'ascii')) - - # Implementation of base class abstract method - def collect_incoming_data(self, data): - limit = None - if self.smtp_state == self.COMMAND: - limit = self.max_command_size_limit - elif self.smtp_state == self.DATA: - limit = self.data_size_limit - if limit and self.num_bytes > limit: - return - elif limit: - self.num_bytes += len(data) - if self._decode_data: - self.received_lines.append(str(data, 'utf-8')) - else: - self.received_lines.append(data) - - # Implementation of base class abstract method - def found_terminator(self): - line = self._emptystring.join(self.received_lines) - print('Data:', repr(line), file=DEBUGSTREAM) - self.received_lines = [] - if self.smtp_state == self.COMMAND: - sz, self.num_bytes = self.num_bytes, 0 - if not line: - self.push('500 Error: bad syntax') - return - if not self._decode_data: - line = str(line, 'utf-8') - i = line.find(' ') - if i < 0: - command = line.upper() - arg = None - else: - command = line[:i].upper() - arg = line[i+1:].strip() - max_sz = (self.command_size_limits[command] - if self.extended_smtp else self.command_size_limit) - if sz > max_sz: - self.push('500 Error: line too long') - return - method = getattr(self, 'smtp_' + command, None) - if not method: - self.push('500 Error: command "%s" not recognized' % command) - return - method(arg) - return - else: - if self.smtp_state != self.DATA: - self.push('451 Internal confusion') - self.num_bytes = 0 - return - if self.data_size_limit and self.num_bytes > self.data_size_limit: - self.push('552 Error: Too much mail data') - self.num_bytes = 0 - return - # Remove extraneous carriage returns and de-transparency according - # to RFC 5321, Section 4.5.2. 
- data = [] - for text in line.split(self._linesep): - if text and text[0] == self._dotsep: - data.append(text[1:]) - else: - data.append(text) - self.received_data = self._newline.join(data) - args = (self.peer, self.mailfrom, self.rcpttos, self.received_data) - kwargs = {} - if not self._decode_data: - kwargs = { - 'mail_options': self.mail_options, - 'rcpt_options': self.rcpt_options, - } - status = self.smtp_server.process_message(*args, **kwargs) - self._set_post_data_state() - if not status: - self.push('250 OK') - else: - self.push(status) - - # SMTP and ESMTP commands - def smtp_HELO(self, arg): - if not arg: - self.push('501 Syntax: HELO hostname') - return - # See issue #21783 for a discussion of this behavior. - if self.seen_greeting: - self.push('503 Duplicate HELO/EHLO') - return - self._set_rset_state() - self.seen_greeting = arg - self.push('250 %s' % self.fqdn) - - def smtp_EHLO(self, arg): - if not arg: - self.push('501 Syntax: EHLO hostname') - return - # See issue #21783 for a discussion of this behavior. - if self.seen_greeting: - self.push('503 Duplicate HELO/EHLO') - return - self._set_rset_state() - self.seen_greeting = arg - self.extended_smtp = True - self.push('250-%s' % self.fqdn) - if self.data_size_limit: - self.push('250-SIZE %s' % self.data_size_limit) - self.command_size_limits['MAIL'] += 26 - if not self._decode_data: - self.push('250-8BITMIME') - if self.enable_SMTPUTF8: - self.push('250-SMTPUTF8') - self.command_size_limits['MAIL'] += 10 - self.push('250 HELP') - - def smtp_NOOP(self, arg): - if arg: - self.push('501 Syntax: NOOP') - else: - self.push('250 OK') - - def smtp_QUIT(self, arg): - # args is ignored - self.push('221 Bye') - self.close_when_done() - - def _strip_command_keyword(self, keyword, arg): - keylen = len(keyword) - if arg[:keylen].upper() == keyword: - return arg[keylen:].strip() - return '' - - def _getaddr(self, arg): - if not arg: - return '', '' - if arg.lstrip().startswith('<'): - address, rest = get_angle_addr(arg) - else: - address, rest = get_addr_spec(arg) - if not address: - return address, rest - return address.addr_spec, rest - - def _getparams(self, params): - # Return params as dictionary. Return None if not all parameters - # appear to be syntactically valid according to RFC 1869. - result = {} - for param in params: - param, eq, value = param.partition('=') - if not param.isalnum() or eq and not value: - return None - result[param] = value if eq else True - return result - - def smtp_HELP(self, arg): - if arg: - extended = ' [SP ]' - lc_arg = arg.upper() - if lc_arg == 'EHLO': - self.push('250 Syntax: EHLO hostname') - elif lc_arg == 'HELO': - self.push('250 Syntax: HELO hostname') - elif lc_arg == 'MAIL': - msg = '250 Syntax: MAIL FROM:
 <address>'
-                if self.extended_smtp:
-                    msg += extended
-                self.push(msg)
-            elif lc_arg == 'RCPT':
-                msg = '250 Syntax: RCPT TO: <address>'
-                if self.extended_smtp:
-                    msg += extended
-                self.push(msg)
-            elif lc_arg == 'DATA':
-                self.push('250 Syntax: DATA')
-            elif lc_arg == 'RSET':
-                self.push('250 Syntax: RSET')
-            elif lc_arg == 'NOOP':
-                self.push('250 Syntax: NOOP')
-            elif lc_arg == 'QUIT':
-                self.push('250 Syntax: QUIT')
-            elif lc_arg == 'VRFY':
-                self.push('250 Syntax: VRFY <address>')
-            else:
-                self.push('501 Supported commands: EHLO HELO MAIL RCPT '
-                          'DATA RSET NOOP QUIT VRFY')
-        else:
-            self.push('250 Supported commands: EHLO HELO MAIL RCPT DATA '
-                      'RSET NOOP QUIT VRFY')
-
-    def smtp_VRFY(self, arg):
-        if arg:
-            address, params = self._getaddr(arg)
-            if address:
-                self.push('252 Cannot VRFY user, but will accept message '
-                          'and attempt delivery')
-            else:
-                self.push('502 Could not VRFY %s' % arg)
-        else:
-            self.push('501 Syntax: VRFY <address>')
-
-    def smtp_MAIL(self, arg):
-        if not self.seen_greeting:
-            self.push('503 Error: send HELO first')
-            return
-        print('===> MAIL', arg, file=DEBUGSTREAM)
-        syntaxerr = '501 Syntax: MAIL FROM: <address>
' - if self.extended_smtp: - syntaxerr += ' [SP ]' - if arg is None: - self.push(syntaxerr) - return - arg = self._strip_command_keyword('FROM:', arg) - address, params = self._getaddr(arg) - if not address: - self.push(syntaxerr) - return - if not self.extended_smtp and params: - self.push(syntaxerr) - return - if self.mailfrom: - self.push('503 Error: nested MAIL command') - return - self.mail_options = params.upper().split() - params = self._getparams(self.mail_options) - if params is None: - self.push(syntaxerr) - return - if not self._decode_data: - body = params.pop('BODY', '7BIT') - if body not in ['7BIT', '8BITMIME']: - self.push('501 Error: BODY can only be one of 7BIT, 8BITMIME') - return - if self.enable_SMTPUTF8: - smtputf8 = params.pop('SMTPUTF8', False) - if smtputf8 is True: - self.require_SMTPUTF8 = True - elif smtputf8 is not False: - self.push('501 Error: SMTPUTF8 takes no arguments') - return - size = params.pop('SIZE', None) - if size: - if not size.isdigit(): - self.push(syntaxerr) - return - elif self.data_size_limit and int(size) > self.data_size_limit: - self.push('552 Error: message size exceeds fixed maximum message size') - return - if len(params.keys()) > 0: - self.push('555 MAIL FROM parameters not recognized or not implemented') - return - self.mailfrom = address - print('sender:', self.mailfrom, file=DEBUGSTREAM) - self.push('250 OK') - - def smtp_RCPT(self, arg): - if not self.seen_greeting: - self.push('503 Error: send HELO first'); - return - print('===> RCPT', arg, file=DEBUGSTREAM) - if not self.mailfrom: - self.push('503 Error: need MAIL command') - return - syntaxerr = '501 Syntax: RCPT TO:
' - if self.extended_smtp: - syntaxerr += ' [SP ]' - if arg is None: - self.push(syntaxerr) - return - arg = self._strip_command_keyword('TO:', arg) - address, params = self._getaddr(arg) - if not address: - self.push(syntaxerr) - return - if not self.extended_smtp and params: - self.push(syntaxerr) - return - self.rcpt_options = params.upper().split() - params = self._getparams(self.rcpt_options) - if params is None: - self.push(syntaxerr) - return - # XXX currently there are no options we recognize. - if len(params.keys()) > 0: - self.push('555 RCPT TO parameters not recognized or not implemented') - return - self.rcpttos.append(address) - print('recips:', self.rcpttos, file=DEBUGSTREAM) - self.push('250 OK') - - def smtp_RSET(self, arg): - if arg: - self.push('501 Syntax: RSET') - return - self._set_rset_state() - self.push('250 OK') - - def smtp_DATA(self, arg): - if not self.seen_greeting: - self.push('503 Error: send HELO first'); - return - if not self.rcpttos: - self.push('503 Error: need RCPT command') - return - if arg: - self.push('501 Syntax: DATA') - return - self.smtp_state = self.DATA - self.set_terminator(b'\r\n.\r\n') - self.push('354 End data with .') - - # Commands that have not been implemented - def smtp_EXPN(self, arg): - self.push('502 EXPN not implemented') - - -class SMTPServer(asyncore.dispatcher): - # SMTPChannel class to use for managing client connections - channel_class = SMTPChannel - - def __init__(self, localaddr, remoteaddr, - data_size_limit=DATA_SIZE_DEFAULT, map=None, - enable_SMTPUTF8=False, decode_data=False): - self._localaddr = localaddr - self._remoteaddr = remoteaddr - self.data_size_limit = data_size_limit - self.enable_SMTPUTF8 = enable_SMTPUTF8 - self._decode_data = decode_data - if enable_SMTPUTF8 and decode_data: - raise ValueError("decode_data and enable_SMTPUTF8 cannot" - " be set to True at the same time") - asyncore.dispatcher.__init__(self, map=map) - try: - gai_results = socket.getaddrinfo(*localaddr, - type=socket.SOCK_STREAM) - self.create_socket(gai_results[0][0], gai_results[0][1]) - # try to re-use a server port if possible - self.set_reuse_addr() - self.bind(localaddr) - self.listen(5) - except: - self.close() - raise - else: - print('%s started at %s\n\tLocal addr: %s\n\tRemote addr:%s' % ( - self.__class__.__name__, time.ctime(time.time()), - localaddr, remoteaddr), file=DEBUGSTREAM) - - def handle_accepted(self, conn, addr): - print('Incoming connection from %s' % repr(addr), file=DEBUGSTREAM) - channel = self.channel_class(self, - conn, - addr, - self.data_size_limit, - self._map, - self.enable_SMTPUTF8, - self._decode_data) - - # API for "doing something useful with the message" - def process_message(self, peer, mailfrom, rcpttos, data, **kwargs): - """Override this abstract method to handle messages from the client. - - peer is a tuple containing (ipaddr, port) of the client that made the - socket connection to our smtp port. - - mailfrom is the raw address the client claims the message is coming - from. - - rcpttos is a list of raw addresses the client wishes to deliver the - message to. - - data is a string containing the entire full text of the message, - headers (if supplied) and all. It has been `de-transparencied' - according to RFC 821, Section 4.5.2. In other words, a line - containing a `.' followed by other text has had the leading dot - removed. - - kwargs is a dictionary containing additional information. 
It is - empty if decode_data=True was given as init parameter, otherwise - it will contain the following keys: - 'mail_options': list of parameters to the mail command. All - elements are uppercase strings. Example: - ['BODY=8BITMIME', 'SMTPUTF8']. - 'rcpt_options': same, for the rcpt command. - - This function should return None for a normal `250 Ok' response; - otherwise, it should return the desired response string in RFC 821 - format. - - """ - raise NotImplementedError - - -class DebuggingServer(SMTPServer): - - def _print_message_content(self, peer, data): - inheaders = 1 - lines = data.splitlines() - for line in lines: - # headers first - if inheaders and not line: - peerheader = 'X-Peer: ' + peer[0] - if not isinstance(data, str): - # decoded_data=false; make header match other binary output - peerheader = repr(peerheader.encode('utf-8')) - print(peerheader) - inheaders = 0 - if not isinstance(data, str): - # Avoid spurious 'str on bytes instance' warning. - line = repr(line) - print(line) - - def process_message(self, peer, mailfrom, rcpttos, data, **kwargs): - print('---------- MESSAGE FOLLOWS ----------') - if kwargs: - if kwargs.get('mail_options'): - print('mail options: %s' % kwargs['mail_options']) - if kwargs.get('rcpt_options'): - print('rcpt options: %s\n' % kwargs['rcpt_options']) - self._print_message_content(peer, data) - print('------------ END MESSAGE ------------') - - -class PureProxy(SMTPServer): - def __init__(self, *args, **kwargs): - if 'enable_SMTPUTF8' in kwargs and kwargs['enable_SMTPUTF8']: - raise ValueError("PureProxy does not support SMTPUTF8.") - super(PureProxy, self).__init__(*args, **kwargs) - - def process_message(self, peer, mailfrom, rcpttos, data): - lines = data.split('\n') - # Look for the last header - i = 0 - for line in lines: - if not line: - break - i += 1 - lines.insert(i, 'X-Peer: %s' % peer[0]) - data = NEWLINE.join(lines) - refused = self._deliver(mailfrom, rcpttos, data) - # TBD: what to do with refused addresses? - print('we got some refusals:', refused, file=DEBUGSTREAM) - - def _deliver(self, mailfrom, rcpttos, data): - import smtplib - refused = {} - try: - s = smtplib.SMTP() - s.connect(self._remoteaddr[0], self._remoteaddr[1]) - try: - refused = s.sendmail(mailfrom, rcpttos, data) - finally: - s.quit() - except smtplib.SMTPRecipientsRefused as e: - print('got SMTPRecipientsRefused', file=DEBUGSTREAM) - refused = e.recipients - except (OSError, smtplib.SMTPException) as e: - print('got', e.__class__, file=DEBUGSTREAM) - # All recipients were refused. If the exception had an associated - # error code, use it. Otherwise,fake it with a non-triggering - # exception code. - errcode = getattr(e, 'smtp_code', -1) - errmsg = getattr(e, 'smtp_error', 'ignore') - for r in rcpttos: - refused[r] = (errcode, errmsg) - return refused - - -class MailmanProxy(PureProxy): - def __init__(self, *args, **kwargs): - warn('MailmanProxy is deprecated and will be removed ' - 'in future', DeprecationWarning, 2) - if 'enable_SMTPUTF8' in kwargs and kwargs['enable_SMTPUTF8']: - raise ValueError("MailmanProxy does not support SMTPUTF8.") - super(PureProxy, self).__init__(*args, **kwargs) - - def process_message(self, peer, mailfrom, rcpttos, data): - from io import StringIO - from Mailman import Utils - from Mailman import Message - from Mailman import MailList - # If the message is to a Mailman mailing list, then we'll invoke the - # Mailman script directly, without going through the real smtpd. 
- # Otherwise we'll forward it to the local proxy for disposition. - listnames = [] - for rcpt in rcpttos: - local = rcpt.lower().split('@')[0] - # We allow the following variations on the theme - # listname - # listname-admin - # listname-owner - # listname-request - # listname-join - # listname-leave - parts = local.split('-') - if len(parts) > 2: - continue - listname = parts[0] - if len(parts) == 2: - command = parts[1] - else: - command = '' - if not Utils.list_exists(listname) or command not in ( - '', 'admin', 'owner', 'request', 'join', 'leave'): - continue - listnames.append((rcpt, listname, command)) - # Remove all list recipients from rcpttos and forward what we're not - # going to take care of ourselves. Linear removal should be fine - # since we don't expect a large number of recipients. - for rcpt, listname, command in listnames: - rcpttos.remove(rcpt) - # If there's any non-list destined recipients left, - print('forwarding recips:', ' '.join(rcpttos), file=DEBUGSTREAM) - if rcpttos: - refused = self._deliver(mailfrom, rcpttos, data) - # TBD: what to do with refused addresses? - print('we got refusals:', refused, file=DEBUGSTREAM) - # Now deliver directly to the list commands - mlists = {} - s = StringIO(data) - msg = Message.Message(s) - # These headers are required for the proper execution of Mailman. All - # MTAs in existence seem to add these if the original message doesn't - # have them. - if not msg.get('from'): - msg['From'] = mailfrom - if not msg.get('date'): - msg['Date'] = time.ctime(time.time()) - for rcpt, listname, command in listnames: - print('sending message to', rcpt, file=DEBUGSTREAM) - mlist = mlists.get(listname) - if not mlist: - mlist = MailList.MailList(listname, lock=0) - mlists[listname] = mlist - # dispatch on the type of command - if command == '': - # post - msg.Enqueue(mlist, tolist=1) - elif command == 'admin': - msg.Enqueue(mlist, toadmin=1) - elif command == 'owner': - msg.Enqueue(mlist, toowner=1) - elif command == 'request': - msg.Enqueue(mlist, torequest=1) - elif command in ('join', 'leave'): - # TBD: this is a hack! 
- if command == 'join': - msg['Subject'] = 'subscribe' - else: - msg['Subject'] = 'unsubscribe' - msg.Enqueue(mlist, torequest=1) - - -class Options: - setuid = True - classname = 'PureProxy' - size_limit = None - enable_SMTPUTF8 = False - - -def parseargs(): - global DEBUGSTREAM - try: - opts, args = getopt.getopt( - sys.argv[1:], 'nVhc:s:du', - ['class=', 'nosetuid', 'version', 'help', 'size=', 'debug', - 'smtputf8']) - except getopt.error as e: - usage(1, e) - - options = Options() - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-V', '--version'): - print(__version__) - sys.exit(0) - elif opt in ('-n', '--nosetuid'): - options.setuid = False - elif opt in ('-c', '--class'): - options.classname = arg - elif opt in ('-d', '--debug'): - DEBUGSTREAM = sys.stderr - elif opt in ('-u', '--smtputf8'): - options.enable_SMTPUTF8 = True - elif opt in ('-s', '--size'): - try: - int_size = int(arg) - options.size_limit = int_size - except: - print('Invalid size: ' + arg, file=sys.stderr) - sys.exit(1) - - # parse the rest of the arguments - if len(args) < 1: - localspec = 'localhost:8025' - remotespec = 'localhost:25' - elif len(args) < 2: - localspec = args[0] - remotespec = 'localhost:25' - elif len(args) < 3: - localspec = args[0] - remotespec = args[1] - else: - usage(1, 'Invalid arguments: %s' % COMMASPACE.join(args)) - - # split into host/port pairs - i = localspec.find(':') - if i < 0: - usage(1, 'Bad local spec: %s' % localspec) - options.localhost = localspec[:i] - try: - options.localport = int(localspec[i+1:]) - except ValueError: - usage(1, 'Bad local port: %s' % localspec) - i = remotespec.find(':') - if i < 0: - usage(1, 'Bad remote spec: %s' % remotespec) - options.remotehost = remotespec[:i] - try: - options.remoteport = int(remotespec[i+1:]) - except ValueError: - usage(1, 'Bad remote port: %s' % remotespec) - return options - - -if __name__ == '__main__': - options = parseargs() - # Become nobody - classname = options.classname - if "." 
in classname: - lastdot = classname.rfind(".") - mod = __import__(classname[:lastdot], globals(), locals(), [""]) - classname = classname[lastdot+1:] - else: - import __main__ as mod - class_ = getattr(mod, classname) - proxy = class_((options.localhost, options.localport), - (options.remotehost, options.remoteport), - options.size_limit, enable_SMTPUTF8=options.enable_SMTPUTF8) - if options.setuid: - try: - import pwd - except ImportError: - print('Cannot import module "pwd"; try running with -n option.', file=sys.stderr) - sys.exit(1) - nobody = pwd.getpwnam('nobody')[2] - try: - os.setuid(nobody) - except PermissionError: - print('Cannot setuid "nobody"; try running with -n option.', file=sys.stderr) - sys.exit(1) - try: - asyncore.loop() - except KeyboardInterrupt: - pass diff --git a/Lib/test/test_smtpd.py b/Lib/test/test_smtpd.py deleted file mode 100644 index d2e150d535..0000000000 --- a/Lib/test/test_smtpd.py +++ /dev/null @@ -1,1018 +0,0 @@ -import unittest -import textwrap -from test import support, mock_socket -from test.support import socket_helper -from test.support import warnings_helper -import socket -import io - -import warnings -with warnings.catch_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - import smtpd - import asyncore - - -class DummyServer(smtpd.SMTPServer): - def __init__(self, *args, **kwargs): - smtpd.SMTPServer.__init__(self, *args, **kwargs) - self.messages = [] - if self._decode_data: - self.return_status = 'return status' - else: - self.return_status = b'return status' - - def process_message(self, peer, mailfrom, rcpttos, data, **kw): - self.messages.append((peer, mailfrom, rcpttos, data)) - if data == self.return_status: - return '250 Okish' - if 'mail_options' in kw and 'SMTPUTF8' in kw['mail_options']: - return '250 SMTPUTF8 message okish' - - -class DummyDispatcherBroken(Exception): - pass - - -class BrokenDummyServer(DummyServer): - def listen(self, num): - raise DummyDispatcherBroken() - - -class SMTPDServerTest(unittest.TestCase): - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - - def test_process_message_unimplemented(self): - server = smtpd.SMTPServer((socket_helper.HOST, 0), ('b', 0), - decode_data=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, decode_data=True) - - def write_line(line): - channel.socket.queue_recv(line) - channel.handle_read() - - write_line(b'HELO example') - write_line(b'MAIL From:eggs@example') - write_line(b'RCPT To:spam@example') - write_line(b'DATA') - self.assertRaises(NotImplementedError, write_line, b'spam\r\n.\r\n') - - def test_decode_data_and_enable_SMTPUTF8_raises(self): - self.assertRaises( - ValueError, - smtpd.SMTPServer, - (socket_helper.HOST, 0), - ('b', 0), - enable_SMTPUTF8=True, - decode_data=True) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - - -class DebuggingServerTest(unittest.TestCase): - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - - def send_data(self, channel, data, enable_SMTPUTF8=False): - def write_line(line): - channel.socket.queue_recv(line) - channel.handle_read() - write_line(b'EHLO example') - if enable_SMTPUTF8: - write_line(b'MAIL From:eggs@example BODY=8BITMIME SMTPUTF8') - else: - write_line(b'MAIL From:eggs@example') - write_line(b'RCPT To:spam@example') - write_line(b'DATA') - write_line(data) - write_line(b'.') - - def test_process_message_with_decode_data_true(self): - server = smtpd.DebuggingServer((socket_helper.HOST, 0), ('b', 0), - 
decode_data=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, decode_data=True) - with support.captured_stdout() as s: - self.send_data(channel, b'From: test\n\nhello\n') - stdout = s.getvalue() - self.assertEqual(stdout, textwrap.dedent("""\ - ---------- MESSAGE FOLLOWS ---------- - From: test - X-Peer: peer-address - - hello - ------------ END MESSAGE ------------ - """)) - - def test_process_message_with_decode_data_false(self): - server = smtpd.DebuggingServer((socket_helper.HOST, 0), ('b', 0)) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr) - with support.captured_stdout() as s: - self.send_data(channel, b'From: test\n\nh\xc3\xa9llo\xff\n') - stdout = s.getvalue() - self.assertEqual(stdout, textwrap.dedent("""\ - ---------- MESSAGE FOLLOWS ---------- - b'From: test' - b'X-Peer: peer-address' - b'' - b'h\\xc3\\xa9llo\\xff' - ------------ END MESSAGE ------------ - """)) - - def test_process_message_with_enable_SMTPUTF8_true(self): - server = smtpd.DebuggingServer((socket_helper.HOST, 0), ('b', 0), - enable_SMTPUTF8=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, enable_SMTPUTF8=True) - with support.captured_stdout() as s: - self.send_data(channel, b'From: test\n\nh\xc3\xa9llo\xff\n') - stdout = s.getvalue() - self.assertEqual(stdout, textwrap.dedent("""\ - ---------- MESSAGE FOLLOWS ---------- - b'From: test' - b'X-Peer: peer-address' - b'' - b'h\\xc3\\xa9llo\\xff' - ------------ END MESSAGE ------------ - """)) - - def test_process_SMTPUTF8_message_with_enable_SMTPUTF8_true(self): - server = smtpd.DebuggingServer((socket_helper.HOST, 0), ('b', 0), - enable_SMTPUTF8=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, enable_SMTPUTF8=True) - with support.captured_stdout() as s: - self.send_data(channel, b'From: test\n\nh\xc3\xa9llo\xff\n', - enable_SMTPUTF8=True) - stdout = s.getvalue() - self.assertEqual(stdout, textwrap.dedent("""\ - ---------- MESSAGE FOLLOWS ---------- - mail options: ['BODY=8BITMIME', 'SMTPUTF8'] - b'From: test' - b'X-Peer: peer-address' - b'' - b'h\\xc3\\xa9llo\\xff' - ------------ END MESSAGE ------------ - """)) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - - -class TestFamilyDetection(unittest.TestCase): - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - - @unittest.skipUnless(socket_helper.IPV6_ENABLED, "IPv6 not enabled") - def test_socket_uses_IPv6(self): - server = smtpd.SMTPServer((socket_helper.HOSTv6, 0), (socket_helper.HOSTv4, 0)) - self.assertEqual(server.socket.family, socket.AF_INET6) - - def test_socket_uses_IPv4(self): - server = smtpd.SMTPServer((socket_helper.HOSTv4, 0), (socket_helper.HOSTv6, 0)) - self.assertEqual(server.socket.family, socket.AF_INET) - - -class TestRcptOptionParsing(unittest.TestCase): - error_response = (b'555 RCPT TO parameters not recognized or not ' - b'implemented\r\n') - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, channel, line): - channel.socket.queue_recv(line) - channel.handle_read() - - def test_params_rejected(self): - server = DummyServer((socket_helper.HOST, 
0), ('b', 0)) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr) - self.write_line(channel, b'EHLO example') - self.write_line(channel, b'MAIL from: size=20') - self.write_line(channel, b'RCPT to: foo=bar') - self.assertEqual(channel.socket.last, self.error_response) - - def test_nothing_accepted(self): - server = DummyServer((socket_helper.HOST, 0), ('b', 0)) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr) - self.write_line(channel, b'EHLO example') - self.write_line(channel, b'MAIL from: size=20') - self.write_line(channel, b'RCPT to: ') - self.assertEqual(channel.socket.last, b'250 OK\r\n') - - -class TestMailOptionParsing(unittest.TestCase): - error_response = (b'555 MAIL FROM parameters not recognized or not ' - b'implemented\r\n') - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, channel, line): - channel.socket.queue_recv(line) - channel.handle_read() - - def test_with_decode_data_true(self): - server = DummyServer((socket_helper.HOST, 0), ('b', 0), decode_data=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, decode_data=True) - self.write_line(channel, b'EHLO example') - for line in [ - b'MAIL from: size=20 SMTPUTF8', - b'MAIL from: size=20 SMTPUTF8 BODY=8BITMIME', - b'MAIL from: size=20 BODY=UNKNOWN', - b'MAIL from: size=20 body=8bitmime', - ]: - self.write_line(channel, line) - self.assertEqual(channel.socket.last, self.error_response) - self.write_line(channel, b'MAIL from: size=20') - self.assertEqual(channel.socket.last, b'250 OK\r\n') - - def test_with_decode_data_false(self): - server = DummyServer((socket_helper.HOST, 0), ('b', 0)) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr) - self.write_line(channel, b'EHLO example') - for line in [ - b'MAIL from: size=20 SMTPUTF8', - b'MAIL from: size=20 SMTPUTF8 BODY=8BITMIME', - ]: - self.write_line(channel, line) - self.assertEqual(channel.socket.last, self.error_response) - self.write_line( - channel, - b'MAIL from: size=20 SMTPUTF8 BODY=UNKNOWN') - self.assertEqual( - channel.socket.last, - b'501 Error: BODY can only be one of 7BIT, 8BITMIME\r\n') - self.write_line( - channel, b'MAIL from: size=20 body=8bitmime') - self.assertEqual(channel.socket.last, b'250 OK\r\n') - - def test_with_enable_smtputf8_true(self): - server = DummyServer((socket_helper.HOST, 0), ('b', 0), enable_SMTPUTF8=True) - conn, addr = server.accept() - channel = smtpd.SMTPChannel(server, conn, addr, enable_SMTPUTF8=True) - self.write_line(channel, b'EHLO example') - self.write_line( - channel, - b'MAIL from: size=20 body=8bitmime smtputf8') - self.assertEqual(channel.socket.last, b'250 OK\r\n') - - -class SMTPDChannelTest(unittest.TestCase): - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOST, 0), ('b', 0), - decode_data=True) - conn, addr = self.server.accept() - self.channel = smtpd.SMTPChannel(self.server, conn, addr, - decode_data=True) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, line): - 
self.channel.socket.queue_recv(line) - self.channel.handle_read() - - def test_broken_connect(self): - self.assertRaises( - DummyDispatcherBroken, BrokenDummyServer, - (socket_helper.HOST, 0), ('b', 0), decode_data=True) - - def test_decode_data_and_enable_SMTPUTF8_raises(self): - self.assertRaises( - ValueError, smtpd.SMTPChannel, - self.server, self.channel.conn, self.channel.addr, - enable_SMTPUTF8=True, decode_data=True) - - def test_server_accept(self): - self.server.handle_accept() - - def test_missing_data(self): - self.write_line(b'') - self.assertEqual(self.channel.socket.last, - b'500 Error: bad syntax\r\n') - - def test_EHLO(self): - self.write_line(b'EHLO example') - self.assertEqual(self.channel.socket.last, b'250 HELP\r\n') - - def test_EHLO_bad_syntax(self): - self.write_line(b'EHLO') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: EHLO hostname\r\n') - - def test_EHLO_duplicate(self): - self.write_line(b'EHLO example') - self.write_line(b'EHLO example') - self.assertEqual(self.channel.socket.last, - b'503 Duplicate HELO/EHLO\r\n') - - def test_EHLO_HELO_duplicate(self): - self.write_line(b'EHLO example') - self.write_line(b'HELO example') - self.assertEqual(self.channel.socket.last, - b'503 Duplicate HELO/EHLO\r\n') - - def test_HELO(self): - name = smtpd.socket.getfqdn() - self.write_line(b'HELO example') - self.assertEqual(self.channel.socket.last, - '250 {}\r\n'.format(name).encode('ascii')) - - def test_HELO_EHLO_duplicate(self): - self.write_line(b'HELO example') - self.write_line(b'EHLO example') - self.assertEqual(self.channel.socket.last, - b'503 Duplicate HELO/EHLO\r\n') - - def test_HELP(self): - self.write_line(b'HELP') - self.assertEqual(self.channel.socket.last, - b'250 Supported commands: EHLO HELO MAIL RCPT ' + \ - b'DATA RSET NOOP QUIT VRFY\r\n') - - def test_HELP_command(self): - self.write_line(b'HELP MAIL') - self.assertEqual(self.channel.socket.last, - b'250 Syntax: MAIL FROM:
 <address>\r\n')
-
-    def test_HELP_command_unknown(self):
-        self.write_line(b'HELP SPAM')
-        self.assertEqual(self.channel.socket.last,
-                         b'501 Supported commands: EHLO HELO MAIL RCPT ' + \
-                         b'DATA RSET NOOP QUIT VRFY\r\n')
-
-    def test_HELO_bad_syntax(self):
-        self.write_line(b'HELO')
-        self.assertEqual(self.channel.socket.last,
-                         b'501 Syntax: HELO hostname\r\n')
-
-    def test_HELO_duplicate(self):
-        self.write_line(b'HELO example')
-        self.write_line(b'HELO example')
-        self.assertEqual(self.channel.socket.last,
-                         b'503 Duplicate HELO/EHLO\r\n')
-
-    def test_HELO_parameter_rejected_when_extensions_not_enabled(self):
-        self.extended_smtp = False
-        self.write_line(b'HELO example')
-        self.write_line(b'MAIL from: SIZE=1234')
-        self.assertEqual(self.channel.socket.last,
-            b'501 Syntax: MAIL FROM: <address>
\r\n') - - def test_MAIL_allows_space_after_colon(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL from: ') - self.assertEqual(self.channel.socket.last, - b'250 OK\r\n') - - def test_extended_MAIL_allows_space_after_colon(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from: size=20') - self.assertEqual(self.channel.socket.last, - b'250 OK\r\n') - - def test_NOOP(self): - self.write_line(b'NOOP') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_HELO_NOOP(self): - self.write_line(b'HELO example') - self.write_line(b'NOOP') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_NOOP_bad_syntax(self): - self.write_line(b'NOOP hi') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: NOOP\r\n') - - def test_QUIT(self): - self.write_line(b'QUIT') - self.assertEqual(self.channel.socket.last, b'221 Bye\r\n') - - def test_HELO_QUIT(self): - self.write_line(b'HELO example') - self.write_line(b'QUIT') - self.assertEqual(self.channel.socket.last, b'221 Bye\r\n') - - def test_QUIT_arg_ignored(self): - self.write_line(b'QUIT bye bye') - self.assertEqual(self.channel.socket.last, b'221 Bye\r\n') - - def test_bad_state(self): - self.channel.smtp_state = 'BAD STATE' - self.write_line(b'HELO example') - self.assertEqual(self.channel.socket.last, - b'451 Internal confusion\r\n') - - def test_command_too_long(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL from: ' + - b'a' * self.channel.command_size_limit + - b'@example') - self.assertEqual(self.channel.socket.last, - b'500 Error: line too long\r\n') - - def test_MAIL_command_limit_extended_with_SIZE(self): - self.write_line(b'EHLO example') - fill_len = self.channel.command_size_limit - len('MAIL from:<@example>') - self.write_line(b'MAIL from:<' + - b'a' * fill_len + - b'@example> SIZE=1234') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - self.write_line(b'MAIL from:<' + - b'a' * (fill_len + 26) + - b'@example> SIZE=1234') - self.assertEqual(self.channel.socket.last, - b'500 Error: line too long\r\n') - - def test_MAIL_command_rejects_SMTPUTF8_by_default(self): - self.write_line(b'EHLO example') - self.write_line( - b'MAIL from: BODY=8BITMIME SMTPUTF8') - self.assertEqual(self.channel.socket.last[0:1], b'5') - - def test_data_longer_than_default_data_size_limit(self): - # Hack the default so we don't have to generate so much data. - self.channel.data_size_limit = 1048 - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'A' * self.channel.data_size_limit + - b'A\r\n.') - self.assertEqual(self.channel.socket.last, - b'552 Error: Too much mail data\r\n') - - def test_MAIL_size_parameter(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL FROM: SIZE=512') - self.assertEqual(self.channel.socket.last, - b'250 OK\r\n') - - def test_MAIL_invalid_size_parameter(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL FROM: SIZE=invalid') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: MAIL FROM:
 <address> [SP <mail-parameters>]\r\n')
-
-    def test_MAIL_RCPT_unknown_parameters(self):
-        self.write_line(b'EHLO example')
-        self.write_line(b'MAIL FROM: ham=green')
-        self.assertEqual(self.channel.socket.last,
-            b'555 MAIL FROM parameters not recognized or not implemented\r\n')
-
-        self.write_line(b'MAIL FROM:')
-        self.write_line(b'RCPT TO: ham=green')
-        self.assertEqual(self.channel.socket.last,
-            b'555 RCPT TO parameters not recognized or not implemented\r\n')
-
-    def test_MAIL_size_parameter_larger_than_default_data_size_limit(self):
-        self.channel.data_size_limit = 1048
-        self.write_line(b'EHLO example')
-        self.write_line(b'MAIL FROM: SIZE=2096')
-        self.assertEqual(self.channel.socket.last,
-            b'552 Error: message size exceeds fixed maximum message size\r\n')
-
-    def test_need_MAIL(self):
-        self.write_line(b'HELO example')
-        self.write_line(b'RCPT to:spam@example')
-        self.assertEqual(self.channel.socket.last,
-            b'503 Error: need MAIL command\r\n')
-
-    def test_MAIL_syntax_HELO(self):
-        self.write_line(b'HELO example')
-        self.write_line(b'MAIL from eggs@example')
-        self.assertEqual(self.channel.socket.last,
-            b'501 Syntax: MAIL FROM: <address>\r\n')
-
-    def test_MAIL_syntax_EHLO(self):
-        self.write_line(b'EHLO example')
-        self.write_line(b'MAIL from eggs@example')
-        self.assertEqual(self.channel.socket.last,
-            b'501 Syntax: MAIL FROM: <address> [SP <mail-parameters>]\r\n')
-
-    def test_MAIL_missing_address(self):
-        self.write_line(b'HELO example')
-        self.write_line(b'MAIL from:')
-        self.assertEqual(self.channel.socket.last,
-            b'501 Syntax: MAIL FROM: <address>
\r\n') - - def test_MAIL_chevrons(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL from:') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_MAIL_empty_chevrons(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from:<>') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_MAIL_quoted_localpart(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from: <"Fred Blogs"@example.com>') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.channel.mailfrom, '"Fred Blogs"@example.com') - - def test_MAIL_quoted_localpart_no_angles(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from: "Fred Blogs"@example.com') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.channel.mailfrom, '"Fred Blogs"@example.com') - - def test_MAIL_quoted_localpart_with_size(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from: <"Fred Blogs"@example.com> SIZE=1000') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.channel.mailfrom, '"Fred Blogs"@example.com') - - def test_MAIL_quoted_localpart_with_size_no_angles(self): - self.write_line(b'EHLO example') - self.write_line(b'MAIL from: "Fred Blogs"@example.com SIZE=1000') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.channel.mailfrom, '"Fred Blogs"@example.com') - - def test_nested_MAIL(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL from:eggs@example') - self.write_line(b'MAIL from:spam@example') - self.assertEqual(self.channel.socket.last, - b'503 Error: nested MAIL command\r\n') - - def test_VRFY(self): - self.write_line(b'VRFY eggs@example') - self.assertEqual(self.channel.socket.last, - b'252 Cannot VRFY user, but will accept message and attempt ' + \ - b'delivery\r\n') - - def test_VRFY_syntax(self): - self.write_line(b'VRFY') - self.assertEqual(self.channel.socket.last, - b'501 Syntax: VRFY
 <address>\r\n')
-
-    def test_EXPN_not_implemented(self):
-        self.write_line(b'EXPN')
-        self.assertEqual(self.channel.socket.last,
-            b'502 EXPN not implemented\r\n')
-
-    def test_no_HELO_MAIL(self):
-        self.write_line(b'MAIL from:')
-        self.assertEqual(self.channel.socket.last,
-            b'503 Error: send HELO first\r\n')
-
-    def test_need_RCPT(self):
-        self.write_line(b'HELO example')
-        self.write_line(b'MAIL From:eggs@example')
-        self.write_line(b'DATA')
-        self.assertEqual(self.channel.socket.last,
-            b'503 Error: need RCPT command\r\n')
-
-    def test_RCPT_syntax_HELO(self):
-        self.write_line(b'HELO example')
-        self.write_line(b'MAIL From: eggs@example')
-        self.write_line(b'RCPT to eggs@example')
-        self.assertEqual(self.channel.socket.last,
-            b'501 Syntax: RCPT TO: <address>\r\n')
-
-    def test_RCPT_syntax_EHLO(self):
-        self.write_line(b'EHLO example')
-        self.write_line(b'MAIL From: eggs@example')
-        self.write_line(b'RCPT to eggs@example')
-        self.assertEqual(self.channel.socket.last,
-            b'501 Syntax: RCPT TO: <address>
[SP ]\r\n') - - def test_RCPT_lowercase_to_OK(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From: eggs@example') - self.write_line(b'RCPT to: ') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_no_HELO_RCPT(self): - self.write_line(b'RCPT to eggs@example') - self.assertEqual(self.channel.socket.last, - b'503 Error: send HELO first\r\n') - - def test_data_dialog(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.write_line(b'RCPT To:spam@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - self.write_line(b'DATA') - self.assertEqual(self.channel.socket.last, - b'354 End data with .\r\n') - self.write_line(b'data\r\nmore\r\n.') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.server.messages, - [(('peer-address', 'peer-port'), - 'eggs@example', - ['spam@example'], - 'data\nmore')]) - - def test_DATA_syntax(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA spam') - self.assertEqual(self.channel.socket.last, b'501 Syntax: DATA\r\n') - - def test_no_HELO_DATA(self): - self.write_line(b'DATA spam') - self.assertEqual(self.channel.socket.last, - b'503 Error: send HELO first\r\n') - - def test_data_transparency_section_4_5_2(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'..\r\n.\r\n') - self.assertEqual(self.channel.received_data, '.') - - def test_multiple_RCPT(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'RCPT To:ham@example') - self.write_line(b'DATA') - self.write_line(b'data\r\n.') - self.assertEqual(self.server.messages, - [(('peer-address', 'peer-port'), - 'eggs@example', - ['spam@example','ham@example'], - 'data')]) - - def test_manual_status(self): - # checks that the Channel is able to return a custom status message - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'return status\r\n.') - self.assertEqual(self.channel.socket.last, b'250 Okish\r\n') - - def test_RSET(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'RSET') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.write_line(b'MAIL From:foo@example') - self.write_line(b'RCPT To:eggs@example') - self.write_line(b'DATA') - self.write_line(b'data\r\n.') - self.assertEqual(self.server.messages, - [(('peer-address', 'peer-port'), - 'foo@example', - ['eggs@example'], - 'data')]) - - def test_HELO_RSET(self): - self.write_line(b'HELO example') - self.write_line(b'RSET') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_RSET_syntax(self): - self.write_line(b'RSET hi') - self.assertEqual(self.channel.socket.last, b'501 Syntax: RSET\r\n') - - def test_unknown_command(self): - self.write_line(b'UNKNOWN_CMD') - self.assertEqual(self.channel.socket.last, - b'500 Error: command "UNKNOWN_CMD" not ' + \ - b'recognized\r\n') - - def test_attribute_deprecations(self): - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = 
self.channel._SMTPChannel__server - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__server = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__line - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__line = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__state - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__state = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__greeting - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__greeting = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__mailfrom - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__mailfrom = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__rcpttos - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__rcpttos = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__data - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__data = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__fqdn - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__fqdn = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__peer - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__peer = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__conn - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__conn = 'spam' - with warnings_helper.check_warnings(('', DeprecationWarning)): - spam = self.channel._SMTPChannel__addr - with warnings_helper.check_warnings(('', DeprecationWarning)): - self.channel._SMTPChannel__addr = 'spam' - -@unittest.skipUnless(socket_helper.IPV6_ENABLED, "IPv6 not enabled") -class SMTPDChannelIPv6Test(SMTPDChannelTest): - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOSTv6, 0), ('b', 0), - decode_data=True) - conn, addr = self.server.accept() - self.channel = smtpd.SMTPChannel(self.server, conn, addr, - decode_data=True) - -class SMTPDChannelWithDataSizeLimitTest(unittest.TestCase): - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOST, 0), ('b', 0), - decode_data=True) - conn, addr = self.server.accept() - # Set DATA size limit to 32 bytes for easy testing - self.channel = smtpd.SMTPChannel(self.server, conn, addr, 32, - decode_data=True) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, line): - self.channel.socket.queue_recv(line) - self.channel.handle_read() - - def test_data_limit_dialog(self): - 
self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.write_line(b'RCPT To:spam@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - self.write_line(b'DATA') - self.assertEqual(self.channel.socket.last, - b'354 End data with .\r\n') - self.write_line(b'data\r\nmore\r\n.') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.assertEqual(self.server.messages, - [(('peer-address', 'peer-port'), - 'eggs@example', - ['spam@example'], - 'data\nmore')]) - - def test_data_limit_dialog_too_much_data(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - self.write_line(b'RCPT To:spam@example') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - self.write_line(b'DATA') - self.assertEqual(self.channel.socket.last, - b'354 End data with .\r\n') - self.write_line(b'This message is longer than 32 bytes\r\n.') - self.assertEqual(self.channel.socket.last, - b'552 Error: Too much mail data\r\n') - - -class SMTPDChannelWithDecodeDataFalse(unittest.TestCase): - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOST, 0), ('b', 0)) - conn, addr = self.server.accept() - self.channel = smtpd.SMTPChannel(self.server, conn, addr) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, line): - self.channel.socket.queue_recv(line) - self.channel.handle_read() - - def test_ascii_data(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'plain ascii text') - self.write_line(b'.') - self.assertEqual(self.channel.received_data, b'plain ascii text') - - def test_utf8_data(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87') - self.write_line(b'and some plain ascii') - self.write_line(b'.') - self.assertEqual( - self.channel.received_data, - b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87\n' - b'and some plain ascii') - - -class SMTPDChannelWithDecodeDataTrue(unittest.TestCase): - - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOST, 0), ('b', 0), - decode_data=True) - conn, addr = self.server.accept() - # Set decode_data to True - self.channel = smtpd.SMTPChannel(self.server, conn, addr, - decode_data=True) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, line): - self.channel.socket.queue_recv(line) - self.channel.handle_read() - - def test_ascii_data(self): - self.write_line(b'HELO example') - self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'plain ascii text') - self.write_line(b'.') - self.assertEqual(self.channel.received_data, 'plain ascii text') - - def test_utf8_data(self): - self.write_line(b'HELO example') - 
self.write_line(b'MAIL From:eggs@example') - self.write_line(b'RCPT To:spam@example') - self.write_line(b'DATA') - self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87') - self.write_line(b'and some plain ascii') - self.write_line(b'.') - self.assertEqual( - self.channel.received_data, - 'utf8 enriched text: żźć\nand some plain ascii') - - -class SMTPDChannelTestWithEnableSMTPUTF8True(unittest.TestCase): - def setUp(self): - smtpd.socket = asyncore.socket = mock_socket - self.old_debugstream = smtpd.DEBUGSTREAM - self.debug = smtpd.DEBUGSTREAM = io.StringIO() - self.server = DummyServer((socket_helper.HOST, 0), ('b', 0), - enable_SMTPUTF8=True) - conn, addr = self.server.accept() - self.channel = smtpd.SMTPChannel(self.server, conn, addr, - enable_SMTPUTF8=True) - - def tearDown(self): - asyncore.close_all() - asyncore.socket = smtpd.socket = socket - smtpd.DEBUGSTREAM = self.old_debugstream - - def write_line(self, line): - self.channel.socket.queue_recv(line) - self.channel.handle_read() - - def test_MAIL_command_accepts_SMTPUTF8_when_announced(self): - self.write_line(b'EHLO example') - self.write_line( - 'MAIL from: BODY=8BITMIME SMTPUTF8'.encode( - 'utf-8') - ) - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_process_smtputf8_message(self): - self.write_line(b'EHLO example') - for mail_parameters in [b'', b'BODY=8BITMIME SMTPUTF8']: - self.write_line(b'MAIL from: ' + mail_parameters) - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line(b'rcpt to:') - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line(b'data') - self.assertEqual(self.channel.socket.last[0:3], b'354') - self.write_line(b'c\r\n.') - if mail_parameters == b'': - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - else: - self.assertEqual(self.channel.socket.last, - b'250 SMTPUTF8 message okish\r\n') - - def test_utf8_data(self): - self.write_line(b'EHLO example') - self.write_line( - 'MAIL From: naïve@examplé BODY=8BITMIME SMTPUTF8'.encode('utf-8')) - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line('RCPT To:späm@examplé'.encode('utf-8')) - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line(b'DATA') - self.assertEqual(self.channel.socket.last[0:3], b'354') - self.write_line(b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87') - self.write_line(b'.') - self.assertEqual( - self.channel.received_data, - b'utf8 enriched text: \xc5\xbc\xc5\xba\xc4\x87') - - def test_MAIL_command_limit_extended_with_SIZE_and_SMTPUTF8(self): - self.write_line(b'ehlo example') - fill_len = (512 + 26 + 10) - len('mail from:<@example>') - self.write_line(b'MAIL from:<' + - b'a' * (fill_len + 1) + - b'@example>') - self.assertEqual(self.channel.socket.last, - b'500 Error: line too long\r\n') - self.write_line(b'MAIL from:<' + - b'a' * fill_len + - b'@example>') - self.assertEqual(self.channel.socket.last, b'250 OK\r\n') - - def test_multiple_emails_with_extended_command_length(self): - self.write_line(b'ehlo example') - fill_len = (512 + 26 + 10) - len('mail from:<@example>') - for char in [b'a', b'b', b'c']: - self.write_line(b'MAIL from:<' + char * fill_len + b'a@example>') - self.assertEqual(self.channel.socket.last[0:3], b'500') - self.write_line(b'MAIL from:<' + char * fill_len + b'@example>') - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line(b'rcpt to:') - self.assertEqual(self.channel.socket.last[0:3], b'250') - self.write_line(b'data') - self.assertEqual(self.channel.socket.last[0:3], 
b'354') - self.write_line(b'test\r\n.') - self.assertEqual(self.channel.socket.last[0:3], b'250') - - -class MiscTestCase(unittest.TestCase): - def test__all__(self): - not_exported = { - "program", "Devnull", "DEBUGSTREAM", "NEWLINE", "COMMASPACE", - "DATA_SIZE_DEFAULT", "usage", "Options", "parseargs", - } - support.check__all__(self, smtpd, not_exported=not_exported) - - -if __name__ == "__main__": - unittest.main() From e7f04612f61242bd65bfb7f01a5f13d36fa54af1 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Thu, 3 Apr 2025 10:10:26 -0700 Subject: [PATCH 007/126] remove chunk.py --- Lib/chunk.py | 173 --------------------------------------------------- 1 file changed, 173 deletions(-) delete mode 100644 Lib/chunk.py diff --git a/Lib/chunk.py b/Lib/chunk.py deleted file mode 100644 index 618781efd1..0000000000 --- a/Lib/chunk.py +++ /dev/null @@ -1,173 +0,0 @@ -"""Simple class to read IFF chunks. - -An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File -Format)) has the following structure: - -+----------------+ -| ID (4 bytes) | -+----------------+ -| size (4 bytes) | -+----------------+ -| data | -| ... | -+----------------+ - -The ID is a 4-byte string which identifies the type of chunk. - -The size field (a 32-bit value, encoded using big-endian byte order) -gives the size of the whole chunk, including the 8-byte header. - -Usually an IFF-type file consists of one or more chunks. The proposed -usage of the Chunk class defined here is to instantiate an instance at -the start of each chunk and read from the instance until it reaches -the end, after which a new instance can be instantiated. At the end -of the file, creating a new instance will fail with an EOFError -exception. - -Usage: -while True: - try: - chunk = Chunk(file) - except EOFError: - break - chunktype = chunk.getname() - while True: - data = chunk.read(nbytes) - if not data: - pass - # do something with data - -The interface is file-like. The implemented methods are: -read, close, seek, tell, isatty. -Extra methods are: skip() (called by close, skips to the end of the chunk), -getname() (returns the name (ID) of the chunk) - -The __init__ method has one required argument, a file-like object -(including a chunk instance), and one optional argument, a flag which -specifies whether or not chunks are aligned on 2-byte boundaries. The -default is 1, i.e. aligned. 
-""" - -import warnings - -warnings._deprecated(__name__, remove=(3, 13)) - -class Chunk: - def __init__(self, file, align=True, bigendian=True, inclheader=False): - import struct - self.closed = False - self.align = align # whether to align to word (2-byte) boundaries - if bigendian: - strflag = '>' - else: - strflag = '<' - self.file = file - self.chunkname = file.read(4) - if len(self.chunkname) < 4: - raise EOFError - try: - self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0] - except struct.error: - raise EOFError from None - if inclheader: - self.chunksize = self.chunksize - 8 # subtract header - self.size_read = 0 - try: - self.offset = self.file.tell() - except (AttributeError, OSError): - self.seekable = False - else: - self.seekable = True - - def getname(self): - """Return the name (ID) of the current chunk.""" - return self.chunkname - - def getsize(self): - """Return the size of the current chunk.""" - return self.chunksize - - def close(self): - if not self.closed: - try: - self.skip() - finally: - self.closed = True - - def isatty(self): - if self.closed: - raise ValueError("I/O operation on closed file") - return False - - def seek(self, pos, whence=0): - """Seek to specified position into the chunk. - Default position is 0 (start of chunk). - If the file is not seekable, this will result in an error. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if not self.seekable: - raise OSError("cannot seek") - if whence == 1: - pos = pos + self.size_read - elif whence == 2: - pos = pos + self.chunksize - if pos < 0 or pos > self.chunksize: - raise RuntimeError - self.file.seek(self.offset + pos, 0) - self.size_read = pos - - def tell(self): - if self.closed: - raise ValueError("I/O operation on closed file") - return self.size_read - - def read(self, size=-1): - """Read at most size bytes from the chunk. - If size is omitted or negative, read until the end - of the chunk. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.size_read >= self.chunksize: - return b'' - if size < 0: - size = self.chunksize - self.size_read - if size > self.chunksize - self.size_read: - size = self.chunksize - self.size_read - data = self.file.read(size) - self.size_read = self.size_read + len(data) - if self.size_read == self.chunksize and \ - self.align and \ - (self.chunksize & 1): - dummy = self.file.read(1) - self.size_read = self.size_read + len(dummy) - return data - - def skip(self): - """Skip the rest of the chunk. - If you are not interested in the contents of the chunk, - this method should be called so that the file points to - the start of the next chunk. 
- """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.seekable: - try: - n = self.chunksize - self.size_read - # maybe fix alignment - if self.align and (self.chunksize & 1): - n = n + 1 - self.file.seek(n, 1) - self.size_read = self.size_read + n - return - except OSError: - pass - while self.size_read < self.chunksize: - n = min(8192, self.chunksize - self.size_read) - dummy = self.read(n) - if not dummy: - raise EOFError From 2c94b809aeddc047e93cb0bd0f47720f36f01d67 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 4 Apr 2025 16:02:29 +0900 Subject: [PATCH 008/126] move cspell to last step --- .github/workflows/ci.yaml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 586e00be26..05516c9270 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -313,13 +313,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: install extra dictionaries - run: npm install @cspell/dict-en_us @cspell/dict-cpp @cspell/dict-python @cspell/dict-rust @cspell/dict-win32 @cspell/dict-shell - - name: spell checker - uses: streetsidesoftware/cspell-action@v6 - with: - files: '**/*.rs' - incremental_files_only: true - uses: dtolnay/rust-toolchain@stable with: components: rustfmt, clippy @@ -339,6 +332,14 @@ jobs: - name: check wasm code with prettier # prettier doesn't handle ignore files very well: https://github.com/prettier/prettier/issues/8506 run: cd wasm && git ls-files -z | xargs -0 prettier --check -u + # Keep cspell check as the last step. This is optional test. + - name: install extra dictionaries + run: npm install @cspell/dict-en_us @cspell/dict-cpp @cspell/dict-python @cspell/dict-rust @cspell/dict-win32 @cspell/dict-shell + - name: spell checker + uses: streetsidesoftware/cspell-action@v6 + with: + files: '**/*.rs' + incremental_files_only: true miri: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} From 7ac61f384013b3e94828053122cb869b74fed079 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 4 Apr 2025 15:38:36 +0900 Subject: [PATCH 009/126] fix cspell warnings --- .cspell.dict/cpython.txt | 5 ++ .cspell.dict/python-more.txt | 107 ++++++++++++++++++++++++++++++++++- .cspell.dict/rust-more.txt | 25 +++++++- .cspell.json | 17 ++++++ common/src/boxvec.rs | 1 + common/src/hash.rs | 4 +- common/src/rc.rs | 2 +- common/src/static_cell.rs | 4 +- derive/src/lib.rs | 10 ++-- src/lib.rs | 6 +- stdlib/src/array.rs | 6 +- stdlib/src/binascii.rs | 4 +- stdlib/src/csv.rs | 8 +-- stdlib/src/fcntl.rs | 2 + stdlib/src/grp.rs | 1 + stdlib/src/locale.rs | 2 + stdlib/src/math.rs | 26 ++++----- stdlib/src/random.rs | 4 +- stdlib/src/syslog.rs | 2 +- stdlib/src/tkinter.rs | 4 +- stdlib/src/zlib.rs | 2 +- vm/src/anystr.rs | 2 +- vm/src/builtins/bytearray.rs | 4 +- vm/src/builtins/bytes.rs | 6 +- vm/src/builtins/complex.rs | 2 +- vm/src/builtins/dict.rs | 8 +-- vm/src/builtins/function.rs | 16 +++--- vm/src/builtins/int.rs | 15 ++--- vm/src/builtins/memory.rs | 8 +-- vm/src/builtins/str.rs | 4 +- vm/src/builtins/super.rs | 4 +- vm/src/bytesinner.rs | 52 ++++++++++------- vm/src/cformat.rs | 2 + vm/src/exceptions.rs | 10 ++-- vm/src/frame.rs | 20 ++++--- vm/src/function/argument.rs | 2 +- vm/src/function/fspath.rs | 2 +- vm/src/function/protocol.rs | 14 ++--- vm/src/import.rs | 8 +-- vm/src/object/core.rs | 3 +- vm/src/ospath.rs | 10 ++-- vm/src/protocol/buffer.rs | 22 +++---- vm/src/protocol/iter.rs | 6 +- 
vm/src/protocol/object.rs | 22 +++---- vm/src/stdlib/builtins.rs | 14 +++-- vm/src/stdlib/io.rs | 96 +++++++++++++++---------------- vm/src/stdlib/itertools.rs | 14 ++--- vm/src/stdlib/nt.rs | 10 ++-- vm/src/stdlib/operator.rs | 6 +- vm/src/stdlib/os.rs | 2 +- vm/src/stdlib/sre.rs | 34 +++++------ vm/src/stdlib/time.rs | 5 +- vm/src/vm/thread.rs | 8 +-- vm/src/vm/vm_new.rs | 4 +- vm/src/vm/vm_ops.rs | 12 ++-- vm/sre_engine/src/engine.rs | 32 +++++------ wtf8/src/lib.rs | 6 +- 57 files changed, 450 insertions(+), 277 deletions(-) diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index 0ac387634d..f7e282e4bc 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -10,14 +10,19 @@ cellarg cellvar cellvars cmpop +weakreflist +XXPRIME dictoffset elts +xstat excepthandler +fileutils finalbody freevar freevars fromlist heaptype +HIGHRES IMMUTABLETYPE kwonlyarg kwonlyargs diff --git a/.cspell.dict/python-more.txt b/.cspell.dict/python-more.txt index a482c880cc..526f5ba166 100644 --- a/.cspell.dict/python-more.txt +++ b/.cspell.dict/python-more.txt @@ -1,17 +1,34 @@ +abiflags abstractmethods +aenter +aexit aiter anext +appendleft +argcount arrayiterator arraytype asend +asyncgen athrow +backslashreplace basicsize +bdfl +bigcharset +breakpointhook cformat +chunksize classcell +closefd closesocket codepoint codepoints +codesize +contextvar cpython +cratio +dealloc +debugbuild decompressor defaultaction descr @@ -19,15 +36,29 @@ dictcomp dictitems dictkeys dictview +digestmod +dllhandle docstring docstrings dunder +endianness +endpos eventmask +excepthook +exceptiongroup +exitfuncs +extendleft +fastlocals fdel +fedcba fget fileencoding fillchar +fillvalue finallyhandler +firstiter +firstlineno +fnctl frombytes fromhex fromunicode @@ -35,45 +66,87 @@ fset fspath fstring fstrings +ftruncate genexpr getattro +getcodesize +getdefaultencoding +getfilesystemencodeerrors +getfilesystemencoding getformat +getframe getnewargs +getpip +getrandom +getrecursionlimit +getrefcount getweakrefcount getweakrefs +getwindowsversion +gmtoff +groupdict +groupindex +hamt hostnames +idfunc idiv impls +indexgroup infj instancecheck instanceof +irepeat isabstractmethod +isbytes +iscased +istext itemiterator itemsize iternext +keepends +keyfunc keyiterator kwarg kwargs +kwdefaults +kwonlyargcount +lastgroup linearization linearize listcomp +lvalue mappingproxy +maskpri +maxdigits +MAXGROUPS +MAXREPEAT maxsplit +maxunicode memoryview memoryviewiterator metaclass metaclasses metatype +mformat mro mros +multiarch +namereplace nanj +nbytes +ncallbacks ndigits ndim +nlocals nonbytes origname posixsubprocess +posonly +posonlyargcount +profilefunc +pycodecs +pycs pyexpat -pytraverse PYTHONDEBUG PYTHONHOME PYTHONINSPECT @@ -82,11 +155,19 @@ PYTHONPATH PYTHONPATH PYTHONVERBOSE PYTHONWARNINGS +pytraverse qualname +quotetabs radd rdiv rdivmod +readall +readbuffer reconstructor +releaselevel +reverseitemiterator +reverseiterator +reversekeyiterator reversevalueiterator rfloordiv rlshift @@ -95,22 +176,42 @@ rpow rrshift rsub rtruediv +rvalue scproxy setattro setcomp showwarnmsg -warnmsg +signum +slotnames stacklevel +stacksize +startpos subclasscheck subclasshook +suboffset +sumprod +surrogateescape +surrogatepass +sysconfigdata +sysvars +titlecased +unimportable unionable unraisablehook +unsliceable +urandom valueiterator vararg varargs varnames warningregistry +warnmsg +warnoptions warnopts weakproxy winver -xopts \ No newline at end of file +withdata +xmlcharrefreplace +xoptions +xopts +yieldfrom diff 
--git a/.cspell.dict/rust-more.txt b/.cspell.dict/rust-more.txt index f2177dd4c7..d75529789f 100644 --- a/.cspell.dict/rust-more.txt +++ b/.cspell.dict/rust-more.txt @@ -3,27 +3,42 @@ bidi biguint bindgen bitflags +bitor bstr byteorder +byteset +caseless chrono consts +cranelift cstring +datelike +deserializer flate2 fract +getres hasher +hexf +hexversion idents +illumos indexmap insta keccak lalrpop +lexopt libc +libloading libz longlong Manually maplit memmap +memmem metas modpow +msvc +muldiv nanos objclass peekable @@ -31,17 +46,25 @@ powc powf prepended punct +puruspe replacen rsplitn rustc rustfmt +rustyline seekfrom +siphash splitn subsec +thiserror +timelike timsort trai ulonglong unic unistd +unsync +wasmbind +widestring winapi -winsock \ No newline at end of file +winsock diff --git a/.cspell.json b/.cspell.json index 562b300ffa..99718a6515 100644 --- a/.cspell.json +++ b/.cspell.json @@ -47,16 +47,24 @@ // words - list of words to be always considered correct "words": [ // RustPython + "aiterable", + "alnum", "baseclass", + "boxvec", "Bytecode", "cfgs", "codegen", + "coro", "dedentations", "dedents", "deduped", "downcasted", "dumpable", + "emscripten", + "excs", + "finalizer", "GetSet", + "groupref", "internable", "makeunicodedata", "miri", @@ -100,6 +108,15 @@ "unraisable", "wasi", "zelf", + // unix + "CLOEXEC", + "codeset", + "endgrent", + "getrusage", + "nanosleep", + "WRLCK", + // win32 + "birthtime", ], // flagWords - list of words to be always considered incorrect "flagWords": [ diff --git a/common/src/boxvec.rs b/common/src/boxvec.rs index 1a1d57c169..4501835477 100644 --- a/common/src/boxvec.rs +++ b/common/src/boxvec.rs @@ -1,3 +1,4 @@ +// cspell:ignore //! An unresizable vector backed by a `Box<[T]>` #![allow(clippy::needless_lifetimes)] diff --git a/common/src/hash.rs b/common/src/hash.rs index 8fef70c8b9..1ae561650c 100644 --- a/common/src/hash.rs +++ b/common/src/hash.rs @@ -53,14 +53,14 @@ impl HashSecret { fix_sentinel(mod_int(self.hash_one(data) as _)) } - pub fn hash_iter<'a, T: 'a, I, F, E>(&self, iter: I, hashf: F) -> Result + pub fn hash_iter<'a, T: 'a, I, F, E>(&self, iter: I, hash_func: F) -> Result where I: IntoIterator, F: Fn(&'a T) -> Result, { let mut hasher = self.build_hasher(); for element in iter { - let item_hash = hashf(element)?; + let item_hash = hash_func(element)?; item_hash.hash(&mut hasher); } Ok(fix_sentinel(mod_int(hasher.finish() as PyHash))) diff --git a/common/src/rc.rs b/common/src/rc.rs index 81207e840c..40c7cf97a8 100644 --- a/common/src/rc.rs +++ b/common/src/rc.rs @@ -3,7 +3,7 @@ use std::rc::Rc; #[cfg(feature = "threading")] use std::sync::Arc; -// type aliases instead of newtypes because you can't do `fn method(self: PyRc)` with a +// type aliases instead of new-types because you can't do `fn method(self: PyRc)` with a // newtype; requires the arbitrary_self_types unstable feature #[cfg(feature = "threading")] diff --git a/common/src/static_cell.rs b/common/src/static_cell.rs index 7f16dad399..407b83ae0a 100644 --- a/common/src/static_cell.rs +++ b/common/src/static_cell.rs @@ -13,7 +13,7 @@ mod non_threading { impl StaticCell { #[doc(hidden)] - pub const fn _from_localkey(inner: &'static LocalKey>) -> Self { + pub const fn _from_local_key(inner: &'static LocalKey>) -> Self { Self { inner } } @@ -58,7 +58,7 @@ mod non_threading { ::std::thread_local! 
{ $vis static $name: $crate::lock::OnceCell<&'static $t> = $crate::lock::OnceCell::new(); } - $crate::static_cell::StaticCell::_from_localkey(&$name) + $crate::static_cell::StaticCell::_from_local_key(&$name) };)+ }; } diff --git a/derive/src/lib.rs b/derive/src/lib.rs index a96c2aef6e..2a7b3d68fc 100644 --- a/derive/src/lib.rs +++ b/derive/src/lib.rs @@ -34,7 +34,7 @@ pub fn derive_from_args(input: TokenStream) -> TokenStream { /// - `IMMUTABLETYPE`: class attributes are immutable. /// - `with`: which trait implementations are to be included in the python class. /// ```rust, ignore -/// #[pyclass(module = "mymodule", name = "MyClass", base = "BaseClass")] +/// #[pyclass(module = "my_module", name = "MyClass", base = "BaseClass")] /// struct MyStruct { /// x: i32, /// } @@ -161,8 +161,8 @@ pub fn pyexception(attr: TokenStream, item: TokenStream) -> TokenStream { /// - `name`: the name of the python module, /// by default, it is the name of the module, but this can be configured. /// ```rust, ignore -/// // This will create a module named `mymodule` -/// #[pymodule(name = "mymodule")] +/// // This will create a module named `my_module` +/// #[pymodule(name = "my_module")] /// mod module { /// } /// ``` @@ -173,7 +173,7 @@ pub fn pyexception(attr: TokenStream, item: TokenStream) -> TokenStream { /// } /// /// #[pymodule(with(submodule))] -/// mod mymodule { +/// mod my_module { /// } /// ``` /// - `with`: declare the list of submodules that this module contains (see `sub` for example). @@ -190,7 +190,7 @@ pub fn pyexception(attr: TokenStream, item: TokenStream) -> TokenStream { /// #### Examples /// ```rust, ignore /// #[pymodule] -/// mod mymodule { +/// mod my_module { /// #[pyattr] /// const MY_CONSTANT: i32 = 42; /// #[pyattr] diff --git a/src/lib.rs b/src/lib.rs index 67a2a16eef..3fa5292e94 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,13 +9,13 @@ //! use rustpython_vm::{pymodule, py_freeze}; //! fn main() { //! rustpython::run(|vm| { -//! vm.add_native_module("mymod".to_owned(), Box::new(mymod::make_module)); -//! vm.add_frozen(py_freeze!(source = "def foo(): pass", module_name = "otherthing")); +//! vm.add_native_module("my_mod".to_owned(), Box::new(my_mod::make_module)); +//! vm.add_frozen(py_freeze!(source = "def foo(): pass", module_name = "other_thing")); //! }); //! } //! //! #[pymodule] -//! mod mymod { +//! mod my_mod { //! use rustpython_vm::builtins::PyStrRef; //TODO: use rustpython_vm::prelude::*; //! 
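
As an illustrative aside (not part of this patch): the renamed doc comment in src/lib.rs above only shows fragments of the embedding API. The sketch below assembles those fragments into one self-contained program, assuming only the items visible in the hunk (`rustpython::run`, `add_native_module`, `py_freeze!`, `#[pymodule]`); the `double` function is a hypothetical addition used purely for demonstration.

```rust
// Sketch only: a complete embedder built from the fragments in the doc
// comment above. `double` is a hypothetical native function added here
// for illustration; everything else mirrors the hunk.
use rustpython_vm::{pymodule, py_freeze};

fn main() {
    rustpython::run(|vm| {
        // expose the native module under the name used in the doc comment
        vm.add_native_module("my_mod".to_owned(), Box::new(my_mod::make_module));
        // bundle a pure-Python module compiled at build time
        vm.add_frozen(py_freeze!(source = "def foo(): pass", module_name = "other_thing"));
    });
}

#[pymodule]
mod my_mod {
    // callable from Python as `my_mod.double(21)`
    #[pyfunction]
    fn double(x: i64) -> i64 {
        x * 2
    }
}
```
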
diff --git a/stdlib/src/array.rs b/stdlib/src/array.rs index fd83f0a5ad..db4394e44f 100644 --- a/stdlib/src/array.rs +++ b/stdlib/src/array.rs @@ -880,14 +880,14 @@ mod array { return Err(vm.new_value_error("negative count".to_owned())); } let n = vm.check_repeat_or_overflow_error(itemsize, n)?; - let nbytes = n * itemsize; + let n_bytes = n * itemsize; - let b = vm.call_method(&f, "read", (nbytes,))?; + let b = vm.call_method(&f, "read", (n_bytes,))?; let b = b .downcast::() .map_err(|_| vm.new_type_error("read() didn't return bytes".to_owned()))?; - let not_enough_bytes = b.len() != nbytes; + let not_enough_bytes = b.len() != n_bytes; self._from_bytes(b.as_bytes(), itemsize, vm)?; diff --git a/stdlib/src/binascii.rs b/stdlib/src/binascii.rs index f154a2251b..a8df1fb60b 100644 --- a/stdlib/src/binascii.rs +++ b/stdlib/src/binascii.rs @@ -1,4 +1,4 @@ -// spell-checker:ignore hexlify unhexlify uuencodes +// spell-checker:ignore hexlify unhexlify uuencodes CRCTAB pub(super) use decl::crc32; pub(crate) use decl::make_module; @@ -339,7 +339,7 @@ mod decl { || (buffer[idx + 1] >= b'a' && buffer[idx + 1] <= b'f') || (buffer[idx + 1] >= b'0' && buffer[idx + 1] <= b'9')) { - // hexval + // hex val if let (Some(ch1), Some(ch2)) = (unhex_nibble(buffer[idx]), unhex_nibble(buffer[idx + 1])) { diff --git a/stdlib/src/csv.rs b/stdlib/src/csv.rs index 214209ab9e..39c15fd952 100644 --- a/stdlib/src/csv.rs +++ b/stdlib/src/csv.rs @@ -981,14 +981,14 @@ mod _csv { String::from_utf8(input.to_vec()).unwrap() }; loop { - let (res, nread, nwritten, nends) = reader.read_record( + let (res, n_read, n_written, n_ends) = reader.read_record( &input.as_bytes()[input_offset..], &mut buffer[output_offset..], &mut output_ends[output_ends_offset..], ); - input_offset += nread; - output_offset += nwritten; - output_ends_offset += nends; + input_offset += n_read; + output_offset += n_written; + output_ends_offset += n_ends; match res { csv_core::ReadRecordResult::InputEmpty => {} csv_core::ReadRecordResult::OutputFull => resize_buf(buffer), diff --git a/stdlib/src/fcntl.rs b/stdlib/src/fcntl.rs index 307d6e4351..7dff14ccd8 100644 --- a/stdlib/src/fcntl.rs +++ b/stdlib/src/fcntl.rs @@ -1,3 +1,5 @@ +// cspell:disable + pub(crate) use fcntl::make_module; #[pymodule] diff --git a/stdlib/src/grp.rs b/stdlib/src/grp.rs index 2cdad56588..9c946dd582 100644 --- a/stdlib/src/grp.rs +++ b/stdlib/src/grp.rs @@ -1,3 +1,4 @@ +// cspell:disable pub(crate) use grp::make_module; #[pymodule] diff --git a/stdlib/src/locale.rs b/stdlib/src/locale.rs index 9ca71a0957..dfc6c93497 100644 --- a/stdlib/src/locale.rs +++ b/stdlib/src/locale.rs @@ -1,3 +1,5 @@ +// cspell:ignore abday abmon yesexpr + pub(crate) use _locale::make_module; #[cfg(windows)] diff --git a/stdlib/src/math.rs b/stdlib/src/math.rs index f86ebb591e..93929e3566 100644 --- a/stdlib/src/math.rs +++ b/stdlib/src/math.rs @@ -136,7 +136,7 @@ mod math { if base.is_sign_negative() { return Err(vm.new_value_error("math domain error".to_owned())); } - log2(x, vm).map(|logx| logx / base.log2()) + log2(x, vm).map(|log_x| log_x / base.log2()) } #[pyfunction] @@ -188,7 +188,7 @@ mod math { #[pyfunction] fn log10(x: PyObjectRef, vm: &VirtualMachine) -> PyResult { - log2(x, vm).map(|logx| logx / 10f64.log2()) + log2(x, vm).map(|log_x| log_x / 10f64.log2()) } #[pyfunction] @@ -588,16 +588,16 @@ mod math { where F: Fn(&BigInt, &PyInt) -> BigInt, { - let argvec = args.into_vec(); + let arg_vec = args.into_vec(); - if argvec.is_empty() { + if arg_vec.is_empty() { return default; - } else if 
argvec.len() == 1 { - return op(argvec[0].as_bigint(), &argvec[0]); + } else if arg_vec.len() == 1 { + return op(arg_vec[0].as_bigint(), &arg_vec[0]); } - let mut res = argvec[0].as_bigint().clone(); - for num in &argvec[1..] { + let mut res = arg_vec[0].as_bigint().clone(); + for num in &arg_vec[1..] { res = op(&res, num) } res @@ -895,15 +895,15 @@ mod math { return Err(vm.new_value_error("math domain error".to_owned())); } - let absx = x.abs(); - let absy = y.abs(); - let modulus = absx % absy; + let abs_x = x.abs(); + let abs_y = y.abs(); + let modulus = abs_x % abs_y; - let c = absy - modulus; + let c = abs_y - modulus; let r = match modulus.partial_cmp(&c) { Some(Ordering::Less) => modulus, Some(Ordering::Greater) => -c, - _ => modulus - 2.0 * fmod(0.5 * (absx - modulus), absy), + _ => modulus - 2.0 * fmod(0.5 * (abs_x - modulus), abs_y), }; return Ok(1.0_f64.copysign(x) * r); diff --git a/stdlib/src/random.rs b/stdlib/src/random.rs index 31e523b68b..a2aaff2612 100644 --- a/stdlib/src/random.rs +++ b/stdlib/src/random.rs @@ -79,7 +79,7 @@ mod _random { }; let words = (k - 1) / 32 + 1; - let wordarray = (0..words) + let word_array = (0..words) .map(|_| { let word = gen_u32(k); k = k.wrapping_sub(32); @@ -87,7 +87,7 @@ mod _random { }) .collect::>(); - let uint = BigUint::new(wordarray); + let uint = BigUint::new(word_array); // very unlikely but might as well check let sign = if uint.is_zero() { Sign::NoSign diff --git a/stdlib/src/syslog.rs b/stdlib/src/syslog.rs index 3b36f9ea74..69e9d1cb9e 100644 --- a/stdlib/src/syslog.rs +++ b/stdlib/src/syslog.rs @@ -1,4 +1,4 @@ -// spell-checker:ignore logoption openlog setlogmask upto +// spell-checker:ignore logoption openlog setlogmask upto NDELAY pub(crate) use syslog::make_module; diff --git a/stdlib/src/tkinter.rs b/stdlib/src/tkinter.rs index 1d14c9f38c..907dc55002 100644 --- a/stdlib/src/tkinter.rs +++ b/stdlib/src/tkinter.rs @@ -1,3 +1,5 @@ +// cspell:ignore createcommand + pub(crate) use self::_tkinter::make_module; #[pymodule] @@ -45,7 +47,7 @@ mod _tkinter { #[pyfunction] fn create(args: FuncArgs, _vm: &VirtualMachine) -> PyResult { - // TODO: handle arguements + // TODO: handle arguments // TODO: this means creating 2 tk instances is not possible. 
let tk = Tk::new(()).unwrap(); Ok(TkApp { diff --git a/stdlib/src/zlib.rs b/stdlib/src/zlib.rs index 40e364f8d4..9c19b74066 100644 --- a/stdlib/src/zlib.rs +++ b/stdlib/src/zlib.rs @@ -1,4 +1,4 @@ -// spell-checker:ignore compressobj decompressobj zdict chunksize zlibmodule miniz +// spell-checker:ignore compressobj decompressobj zdict chunksize zlibmodule miniz chunker pub(crate) use zlib::make_module; diff --git a/vm/src/anystr.rs b/vm/src/anystr.rs index 6bc8a4dd13..79b15b6a3f 100644 --- a/vm/src/anystr.rs +++ b/vm/src/anystr.rs @@ -200,7 +200,7 @@ pub trait AnyStr { F: Fn(&Self) -> PyObjectRef; #[inline] - fn py_startsendswith<'a, T, F>( + fn py_starts_ends_with<'a, T, F>( &self, affix: &'a PyObject, func_name: &str, diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index 36cf8cadcd..3d4822cf48 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -375,7 +375,7 @@ impl PyByteArray { Some(x) => x, None => return Ok(false), }; - substr.py_startsendswith( + substr.py_starts_ends_with( &affix, "endswith", "bytes", @@ -396,7 +396,7 @@ impl PyByteArray { Some(x) => x, None => return Ok(false), }; - substr.py_startsendswith( + substr.py_starts_ends_with( &affix, "startswith", "bytes", diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index eff4190eda..434de6a76c 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -299,7 +299,7 @@ impl PyBytes { Some(x) => x, None => return Ok(false), }; - substr.py_startsendswith( + substr.py_starts_ends_with( &affix, "endswith", "bytes", @@ -319,7 +319,7 @@ impl PyBytes { Some(x) => x, None => return Ok(false), }; - substr.py_startsendswith( + substr.py_starts_ends_with( &affix, "startswith", "bytes", @@ -541,7 +541,7 @@ impl PyRef { /// Other possible values are 'ignore', 'replace' /// For a list of possible encodings, /// see https://docs.python.org/3/library/codecs.html#standard-encodings - /// currently, only 'utf-8' and 'ascii' emplemented + /// currently, only 'utf-8' and 'ascii' implemented #[pymethod] fn decode(self, args: DecodeArgs, vm: &VirtualMachine) -> PyResult { bytes_decode(self.into(), args, vm) diff --git a/vm/src/builtins/complex.rs b/vm/src/builtins/complex.rs index d48707261c..02324704b3 100644 --- a/vm/src/builtins/complex.rs +++ b/vm/src/builtins/complex.rs @@ -53,7 +53,7 @@ impl From for PyComplex { impl PyObjectRef { /// Tries converting a python object into a complex, returns an option of whether the complex - /// and whether the object was a complex originally or coereced into one + /// and whether the object was a complex originally or coerced into one pub fn try_complex(&self, vm: &VirtualMachine) -> PyResult> { if let Some(complex) = self.payload_if_exact::(vm) { return Ok(Some((complex.value, true))); diff --git a/vm/src/builtins/dict.rs b/vm/src/builtins/dict.rs index a19b11fcfb..fc2f206dd0 100644 --- a/vm/src/builtins/dict.rs +++ b/vm/src/builtins/dict.rs @@ -281,8 +281,8 @@ impl PyDict { #[pymethod(magic)] fn or(&self, other: PyObjectRef, vm: &VirtualMachine) -> PyResult { - let dicted: Result = other.downcast(); - if let Ok(other) = dicted { + let other_dict: Result = other.downcast(); + if let Ok(other) = other_dict { let self_cp = self.copy(); self_cp.merge_dict(other, vm)?; return Ok(self_cp.into_pyobject(vm)); @@ -397,8 +397,8 @@ impl PyRef { #[pymethod(magic)] fn ror(self, other: PyObjectRef, vm: &VirtualMachine) -> PyResult { - let dicted: Result = other.downcast(); - if let Ok(other) = dicted { + let other_dict: Result = 
other.downcast(); + if let Ok(other) = other_dict { let other_cp = other.copy(); other_cp.merge_dict(self, vm)?; return Ok(other_cp.into_pyobject(vm)); diff --git a/vm/src/builtins/function.rs b/vm/src/builtins/function.rs index f7b5d39993..3181f1068f 100644 --- a/vm/src/builtins/function.rs +++ b/vm/src/builtins/function.rs @@ -198,9 +198,9 @@ impl PyFunction { // function definition calls for if nargs < nexpected_args { let defaults = get_defaults!().0.as_ref().map(|tup| tup.as_slice()); - let ndefs = defaults.map_or(0, |d| d.len()); + let n_defs = defaults.map_or(0, |d| d.len()); - let nrequired = code.arg_count as usize - ndefs; + let nrequired = code.arg_count as usize - n_defs; // Given the number of defaults available, check all the arguments for which we // _don't_ have defaults; if any are missing, raise an exception @@ -642,9 +642,9 @@ impl PyBoundMethod { vm: &VirtualMachine, ) -> (Option, (PyObjectRef, Option)) { let builtins_getattr = vm.builtins.get_attr("getattr", vm).ok(); - let funcself = self.object.clone(); - let funcname = self.function.get_attr("__name__", vm).ok(); - (builtins_getattr, (funcself, funcname)) + let func_self = self.object.clone(); + let func_name = self.function.get_attr("__name__", vm).ok(); + (builtins_getattr, (func_self, func_name)) } #[pygetset(magic)] @@ -700,16 +700,16 @@ impl Representable for PyBoundMethod { #[inline] fn repr_str(zelf: &Py, vm: &VirtualMachine) -> PyResult { #[allow(clippy::needless_match)] // False positive on nightly - let funcname = + let func_name = if let Some(qname) = vm.get_attribute_opt(zelf.function.clone(), "__qualname__")? { Some(qname) } else { vm.get_attribute_opt(zelf.function.clone(), "__name__")? }; - let funcname: Option = funcname.and_then(|o| o.downcast().ok()); + let func_name: Option = func_name.and_then(|o| o.downcast().ok()); Ok(format!( "", - funcname.as_ref().map_or("?", |s| s.as_str()), + func_name.as_ref().map_or("?", |s| s.as_str()), &zelf.object.repr(vm)?.as_str(), )) } diff --git a/vm/src/builtins/int.rs b/vm/src/builtins/int.rs index d644343f1c..5f12f2490e 100644 --- a/vm/src/builtins/int.rs +++ b/vm/src/builtins/int.rs @@ -524,13 +524,14 @@ impl PyInt { // Malachite division uses floor rounding, Python uses half-even let remainder = &value - &rounded; - let halfpow10 = &pow10 / BigInt::from(2); - let correction = - if remainder > halfpow10 || (remainder == halfpow10 && quotient.is_odd()) { - pow10 - } else { - BigInt::from(0) - }; + let half_pow10 = &pow10 / BigInt::from(2); + let correction = if remainder > half_pow10 + || (remainder == half_pow10 && quotient.is_odd()) + { + pow10 + } else { + BigInt::from(0) + }; let rounded = (rounded + correction) * sign; return Ok(vm.ctx.new_int(rounded)); } diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index c5af12dc1f..09239e3e49 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -43,7 +43,7 @@ pub struct PyMemoryView { // avoid double release when memoryview had released the buffer before drop buffer: ManuallyDrop, // the released memoryview does not mean the buffer is destroyed - // because the possible another memeoryview is viewing from it + // because the possible another memoryview is viewing from it released: AtomicCell, // start does NOT mean the bytes before start will not be visited, // it means the point we starting to get the absolute position via @@ -103,7 +103,7 @@ impl PyMemoryView { }) } - /// don't use this function to create the memeoryview if the buffer is exporting + /// don't use this 
function to create the memoryview if the buffer is exporting /// via another memoryview, use PyMemoryView::new_view() or PyMemoryView::from_object /// to reduce the chain pub fn from_buffer_range( @@ -262,8 +262,8 @@ impl PyMemoryView { // no suboffset set, stride must be positive self.start += stride as usize * range.start; } - let newlen = range.len(); - self.desc.dim_desc[dim].0 = newlen; + let new_len = range.len(); + self.desc.dim_desc[dim].0 = new_len; } fn init_slice(&mut self, slice: &PySlice, dim: usize, vm: &VirtualMachine) -> PyResult<()> { diff --git a/vm/src/builtins/str.rs b/vm/src/builtins/str.rs index 8aafc63c3b..90c702a14d 100644 --- a/vm/src/builtins/str.rs +++ b/vm/src/builtins/str.rs @@ -836,7 +836,7 @@ impl PyStr { Some(x) => x, None => return Ok(false), }; - substr.py_startsendswith( + substr.py_starts_ends_with( &affix, "endswith", "str", @@ -856,7 +856,7 @@ impl PyStr { Some(x) => x, None => return Ok(false), }; - substr.py_startsendswith( + substr.py_starts_ends_with( &affix, "startswith", "str", diff --git a/vm/src/builtins/super.rs b/vm/src/builtins/super.rs index 5f363ebea5..442d162c78 100644 --- a/vm/src/builtins/super.rs +++ b/vm/src/builtins/super.rs @@ -29,7 +29,7 @@ impl PySuperInner { let obj = if vm.is_none(&obj) { None } else { - let obj_type = supercheck(typ.clone(), obj.clone(), vm)?; + let obj_type = super_check(typ.clone(), obj.clone(), vm)?; Some((obj, obj_type)) }; Ok(Self { typ, obj }) @@ -236,7 +236,7 @@ impl Representable for PySuper { } } -fn supercheck(ty: PyTypeRef, obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { +fn super_check(ty: PyTypeRef, obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { if let Ok(cls) = obj.clone().downcast::() { if cls.fast_issubclass(&ty) { return Ok(cls); diff --git a/vm/src/bytesinner.rs b/vm/src/bytesinner.rs index 63d5148e04..88d2b9744f 100644 --- a/vm/src/bytesinner.rs +++ b/vm/src/bytesinner.rs @@ -748,10 +748,10 @@ impl PyBytesInner { self.elements.py_zfill(width) } - // len(self)>=1, from="", len(to)>=1, maxcount>=1 - fn replace_interleave(&self, to: PyBytesInner, maxcount: Option) -> Vec { + // len(self)>=1, from="", len(to)>=1, max_count>=1 + fn replace_interleave(&self, to: PyBytesInner, max_count: Option) -> Vec { let place_count = self.elements.len() + 1; - let count = maxcount.map_or(place_count, |v| std::cmp::min(v, place_count)) - 1; + let count = max_count.map_or(place_count, |v| std::cmp::min(v, place_count)) - 1; let capacity = self.elements.len() + count * to.len(); let mut result = Vec::with_capacity(capacity); let to_slice = to.elements.as_slice(); @@ -764,8 +764,12 @@ impl PyBytesInner { result } - fn replace_delete(&self, from: PyBytesInner, maxcount: Option) -> Vec { - let count = count_substring(self.elements.as_slice(), from.elements.as_slice(), maxcount); + fn replace_delete(&self, from: PyBytesInner, max_count: Option) -> Vec { + let count = count_substring( + self.elements.as_slice(), + from.elements.as_slice(), + max_count, + ); if count == 0 { // no matches return self.elements.clone(); @@ -793,7 +797,7 @@ impl PyBytesInner { &self, from: PyBytesInner, to: PyBytesInner, - maxcount: Option, + max_count: Option, ) -> Vec { let len = from.len(); let mut iter = self.elements.find_iter(&from.elements); @@ -801,7 +805,7 @@ impl PyBytesInner { let mut new = if let Some(offset) = iter.next() { let mut new = self.elements.clone(); new[offset..offset + len].clone_from_slice(to.elements.as_slice()); - if maxcount == Some(1) { + if max_count == Some(1) { return new; } else { new @@ -810,7 
+814,7 @@ impl PyBytesInner { return self.elements.clone(); }; - let mut count = maxcount.unwrap_or(usize::MAX) - 1; + let mut count = max_count.unwrap_or(usize::MAX) - 1; for offset in iter { new[offset..offset + len].clone_from_slice(to.elements.as_slice()); count -= 1; @@ -825,10 +829,14 @@ impl PyBytesInner { &self, from: PyBytesInner, to: PyBytesInner, - maxcount: Option, + max_count: Option, vm: &VirtualMachine, ) -> PyResult> { - let count = count_substring(self.elements.as_slice(), from.elements.as_slice(), maxcount); + let count = count_substring( + self.elements.as_slice(), + from.elements.as_slice(), + max_count, + ); if count == 0 { // no matches, return unchanged return Ok(self.elements.clone()); @@ -866,19 +874,19 @@ impl PyBytesInner { &self, from: PyBytesInner, to: PyBytesInner, - maxcount: OptionalArg, + max_count: OptionalArg, vm: &VirtualMachine, ) -> PyResult> { // stringlib_replace in CPython - let maxcount = match maxcount { - OptionalArg::Present(maxcount) if maxcount >= 0 => { - if maxcount == 0 || (self.elements.is_empty() && !from.is_empty()) { + let max_count = match max_count { + OptionalArg::Present(max_count) if max_count >= 0 => { + if max_count == 0 || (self.elements.is_empty() && !from.is_empty()) { // nothing to do; return the original bytes return Ok(self.elements.clone()); } else if self.elements.is_empty() && from.is_empty() { return Ok(to.elements); } - Some(maxcount as usize) + Some(max_count as usize) } _ => None, }; @@ -892,7 +900,7 @@ impl PyBytesInner { // insert the 'to' bytes everywhere. // >>> b"Python".replace(b"", b".") // b'.P.y.t.h.o.n.' - return Ok(self.replace_interleave(to, maxcount)); + return Ok(self.replace_interleave(to, max_count)); } // Except for b"".replace(b"", b"A") == b"A" there is no way beyond this @@ -904,13 +912,13 @@ impl PyBytesInner { if to.elements.is_empty() { // delete all occurrences of 'from' bytes - Ok(self.replace_delete(from, maxcount)) + Ok(self.replace_delete(from, max_count)) } else if from.len() == to.len() { // Handle special case where both bytes have the same length - Ok(self.replace_in_place(from, to, maxcount)) + Ok(self.replace_in_place(from, to, max_count)) } else { // Otherwise use the more generic algorithms - self.replace_general(from, to, maxcount, vm) + self.replace_general(from, to, max_count, vm) } } @@ -978,10 +986,10 @@ where } #[inline] -fn count_substring(haystack: &[u8], needle: &[u8], maxcount: Option) -> usize { +fn count_substring(haystack: &[u8], needle: &[u8], max_count: Option) -> usize { let substrings = haystack.find_iter(needle); - if let Some(maxcount) = maxcount { - std::cmp::min(substrings.take(maxcount).count(), maxcount) + if let Some(max_count) = max_count { + std::cmp::min(substrings.take(max_count).count(), max_count) } else { substrings.count() } diff --git a/vm/src/cformat.rs b/vm/src/cformat.rs index 93c409172c..2904b9432e 100644 --- a/vm/src/cformat.rs +++ b/vm/src/cformat.rs @@ -1,3 +1,5 @@ +//cspell:ignore bytesobject + //! Implementation of Printf-Style string formatting //! as per the [Python Docs](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). 
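
For readers unfamiliar with the module touched above: vm/src/cformat.rs implements Python's printf-style `%` formatting. The toy sketch below is not part of the patch and is far simpler than the real parser (which handles flags, width, precision and the full set of conversion types); it only shows the core substitution idea, and `cformat_toy` is a made-up name for illustration.

```rust
// Toy sketch of %-substitution, the idea behind vm/src/cformat.rs.
// Only `%s` and the `%%` escape are handled; the real module parses the
// full printf-style specification described in the Python docs.
fn cformat_toy(spec: &str, args: &[&str]) -> String {
    let mut out = String::new();
    let mut args = args.iter();
    let mut chars = spec.chars();
    while let Some(c) = chars.next() {
        if c != '%' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('%') => out.push('%'), // literal percent
            Some('s') => out.push_str(args.next().expect("missing argument")),
            other => panic!("conversion not handled in this sketch: {other:?}"),
        }
    }
    out
}

fn main() {
    // mirrors Python's  "%s is 100%% %s" % ("RustPython", "Rust")
    assert_eq!(
        cformat_toy("%s is 100%% %s", &["RustPython", "Rust"]),
        "RustPython is 100% Rust"
    );
}
```
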
diff --git a/vm/src/exceptions.rs b/vm/src/exceptions.rs index 58f2a51b68..708a93fe61 100644 --- a/vm/src/exceptions.rs +++ b/vm/src/exceptions.rs @@ -212,10 +212,10 @@ impl VirtualMachine { if let Some(text) = maybe_text { // if text ends with \n, remove it let rtext = text.as_str().trim_end_matches('\n'); - let ltext = rtext.trim_start_matches([' ', '\n', '\x0c']); // \x0c is \f - let spaces = (rtext.len() - ltext.len()) as isize; + let l_text = rtext.trim_start_matches([' ', '\n', '\x0c']); // \x0c is \f + let spaces = (rtext.len() - l_text.len()) as isize; - writeln!(output, " {}", ltext)?; + writeln!(output, " {}", l_text)?; let maybe_offset: Option = getattr("offset").and_then(|obj| obj.try_to_value::(vm).ok()); @@ -237,7 +237,7 @@ impl VirtualMachine { let colno = offset - 1 - spaces; let end_colno = end_offset - 1 - spaces; if colno >= 0 { - let caretspace = ltext.chars().collect::>()[..colno as usize] + let caret_space = l_text.chars().collect::>()[..colno as usize] .iter() .map(|c| if c.is_whitespace() { *c } else { ' ' }) .collect::(); @@ -250,7 +250,7 @@ impl VirtualMachine { writeln!( output, " {}{}", - caretspace, + caret_space, "^".repeat(error_width as usize) )?; } diff --git a/vm/src/frame.rs b/vm/src/frame.rs index 78f03a04d8..7976a5254f 100644 --- a/vm/src/frame.rs +++ b/vm/src/frame.rs @@ -350,7 +350,7 @@ impl ExecutingFrame<'_> { fn run(&mut self, vm: &VirtualMachine) -> PyResult { flame_guard!(format!("Frame::run({})", self.code.obj_name)); // Execute until return or exception: - let instrs = &self.code.instructions; + let instructions = &self.code.instructions; let mut arg_state = bytecode::OpArgState::default(); loop { let idx = self.lasti() as usize; @@ -359,7 +359,7 @@ impl ExecutingFrame<'_> { // self.code.locations[idx], self.code.source_path // ); self.update_lasti(|i| *i += 1); - let bytecode::CodeUnit { op, arg } = instrs[idx]; + let bytecode::CodeUnit { op, arg } = instructions[idx]; let arg = arg_state.extend(arg); let mut do_extend_arg = false; let result = self.execute_instruction(op, arg, &mut do_extend_arg, vm); @@ -805,14 +805,14 @@ impl ExecutingFrame<'_> { dict.set_item(&*key, value, vm)?; Ok(None) } - bytecode::Instruction::BinaryOperation { op } => self.execute_binop(vm, op.get(arg)), + bytecode::Instruction::BinaryOperation { op } => self.execute_bin_op(vm, op.get(arg)), bytecode::Instruction::BinaryOperationInplace { op } => { - self.execute_binop_inplace(vm, op.get(arg)) + self.execute_bin_op_inplace(vm, op.get(arg)) } bytecode::Instruction::LoadAttr { idx } => self.load_attr(vm, idx.get(arg)), bytecode::Instruction::StoreAttr { idx } => self.store_attr(vm, idx.get(arg)), bytecode::Instruction::DeleteAttr { idx } => self.delete_attr(vm, idx.get(arg)), - bytecode::Instruction::UnaryOperation { op } => self.execute_unop(vm, op.get(arg)), + bytecode::Instruction::UnaryOperation { op } => self.execute_unary_op(vm, op.get(arg)), bytecode::Instruction::TestOperation { op } => self.execute_test(vm, op.get(arg)), bytecode::Instruction::CompareOperation { op } => self.execute_compare(vm, op.get(arg)), bytecode::Instruction::ReturnValue => { @@ -1792,7 +1792,7 @@ impl ExecutingFrame<'_> { } #[cfg_attr(feature = "flame-it", flame("Frame"))] - fn execute_binop(&mut self, vm: &VirtualMachine, op: bytecode::BinaryOperator) -> FrameResult { + fn execute_bin_op(&mut self, vm: &VirtualMachine, op: bytecode::BinaryOperator) -> FrameResult { let b_ref = &self.pop_value(); let a_ref = &self.pop_value(); let value = match op { @@ -1814,7 +1814,7 @@ impl 
ExecutingFrame<'_> { self.push_value(value); Ok(None) } - fn execute_binop_inplace( + fn execute_bin_op_inplace( &mut self, vm: &VirtualMachine, op: bytecode::BinaryOperator, @@ -1842,7 +1842,11 @@ impl ExecutingFrame<'_> { } #[cfg_attr(feature = "flame-it", flame("Frame"))] - fn execute_unop(&mut self, vm: &VirtualMachine, op: bytecode::UnaryOperator) -> FrameResult { + fn execute_unary_op( + &mut self, + vm: &VirtualMachine, + op: bytecode::UnaryOperator, + ) -> FrameResult { let a = self.pop_value(); let value = match op { bytecode::UnaryOperator::Minus => vm._neg(&a)?, diff --git a/vm/src/function/argument.rs b/vm/src/function/argument.rs index 197cfe7b96..5033ee7627 100644 --- a/vm/src/function/argument.rs +++ b/vm/src/function/argument.rs @@ -276,7 +276,7 @@ impl ArgumentError { vm.new_type_error(format!("{name} is an invalid keyword argument")) } ArgumentError::RequiredKeywordArgument(name) => { - vm.new_type_error(format!("Required keyqord only argument {name}")) + vm.new_type_error(format!("Required keyword only argument {name}")) } ArgumentError::Exception(ex) => ex, } diff --git a/vm/src/function/fspath.rs b/vm/src/function/fspath.rs index 83bd452151..74051644e0 100644 --- a/vm/src/function/fspath.rs +++ b/vm/src/function/fspath.rs @@ -94,7 +94,7 @@ impl FsPath { } #[cfg(windows)] - pub fn to_widecstring(&self, vm: &VirtualMachine) -> PyResult { + pub fn to_wide_cstring(&self, vm: &VirtualMachine) -> PyResult { widestring::WideCString::from_os_str(self.as_os_str(vm)?) .map_err(|err| err.into_pyexception(vm)) } diff --git a/vm/src/function/protocol.rs b/vm/src/function/protocol.rs index 2f4b4d160a..0f146fed95 100644 --- a/vm/src/function/protocol.rs +++ b/vm/src/function/protocol.rs @@ -76,7 +76,7 @@ impl TryFromObject for ArgCallable { /// objects using a generic type parameter that implements `TryFromObject`. pub struct ArgIterable { iterable: PyObjectRef, - iterfn: Option, + iter_fn: Option, _item: PhantomData, } @@ -92,7 +92,7 @@ impl ArgIterable { /// This operation may fail if an exception is raised while invoking the /// `__iter__` method of the iterable object. 
pub fn iter<'a>(&self, vm: &'a VirtualMachine) -> PyResult> { - let iter = PyIter::new(match self.iterfn { + let iter = PyIter::new(match self.iter_fn { Some(f) => f(self.iterable.clone(), vm)?, None => PySequenceIterator::new(self.iterable.clone(), vm)?.into_pyobject(vm), }); @@ -105,17 +105,17 @@ where T: TryFromObject, { fn try_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult { - let iterfn = { + let iter_fn = { let cls = obj.class(); - let iterfn = cls.mro_find_map(|x| x.slots.iter.load()); - if iterfn.is_none() && !cls.has_attr(identifier!(vm, __getitem__)) { + let iter_fn = cls.mro_find_map(|x| x.slots.iter.load()); + if iter_fn.is_none() && !cls.has_attr(identifier!(vm, __getitem__)) { return Err(vm.new_type_error(format!("'{}' object is not iterable", cls.name()))); } - iterfn + iter_fn }; Ok(Self { iterable: obj, - iterfn, + iter_fn, _item: PhantomData, }) } diff --git a/vm/src/import.rs b/vm/src/import.rs index 0ce116d014..416c40a844 100644 --- a/vm/src/import.rs +++ b/vm/src/import.rs @@ -81,7 +81,7 @@ pub fn make_frozen(vm: &VirtualMachine, name: &str) -> PyResult> { pub fn import_frozen(vm: &VirtualMachine, module_name: &str) -> PyResult { let frozen = make_frozen(vm, module_name)?; - let module = import_codeobj(vm, module_name, frozen, false)?; + let module = import_code_obj(vm, module_name, frozen, false)?; debug_assert!(module.get_attr(identifier!(vm, __name__), vm).is_ok()); // TODO: give a correct origname here module.set_attr("__origname__", vm.ctx.new_str(module_name.to_owned()), vm)?; @@ -116,7 +116,7 @@ pub fn import_file( vm.compile_opts(), ) .map_err(|err| vm.new_syntax_error(&err, Some(content)))?; - import_codeobj(vm, module_name, code, true) + import_code_obj(vm, module_name, code, true) } #[cfg(feature = "rustpython-compiler")] @@ -129,10 +129,10 @@ pub fn import_source(vm: &VirtualMachine, module_name: &str, content: &str) -> P vm.compile_opts(), ) .map_err(|err| vm.new_syntax_error(&err, Some(content)))?; - import_codeobj(vm, module_name, code, false) + import_code_obj(vm, module_name, code, false) } -pub fn import_codeobj( +pub fn import_code_obj( vm: &VirtualMachine, module_name: &str, code_obj: PyRef, diff --git a/vm/src/object/core.rs b/vm/src/object/core.rs index 56ab419c01..bbe900f7cd 100644 --- a/vm/src/object/core.rs +++ b/vm/src/object/core.rs @@ -207,7 +207,7 @@ impl WeakRefList { hash: Radium::new(crate::common::hash::SENTINEL), }; let weak = PyRef::new_ref(obj, cls, dict); - // SAFETY: we don't actually own the PyObjectWeaks inside `list`, and every time we take + // SAFETY: we don't actually own the PyObjectWeak's inside `list`, and every time we take // one out of the list we immediately wrap it in ManuallyDrop or forget it inner.list.push_front(unsafe { ptr::read(&weak) }); inner.ref_count += 1; @@ -1301,6 +1301,7 @@ mod tests { #[test] fn miri_test_drop() { + //cspell:ignore dfghjkl let ctx = crate::Context::genesis(); let obj = ctx.new_bytes(b"dfghjkl".to_vec()); drop(obj); diff --git a/vm/src/ospath.rs b/vm/src/ospath.rs index c1b1859164..26d1582825 100644 --- a/vm/src/ospath.rs +++ b/vm/src/ospath.rs @@ -70,7 +70,7 @@ impl OsPath { } #[cfg(windows)] - pub fn to_widecstring(&self, vm: &VirtualMachine) -> PyResult { + pub fn to_wide_cstring(&self, vm: &VirtualMachine) -> PyResult { widestring::WideCString::from_os_str(&self.path).map_err(|err| err.to_pyexception(vm)) } @@ -167,18 +167,18 @@ impl<'a> IOErrorBuilder<'a> { impl ToPyException for IOErrorBuilder<'_> { fn to_pyexception(&self, vm: &VirtualMachine) -> PyBaseExceptionRef 
{ - let excp = self.error.to_pyexception(vm); + let exc = self.error.to_pyexception(vm); if let Some(filename) = &self.filename { - excp.as_object() + exc.as_object() .set_attr("filename", filename.filename(vm), vm) .unwrap(); } if let Some(filename2) = &self.filename2 { - excp.as_object() + exc.as_object() .set_attr("filename2", filename2.filename(vm), vm) .unwrap(); } - excp + exc } } diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index e3b03b4f80..a3b7f125f5 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -131,7 +131,7 @@ impl PyBuffer { // drop PyBuffer without calling release // after this function, the owner should use forget() - // or wrap PyBuffer in the ManaullyDrop to prevent drop() + // or wrap PyBuffer in the ManuallyDrop to prevent drop() pub(crate) unsafe fn drop_without_release(&mut self) { // SAFETY: requirements forwarded from caller unsafe { @@ -267,7 +267,7 @@ impl BufferDescriptor { Ok(pos) } - pub fn for_each_segment(&self, try_conti: bool, mut f: F) + pub fn for_each_segment(&self, try_contiguous: bool, mut f: F) where F: FnMut(Range), { @@ -275,20 +275,20 @@ impl BufferDescriptor { f(0..self.itemsize as isize); return; } - if try_conti && self.is_last_dim_contiguous() { + if try_contiguous && self.is_last_dim_contiguous() { self._for_each_segment::<_, true>(0, 0, &mut f); } else { self._for_each_segment::<_, false>(0, 0, &mut f); } } - fn _for_each_segment(&self, mut index: isize, dim: usize, f: &mut F) + fn _for_each_segment(&self, mut index: isize, dim: usize, f: &mut F) where F: FnMut(Range), { let (shape, stride, suboffset) = self.dim_desc[dim]; if dim + 1 == self.ndim() { - if CONTI { + if CONTIGUOUS { f(index..index + (shape * self.itemsize) as isize); } else { for _ in 0..shape { @@ -300,13 +300,13 @@ impl BufferDescriptor { return; } for _ in 0..shape { - self._for_each_segment::(index + suboffset, dim + 1, f); + self._for_each_segment::(index + suboffset, dim + 1, f); index += stride; } } /// zip two BufferDescriptor with the same shape - pub fn zip_eq(&self, other: &Self, try_conti: bool, mut f: F) + pub fn zip_eq(&self, other: &Self, try_contiguous: bool, mut f: F) where F: FnMut(Range, Range) -> bool, { @@ -314,14 +314,14 @@ impl BufferDescriptor { f(0..self.itemsize as isize, 0..other.itemsize as isize); return; } - if try_conti && self.is_last_dim_contiguous() { + if try_contiguous && self.is_last_dim_contiguous() { self._zip_eq::<_, true>(other, 0, 0, 0, &mut f); } else { self._zip_eq::<_, false>(other, 0, 0, 0, &mut f); } } - fn _zip_eq( + fn _zip_eq( &self, other: &Self, mut a_index: isize, @@ -335,7 +335,7 @@ impl BufferDescriptor { let (_b_shape, b_stride, b_suboffset) = other.dim_desc[dim]; debug_assert_eq!(shape, _b_shape); if dim + 1 == self.ndim() { - if CONTI { + if CONTIGUOUS { if f( a_index..a_index + (shape * self.itemsize) as isize, b_index..b_index + (shape * other.itemsize) as isize, @@ -360,7 +360,7 @@ impl BufferDescriptor { } for _ in 0..shape { - self._zip_eq::( + self._zip_eq::( other, a_index + a_suboffset, b_index + b_suboffset, diff --git a/vm/src/protocol/iter.rs b/vm/src/protocol/iter.rs index a7491a3897..254134991c 100644 --- a/vm/src/protocol/iter.rs +++ b/vm/src/protocol/iter.rs @@ -125,12 +125,12 @@ impl TryFromObject for PyIter { // in the vm when a for loop is entered. Next, it is used when the builtin // function 'iter' is called. 
fn try_from_object(vm: &VirtualMachine, iter_target: PyObjectRef) -> PyResult { - let getiter = { + let get_iter = { let cls = iter_target.class(); cls.mro_find_map(|x| x.slots.iter.load()) }; - if let Some(getiter) = getiter { - let iter = getiter(iter_target, vm)?; + if let Some(get_iter) = get_iter { + let iter = get_iter(iter_target, vm)?; if PyIter::check(&iter) { Ok(Self(iter)) } else { diff --git a/vm/src/protocol/object.rs b/vm/src/protocol/object.rs index 4cdcb68257..256baa0fdf 100644 --- a/vm/src/protocol/object.rs +++ b/vm/src/protocol/object.rs @@ -28,8 +28,8 @@ impl PyObjectRef { // int PyObject_GenericSetDict(PyObject *o, PyObject *value, void *context) #[inline(always)] - pub fn rich_compare(self, other: Self, opid: PyComparisonOp, vm: &VirtualMachine) -> PyResult { - self._cmp(&other, opid, vm).map(|res| res.to_pyobject(vm)) + pub fn rich_compare(self, other: Self, op_id: PyComparisonOp, vm: &VirtualMachine) -> PyResult { + self._cmp(&other, op_id, vm).map(|res| res.to_pyobject(vm)) } pub fn bytes(self, vm: &VirtualMachine) -> PyResult { @@ -323,17 +323,17 @@ impl PyObject { match op { PyComparisonOp::Eq => Ok(Either::B(self.is(&other))), PyComparisonOp::Ne => Ok(Either::B(!self.is(&other))), - _ => Err(vm.new_unsupported_binop_error(self, other, op.operator_token())), + _ => Err(vm.new_unsupported_bin_op_error(self, other, op.operator_token())), } } #[inline(always)] pub fn rich_compare_bool( &self, other: &Self, - opid: PyComparisonOp, + op_id: PyComparisonOp, vm: &VirtualMachine, ) -> PyResult { - match self._cmp(other, opid, vm)? { + match self._cmp(other, op_id, vm)? { Either::A(obj) => obj.try_to_bool(vm), Either::B(other) => Ok(other), } @@ -479,13 +479,13 @@ impl PyObject { let r = if let Ok(typ) = cls.try_to_ref::(vm) { if self.class().fast_issubclass(typ) { true - } else if let Ok(icls) = + } else if let Ok(i_cls) = PyTypeRef::try_from_object(vm, self.get_attr(identifier!(vm, __class__), vm)?) { - if icls.is(self.class()) { + if i_cls.is(self.class()) { false } else { - icls.fast_issubclass(typ) + i_cls.fast_issubclass(typ) } } else { false @@ -497,11 +497,11 @@ impl PyObject { cls.class() ) })?; - let icls: PyObjectRef = self.get_attr(identifier!(vm, __class__), vm)?; - if vm.is_none(&icls) { + let i_cls: PyObjectRef = self.get_attr(identifier!(vm, __class__), vm)?; + if vm.is_none(&i_cls) { false } else { - icls.abstract_issubclass(cls, vm)? + i_cls.abstract_issubclass(cls, vm)? 
} }; Ok(r) diff --git a/vm/src/stdlib/builtins.rs b/vm/src/stdlib/builtins.rs index 9dcb35aae9..9a21dd34dd 100644 --- a/vm/src/stdlib/builtins.rs +++ b/vm/src/stdlib/builtins.rs @@ -588,12 +588,14 @@ mod builtins { iterator.class().name() ))); } - PyIter::new(iterator).next(vm).map(|iret| match iret { - PyIterReturn::Return(obj) => PyIterReturn::Return(obj), - PyIterReturn::StopIteration(v) => { - default_value.map_or(PyIterReturn::StopIteration(v), PyIterReturn::Return) - } - }) + PyIter::new(iterator) + .next(vm) + .map(|iter_ret| match iter_ret { + PyIterReturn::Return(obj) => PyIterReturn::Return(obj), + PyIterReturn::StopIteration(v) => { + default_value.map_or(PyIterReturn::StopIteration(v), PyIterReturn::Return) + } + }) } #[pyfunction] diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index 33ef118acd..6f13e0878d 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -673,14 +673,14 @@ mod _io { } fn _readinto( zelf: PyObjectRef, - bufobj: PyObjectRef, + buf_obj: PyObjectRef, method: &str, vm: &VirtualMachine, ) -> PyResult { - let b = ArgMemoryBuffer::try_from_borrowed_object(vm, &bufobj)?; + let b = ArgMemoryBuffer::try_from_borrowed_object(vm, &buf_obj)?; let l = b.len(); let data = vm.call_method(&zelf, method, (l,))?; - if data.is(&bufobj) { + if data.is(&buf_obj) { return Ok(l); } let mut buf = b.borrow_buf_mut(); @@ -929,25 +929,25 @@ mod _io { ) -> PyResult> { let len = buf_range.len(); let res = if let Some(buf) = buf { - let memobj = PyMemoryView::from_buffer_range(buf, buf_range, vm)?.to_pyobject(vm); + let mem_obj = PyMemoryView::from_buffer_range(buf, buf_range, vm)?.to_pyobject(vm); // TODO: loop if write() raises an interrupt - vm.call_method(self.raw.as_ref().unwrap(), "write", (memobj,))? + vm.call_method(self.raw.as_ref().unwrap(), "write", (mem_obj,))? } else { let v = std::mem::take(&mut self.buffer); - let writebuf = VecBuffer::from(v).into_ref(&vm.ctx); - let memobj = PyMemoryView::from_buffer_range( - writebuf.clone().into_pybuffer(true), + let write_buf = VecBuffer::from(v).into_ref(&vm.ctx); + let mem_obj = PyMemoryView::from_buffer_range( + write_buf.clone().into_pybuffer(true), buf_range, vm, )? .into_ref(&vm.ctx); // TODO: loop if write() raises an interrupt - let res = vm.call_method(self.raw.as_ref().unwrap(), "write", (memobj.clone(),)); + let res = vm.call_method(self.raw.as_ref().unwrap(), "write", (mem_obj.clone(),)); - memobj.release(); - self.buffer = writebuf.take(); + mem_obj.release(); + self.buffer = write_buf.take(); res? }; @@ -1159,9 +1159,9 @@ mod _io { let res = match v { Either::A(v) => { let v = v.unwrap_or(&mut self.buffer); - let readbuf = VecBuffer::from(std::mem::take(v)).into_ref(&vm.ctx); - let memobj = PyMemoryView::from_buffer_range( - readbuf.clone().into_pybuffer(false), + let read_buf = VecBuffer::from(std::mem::take(v)).into_ref(&vm.ctx); + let mem_obj = PyMemoryView::from_buffer_range( + read_buf.clone().into_pybuffer(false), buf_range, vm, )? @@ -1169,17 +1169,17 @@ mod _io { // TODO: loop if readinto() raises an interrupt let res = - vm.call_method(self.raw.as_ref().unwrap(), "readinto", (memobj.clone(),)); + vm.call_method(self.raw.as_ref().unwrap(), "readinto", (mem_obj.clone(),)); - memobj.release(); - std::mem::swap(v, &mut readbuf.take()); + mem_obj.release(); + std::mem::swap(v, &mut read_buf.take()); res? 
} Either::B(buf) => { - let memobj = PyMemoryView::from_buffer_range(buf, buf_range, vm)?; + let mem_obj = PyMemoryView::from_buffer_range(buf, buf_range, vm)?; // TODO: loop if readinto() raises an interrupt - vm.call_method(self.raw.as_ref().unwrap(), "readinto", (memobj,))? + vm.call_method(self.raw.as_ref().unwrap(), "readinto", (mem_obj,))? } }; @@ -2305,14 +2305,14 @@ mod _io { let incremental_encoder = codec.get_incremental_encoder(Some(errors.to_owned()), vm)?; let encoding_name = vm.get_attribute_opt(incremental_encoder.clone(), "name")?; - let encodefunc = encoding_name.and_then(|name| { + let encode_func = encoding_name.and_then(|name| { let name = name.payload::()?; match name.as_str() { "utf-8" => Some(textio_encode_utf8 as EncodeFunc), _ => None, } }); - Some((incremental_encoder, encodefunc)) + Some((incremental_encoder, encode_func)) } else { None }; @@ -2600,12 +2600,12 @@ mod _io { while skip_bytes > 0 { cookie.set_decoder_state(decoder, vm)?; let input = &next_input.as_bytes()[..skip_bytes as usize]; - let ndecoded = decoder_decode(input)?; - if ndecoded.chars <= num_to_skip.chars { + let n_decoded = decoder_decode(input)?; + if n_decoded.chars <= num_to_skip.chars { let (dec_buffer, dec_flags) = decoder_getstate()?; if dec_buffer.is_empty() { cookie.dec_flags = dec_flags; - num_to_skip -= ndecoded; + num_to_skip -= n_decoded; break; } skip_bytes -= dec_buffer.len() as isize; @@ -2625,23 +2625,23 @@ mod _io { cookie.set_num_to_skip(num_to_skip); if num_to_skip.chars != 0 { - let mut ndecoded = Utf8size::default(); + let mut n_decoded = Utf8size::default(); let mut input = next_input.as_bytes(); input = &input[skip_bytes..]; while !input.is_empty() { let (byte1, rest) = input.split_at(1); let n = decoder_decode(byte1)?; - ndecoded += n; + n_decoded += n; cookie.bytes_to_feed += 1; let (dec_buffer, dec_flags) = decoder_getstate()?; - if dec_buffer.is_empty() && ndecoded.chars < num_to_skip.chars { + if dec_buffer.is_empty() && n_decoded.chars < num_to_skip.chars { cookie.start_pos += cookie.bytes_to_feed as Offset; - num_to_skip -= ndecoded; + num_to_skip -= n_decoded; cookie.dec_flags = dec_flags; cookie.bytes_to_feed = 0; - ndecoded = Utf8size::default(); + n_decoded = Utf8size::default(); } - if ndecoded.chars >= num_to_skip.chars { + if n_decoded.chars >= num_to_skip.chars { break; } input = rest; @@ -2650,7 +2650,7 @@ mod _io { let decoded = vm.call_method(decoder, "decode", (vm.ctx.new_bytes(vec![]), true))?; let decoded = check_decoded(decoded, vm)?; - let final_decoded_chars = ndecoded.chars + decoded.char_len(); + let final_decoded_chars = n_decoded.chars + decoded.char_len(); cookie.need_eof = true; if final_decoded_chars < num_to_skip.chars { return Err( @@ -2739,7 +2739,7 @@ mod _io { let mut textio = self.lock(vm)?; textio.check_closed(vm)?; - let (encoder, encodefunc) = textio + let (encoder, encode_func) = textio .encoder .as_ref() .ok_or_else(|| new_unsupported_operation(vm, "not writable".to_owned()))?; @@ -2767,8 +2767,8 @@ mod _io { } else { obj }; - let chunk = if let Some(encodefunc) = *encodefunc { - encodefunc(chunk) + let chunk = if let Some(encode_func) = *encode_func { + encode_func(chunk) } else { let b = vm.call_method(encoder, "encode", (chunk.clone(),))?; b.downcast::() @@ -2866,7 +2866,7 @@ mod _io { } let mut start; - let mut endpos; + let mut end_pos; let mut offset_to_buffer; let mut chunked = Utf8size::default(); let mut remaining: Option = None; @@ -2883,7 +2883,7 @@ mod _io { textio.set_decoded_chars(None); textio.snapshot = None; start 
= Utf8size::default(); - endpos = Utf8size::default(); + end_pos = Utf8size::default(); offset_to_buffer = Utf8size::default(); break 'outer None; } @@ -2918,11 +2918,11 @@ mod _io { let nl_res = textio.newline.find_newline(line_from_start); match nl_res { Ok(p) | Err(p) => { - endpos = start + Utf8size::len_str(&line_from_start[..p]); + end_pos = start + Utf8size::len_str(&line_from_start[..p]); if let Some(limit) = limit { - // original CPython logic: endpos = start + limit - chunked - if chunked.chars + endpos.chars >= limit { - endpos = start + // original CPython logic: end_pos = start + limit - chunked + if chunked.chars + end_pos.chars >= limit { + end_pos = start + Utf8size { chars: limit - chunked.chars, bytes: crate::common::str::codepoint_range_end( @@ -2939,21 +2939,21 @@ mod _io { if nl_res.is_ok() { break Some(line); } - if endpos.bytes > start.bytes { - let chunk = SlicedStr(line.clone(), start.bytes..endpos.bytes); + if end_pos.bytes > start.bytes { + let chunk = SlicedStr(line.clone(), start.bytes..end_pos.bytes); chunked += chunk.utf8_len(); chunks.push(chunk); } let line_len = line.byte_len(); - if endpos.bytes < line_len { - remaining = Some(SlicedStr(line, endpos.bytes..line_len)); + if end_pos.bytes < line_len { + remaining = Some(SlicedStr(line, end_pos.bytes..line_len)); } textio.set_decoded_chars(None); }; let cur_line = cur_line.map(|line| { - textio.decoded_chars_used = endpos - offset_to_buffer; - SlicedStr(line, start.bytes..endpos.bytes) + textio.decoded_chars_used = end_pos - offset_to_buffer; + SlicedStr(line, start.bytes..end_pos.bytes) }); // don't need to care about chunked.chars anymore let mut chunked = chunked.bytes; @@ -3166,7 +3166,7 @@ mod _io { #[derive(Debug)] struct IncrementalNewlineDecoderData { decoder: PyObjectRef, - // afaict, this is used for nothing + // currently this is used for nothing // errors: PyObjectRef, pendingcr: bool, translate: bool, @@ -4237,7 +4237,7 @@ mod fileio { #[cfg(any(unix, target_os = "wasi"))] let fd = Fd::open(&path.clone().into_cstring(vm)?, flags, 0o666); #[cfg(windows)] - let fd = Fd::wopen(&path.to_widecstring(vm)?, flags, 0o666); + let fd = Fd::wopen(&path.to_wide_cstring(vm)?, flags, 0o666); let filename = OsPathOrFd::Path(path); match fd { Ok(fd) => (fd.0, filename), diff --git a/vm/src/stdlib/itertools.rs b/vm/src/stdlib/itertools.rs index dab62987d6..18641ac3b6 100644 --- a/vm/src/stdlib/itertools.rs +++ b/vm/src/stdlib/itertools.rs @@ -1087,7 +1087,7 @@ mod decl { #[derive(Debug, PyPayload)] struct PyItertoolsAccumulate { iterable: PyIter, - binop: Option, + bin_op: Option, initial: Option, acc_value: PyRwLock>, } @@ -1107,7 +1107,7 @@ mod decl { fn py_new(cls: PyTypeRef, args: AccumulateArgs, vm: &VirtualMachine) -> PyResult { PyItertoolsAccumulate { iterable: args.iterable, - binop: args.func.flatten(), + bin_op: args.func.flatten(), initial: args.initial.flatten(), acc_value: PyRwLock::new(None), } @@ -1127,7 +1127,7 @@ mod decl { #[pymethod(magic)] fn reduce(zelf: PyRef, vm: &VirtualMachine) -> PyTupleRef { let class = zelf.class().to_owned(); - let binop = zelf.binop.clone(); + let bin_op = zelf.bin_op.clone(); let it = zelf.iterable.clone(); let acc_value = zelf.acc_value.read().clone(); if let Some(initial) = &zelf.initial { @@ -1136,7 +1136,7 @@ mod decl { source: PyRwLock::new(Some(chain_args.to_pyobject(vm).get_iter(vm).unwrap())), active: PyRwLock::new(None), }; - let tup = vm.new_tuple((chain, binop)); + let tup = vm.new_tuple((chain, bin_op)); return vm.new_tuple((class, tup, acc_value)); } 
match acc_value { @@ -1151,7 +1151,7 @@ mod decl { .into_pyobject(vm); let acc = Self { iterable: PyIter::new(chain), - binop, + bin_op, initial: None, acc_value: PyRwLock::new(None), }; @@ -1161,7 +1161,7 @@ mod decl { } _ => {} } - let tup = vm.new_tuple((it, binop)); + let tup = vm.new_tuple((it, bin_op)); vm.new_tuple((class, tup, acc_value)) } } @@ -1191,7 +1191,7 @@ mod decl { return Ok(PyIterReturn::StopIteration(v)); } }; - match &zelf.binop { + match &zelf.bin_op { None => vm._add(&value, &obj)?, Some(op) => op.call((value, obj), vm)?, } diff --git a/vm/src/stdlib/nt.rs b/vm/src/stdlib/nt.rs index b4899bb225..428d3421fd 100644 --- a/vm/src/stdlib/nt.rs +++ b/vm/src/stdlib/nt.rs @@ -41,7 +41,7 @@ pub(crate) mod module { #[pyfunction] pub(super) fn access(path: OsPath, mode: u8, vm: &VirtualMachine) -> PyResult { - let attr = unsafe { FileSystem::GetFileAttributesW(path.to_widecstring(vm)?.as_ptr()) }; + let attr = unsafe { FileSystem::GetFileAttributesW(path.to_wide_cstring(vm)?.as_ptr()) }; Ok(attr != FileSystem::INVALID_FILE_ATTRIBUTES && (mode & 2 == 0 || attr & FileSystem::FILE_ATTRIBUTE_READONLY == 0 @@ -256,7 +256,7 @@ pub(crate) mod module { #[pyfunction] fn _getfullpathname(path: OsPath, vm: &VirtualMachine) -> PyResult { - let wpath = path.to_widecstring(vm)?; + let wpath = path.to_wide_cstring(vm)?; let mut buffer = vec![0u16; Foundation::MAX_PATH as usize]; let ret = unsafe { FileSystem::GetFullPathNameW( @@ -289,7 +289,7 @@ pub(crate) mod module { #[pyfunction] fn _getvolumepathname(path: OsPath, vm: &VirtualMachine) -> PyResult { - let wide = path.to_widecstring(vm)?; + let wide = path.to_wide_cstring(vm)?; let buflen = std::cmp::max(wide.len(), Foundation::MAX_PATH as usize); let mut buffer = vec![0u16; buflen]; let ret = unsafe { @@ -344,7 +344,7 @@ pub(crate) mod module { fn _getdiskusage(path: OsPath, vm: &VirtualMachine) -> PyResult<(u64, u64)> { use FileSystem::GetDiskFreeSpaceExW; - let wpath = path.to_widecstring(vm)?; + let wpath = path.to_wide_cstring(vm)?; let mut _free_to_me: u64 = 0; let mut total: u64 = 0; let mut free: u64 = 0; @@ -437,7 +437,7 @@ pub(crate) mod module { let mode = mode.unwrap_or(0o777); let [] = dir_fd.0; let _ = mode; - let wide = path.to_widecstring(vm)?; + let wide = path.to_wide_cstring(vm)?; let res = unsafe { FileSystem::CreateDirectoryW(wide.as_ptr(), std::ptr::null_mut()) }; if res == 0 { return Err(errno_err(vm)); diff --git a/vm/src/stdlib/operator.rs b/vm/src/stdlib/operator.rs index d8ff1715fa..38f931b0e7 100644 --- a/vm/src/stdlib/operator.rs +++ b/vm/src/stdlib/operator.rs @@ -389,15 +389,15 @@ mod _operator { type Args = FuncArgs; fn py_new(cls: PyTypeRef, args: Self::Args, vm: &VirtualMachine) -> PyResult { - let nattr = args.args.len(); + let n_attr = args.args.len(); // Check we get no keyword and at least one positional. 
if !args.kwargs.is_empty() { return Err(vm.new_type_error("attrgetter() takes no keyword arguments".to_owned())); } - if nattr == 0 { + if n_attr == 0 { return Err(vm.new_type_error("attrgetter expected 1 argument, got 0.".to_owned())); } - let mut attrs = Vec::with_capacity(nattr); + let mut attrs = Vec::with_capacity(n_attr); for o in args.args { if let Ok(r) = o.try_into_value(vm) { attrs.push(r); diff --git a/vm/src/stdlib/os.rs b/vm/src/stdlib/os.rs index 641ba54dea..48e16ad41f 100644 --- a/vm/src/stdlib/os.rs +++ b/vm/src/stdlib/os.rs @@ -219,7 +219,7 @@ pub(super) mod _os { #[cfg(windows)] let fd = { let [] = dir_fd.0; - let name = name.to_widecstring(vm)?; + let name = name.to_wide_cstring(vm)?; let flags = flags | libc::O_NOINHERIT; Fd::wopen(&name, flags, mode) }; diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs index 7b67c038f4..fd41aa2b7d 100644 --- a/vm/src/stdlib/sre.rs +++ b/vm/src/stdlib/sre.rs @@ -287,7 +287,7 @@ mod _sre { with_sre_str!(zelf, &string_args.string, vm, |s| { let req = s.create_request(&zelf, string_args.pos, string_args.endpos); let state = State::default(); - let mut matchlist: Vec = Vec::new(); + let mut match_list: Vec = Vec::new(); let mut iter = SearchIter { req, state }; while iter.next().is_some() { @@ -301,10 +301,10 @@ mod _sre { .into() }; - matchlist.push(item); + match_list.push(item); } - Ok(matchlist) + Ok(match_list) }) } @@ -362,7 +362,7 @@ mod _sre { with_sre_str!(zelf, &split_args.string, vm, |s| { let req = s.create_request(&zelf, 0, usize::MAX); let state = State::default(); - let mut splitlist: Vec = Vec::new(); + let mut split_list: Vec = Vec::new(); let mut iter = SearchIter { req, state }; let mut n = 0; let mut last = 0; @@ -370,13 +370,13 @@ mod _sre { while (split_args.maxsplit == 0 || n < split_args.maxsplit) && iter.next().is_some() { /* get segment before this match */ - splitlist.push(s.slice(last, iter.state.start, vm)); + split_list.push(s.slice(last, iter.state.start, vm)); let m = Match::new(&mut iter.state, zelf.clone(), split_args.string.clone()); // add groups (if any) for i in 1..=zelf.groups { - splitlist.push(m.get_slice(i, s, vm).unwrap_or_else(|| vm.ctx.none())); + split_list.push(m.get_slice(i, s, vm).unwrap_or_else(|| vm.ctx.none())); } n += 1; @@ -384,9 +384,9 @@ mod _sre { } // get segment following last match (even if empty) - splitlist.push(req.string.slice(last, s.count(), vm)); + split_list.push(req.string.slice(last, s.count(), vm)); - Ok(splitlist) + Ok(split_list) }) } @@ -444,7 +444,7 @@ mod _sre { with_sre_str!(zelf, &string, vm, |s| { let req = s.create_request(&zelf, 0, usize::MAX); let state = State::default(); - let mut sublist: Vec = Vec::new(); + let mut sub_list: Vec = Vec::new(); let mut iter = SearchIter { req, state }; let mut n = 0; let mut last_pos = 0; @@ -452,26 +452,26 @@ mod _sre { while (count == 0 || n < count) && iter.next().is_some() { if last_pos < iter.state.start { /* get segment before this match */ - sublist.push(s.slice(last_pos, iter.state.start, vm)); + sub_list.push(s.slice(last_pos, iter.state.start, vm)); } match &filter { - FilterType::Literal(literal) => sublist.push(literal.clone()), + FilterType::Literal(literal) => sub_list.push(literal.clone()), FilterType::Callable(callable) => { let m = Match::new(&mut iter.state, zelf.clone(), string.clone()) .into_ref(&vm.ctx); - sublist.push(callable.invoke((m,), vm)?); + sub_list.push(callable.invoke((m,), vm)?); } FilterType::Template(template) => { let m = Match::new(&mut iter.state, zelf.clone(), string.clone()); // 
template.expand(m)? // let mut list = vec![template.literal.clone()]; - sublist.push(template.literal.clone()); + sub_list.push(template.literal.clone()); for (index, literal) in template.items.iter().cloned() { if let Some(item) = m.get_slice(index, s, vm) { - sublist.push(item); + sub_list.push(item); } - sublist.push(literal); + sub_list.push(literal); } } }; @@ -481,9 +481,9 @@ mod _sre { } /* get segment following last match */ - sublist.push(s.slice(last_pos, iter.req.end, vm)); + sub_list.push(s.slice(last_pos, iter.req.end, vm)); - let list = PyList::from(sublist).into_pyobject(vm); + let list = PyList::from(sub_list).into_pyobject(vm); let join_type: PyObjectRef = if zelf.isbytes { vm.ctx.new_bytes(vec![]).into() diff --git a/vm/src/stdlib/time.rs b/vm/src/stdlib/time.rs index 10d51bd39a..5f41304c19 100644 --- a/vm/src/stdlib/time.rs +++ b/vm/src/stdlib/time.rs @@ -1,3 +1,4 @@ +//cspell:ignore cfmt //! The python `time` module. // See also: @@ -377,10 +378,10 @@ mod decl { #[cfg(any(windows, all(target_arch = "wasm32", target_os = "emscripten")))] pub(super) fn time_muldiv(ticks: i64, mul: i64, div: i64) -> u64 { - let intpart = ticks / div; + let int_part = ticks / div; let ticks = ticks % div; let remaining = (ticks * mul) / div; - (intpart * mul + remaining) as u64 + (int_part * mul + remaining) as u64 } #[cfg(all(target_arch = "wasm32", target_os = "emscripten"))] diff --git a/vm/src/vm/thread.rs b/vm/src/vm/thread.rs index 9d29bfae54..ea5a2d995a 100644 --- a/vm/src/vm/thread.rs +++ b/vm/src/vm/thread.rs @@ -39,13 +39,13 @@ pub fn with_vm(obj: &PyObject, f: F) -> Option where F: Fn(&VirtualMachine) -> R, { - let vm_owns_obj = |intp: NonNull| { + let vm_owns_obj = |interp: NonNull| { // SAFETY: all references in VM_STACK should be valid - let vm = unsafe { intp.as_ref() }; + let vm = unsafe { interp.as_ref() }; obj.fast_isinstance(vm.ctx.types.object_type) }; VM_STACK.with(|vms| { - let intp = match vms.borrow().iter().copied().exactly_one() { + let interp = match vms.borrow().iter().copied().exactly_one() { Ok(x) => { debug_assert!(vm_owns_obj(x)); x @@ -54,7 +54,7 @@ where }; // SAFETY: all references in VM_STACK should be valid, and should not be changed or moved // at least until this function returns and the stack unwinds to an enter_vm() call - let vm = unsafe { intp.as_ref() }; + let vm = unsafe { interp.as_ref() }; let prev = VM_CURRENT.with(|current| current.replace(vm)); let ret = f(vm); VM_CURRENT.with(|current| current.replace(prev)); diff --git a/vm/src/vm/vm_new.rs b/vm/src/vm/vm_new.rs index 3ceb783a48..9a7a7fe748 100644 --- a/vm/src/vm/vm_new.rs +++ b/vm/src/vm/vm_new.rs @@ -155,7 +155,7 @@ impl VirtualMachine { )) } - pub fn new_unsupported_binop_error( + pub fn new_unsupported_bin_op_error( &self, a: &PyObject, b: &PyObject, @@ -169,7 +169,7 @@ impl VirtualMachine { )) } - pub fn new_unsupported_ternop_error( + pub fn new_unsupported_ternary_op_error( &self, a: &PyObject, b: &PyObject, diff --git a/vm/src/vm/vm_ops.rs b/vm/src/vm/vm_ops.rs index 5235393a69..c6be959a60 100644 --- a/vm/src/vm/vm_ops.rs +++ b/vm/src/vm/vm_ops.rs @@ -206,7 +206,7 @@ impl VirtualMachine { if !result.is(&self.ctx.not_implemented) { return Ok(result); } - Err(self.new_unsupported_binop_error(a, b, op)) + Err(self.new_unsupported_bin_op_error(a, b, op)) } /// Binary in-place operators @@ -250,7 +250,7 @@ impl VirtualMachine { if !result.is(&self.ctx.not_implemented) { return Ok(result); } - Err(self.new_unsupported_binop_error(a, b, op)) + Err(self.new_unsupported_bin_op_error(a, b, 
op)) } fn ternary_op( @@ -384,7 +384,7 @@ impl VirtualMachine { return Ok(result); } } - Err(self.new_unsupported_binop_error(a, b, "+")) + Err(self.new_unsupported_bin_op_error(a, b, "+")) } pub fn _iadd(&self, a: &PyObject, b: &PyObject) -> PyResult { @@ -398,7 +398,7 @@ impl VirtualMachine { return Ok(result); } } - Err(self.new_unsupported_binop_error(a, b, "+=")) + Err(self.new_unsupported_bin_op_error(a, b, "+=")) } pub fn _mul(&self, a: &PyObject, b: &PyObject) -> PyResult { @@ -419,7 +419,7 @@ impl VirtualMachine { })?; return seq_b.repeat(n, self); } - Err(self.new_unsupported_binop_error(a, b, "*")) + Err(self.new_unsupported_bin_op_error(a, b, "*")) } pub fn _imul(&self, a: &PyObject, b: &PyObject) -> PyResult { @@ -448,7 +448,7 @@ impl VirtualMachine { * used. */ return seq_b.repeat(n, self); } - Err(self.new_unsupported_binop_error(a, b, "*=")) + Err(self.new_unsupported_bin_op_error(a, b, "*=")) } pub fn _abs(&self, a: &PyObject) -> PyResult { diff --git a/vm/sre_engine/src/engine.rs b/vm/sre_engine/src/engine.rs index bf0a6046fa..9b27f55031 100644 --- a/vm/sre_engine/src/engine.rs +++ b/vm/sre_engine/src/engine.rs @@ -549,7 +549,7 @@ fn _match(req: &Request<'_, S>, state: &mut State, mut ctx: MatchCo break 'result false; }; - let mut gctx = MatchContext { + let mut g_ctx = MatchContext { cursor: req.string.create_cursor(group_start), ..ctx }; @@ -557,12 +557,12 @@ fn _match(req: &Request<'_, S>, state: &mut State, mut ctx: MatchCo for _ in group_start..group_end { #[allow(clippy::redundant_closure_call)] if ctx.at_end(req) - || $f(ctx.peek_char::()) != $f(gctx.peek_char::()) + || $f(ctx.peek_char::()) != $f(g_ctx.peek_char::()) { break 'result false; } ctx.advance_char::(); - gctx.advance_char::(); + g_ctx.advance_char::(); } ctx.skip_code(2); @@ -627,8 +627,8 @@ fn _match(req: &Request<'_, S>, state: &mut State, mut ctx: MatchCo break 'context next_ctx; } SreOpcode::AT => { - let atcode = SreAtCode::try_from(ctx.peek_code(req, 1)).unwrap(); - if at(req, &ctx, atcode) { + let at_code = SreAtCode::try_from(ctx.peek_code(req, 1)).unwrap(); + if at(req, &ctx, at_code) { ctx.skip_code(2); } else { break 'result false; @@ -642,8 +642,8 @@ fn _match(req: &Request<'_, S>, state: &mut State, mut ctx: MatchCo continue 'context; } SreOpcode::CATEGORY => { - let catcode = SreCatCode::try_from(ctx.peek_code(req, 1)).unwrap(); - if ctx.at_end(req) || !category(catcode, ctx.peek_char::()) { + let cat_code = SreCatCode::try_from(ctx.peek_code(req, 1)).unwrap(); + if ctx.at_end(req) || !category(cat_code, ctx.peek_char::()) { break 'result false; } ctx.skip_code(2); @@ -1179,8 +1179,8 @@ impl MatchContext { } } -fn at(req: &Request<'_, S>, ctx: &MatchContext, atcode: SreAtCode) -> bool { - match atcode { +fn at(req: &Request<'_, S>, ctx: &MatchContext, at_code: SreAtCode) -> bool { + match at_code { SreAtCode::BEGINNING | SreAtCode::BEGINNING_STRING => ctx.at_beginning(), SreAtCode::BEGINNING_LINE => ctx.at_beginning() || is_linebreak(ctx.back_peek_char::()), SreAtCode::BOUNDARY => ctx.at_boundary(req, is_word), @@ -1210,8 +1210,8 @@ fn charset_loc_ignore(set: &[u32], c: u32) -> bool { up != lo && charset(set, up) } -fn category(catcode: SreCatCode, c: u32) -> bool { - match catcode { +fn category(cat_code: SreCatCode, c: u32) -> bool { + match cat_code { SreCatCode::DIGIT => is_digit(c), SreCatCode::NOT_DIGIT => !is_digit(c), SreCatCode::SPACE => is_space(c), @@ -1250,13 +1250,13 @@ fn charset(set: &[u32], ch: u32) -> bool { } SreOpcode::CATEGORY => { /* */ - let catcode = match 
SreCatCode::try_from(set[i + 1]) { + let cat_code = match SreCatCode::try_from(set[i + 1]) { Ok(code) => code, Err(_) => { break; } }; - if category(catcode, ch) { + if category(cat_code, ch) { return ok; } i += 2; @@ -1270,14 +1270,14 @@ fn charset(set: &[u32], ch: u32) -> bool { i += 1 + 8; } SreOpcode::BIGCHARSET => { - /* <256 blockindices> */ + /* <256 block_indices> */ let count = set[i + 1] as usize; if ch < 0x10000 { let set = &set[i + 2..]; let block_index = ch >> 8; - let (_, blockindices, _) = unsafe { set.align_to::() }; + let (_, block_indices, _) = unsafe { set.align_to::() }; let blocks = &set[64..]; - let block = blockindices[block_index as usize]; + let block = block_indices[block_index as usize]; if blocks[((block as u32 * 256 + (ch & 255)) / 32) as usize] & (1u32 << (ch & 31)) != 0 diff --git a/wtf8/src/lib.rs b/wtf8/src/lib.rs index ff4dcf8900..64ea42d06e 100644 --- a/wtf8/src/lib.rs +++ b/wtf8/src/lib.rs @@ -22,7 +22,7 @@ //! string has no surrogates, it can be viewed as a UTF-8 Rust [`str`] without //! needing any copies or re-encoding. //! -//! This implementation is mostly copied from the WTF-8 implentation in the +//! This implementation is mostly copied from the WTF-8 implementation in the //! Rust 1.85 standard library, which is used as the backing for [`OsStr`] on //! Windows targets. As previously mentioned, however, it is modified to not //! join two surrogates into one codepoint when concatenating strings, in order @@ -463,8 +463,8 @@ impl Wtf8Buf { pub fn pop(&mut self) -> Option { let ch = self.code_points().next_back()?; - let newlen = self.len() - ch.len_wtf8(); - self.bytes.truncate(newlen); + let new_len = self.len() - ch.len_wtf8(); + self.bytes.truncate(new_len); Some(ch) } From d7113e11db71419478b65c92a64389442b2e4f1b Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Fri, 4 Apr 2025 16:42:14 +0900 Subject: [PATCH 010/126] Fix more cspell warnings --- .cspell.dict/cpython.txt | 15 +++- .cspell.dict/python-more.txt | 36 +++++++++ .cspell.dict/rust-more.txt | 10 +++ .cspell.json | 8 ++ common/src/format.rs | 1 + common/src/os.rs | 4 +- common/src/static_cell.rs | 4 +- common/src/str.rs | 8 +- src/lib.rs | 8 +- stdlib/src/csv.rs | 22 +++--- stdlib/src/json/machinery.rs | 1 + stdlib/src/locale.rs | 2 +- stdlib/src/math.rs | 2 +- stdlib/src/multiprocessing.rs | 6 +- stdlib/src/overlapped.rs | 4 +- vm/src/anystr.rs | 4 +- vm/src/builtins/bytearray.rs | 2 +- vm/src/builtins/bytes.rs | 2 +- vm/src/builtins/dict.rs | 10 +-- vm/src/builtins/float.rs | 2 - vm/src/builtins/function.rs | 28 +++---- vm/src/builtins/genericalias.rs | 4 +- vm/src/builtins/int.rs | 2 +- vm/src/builtins/memory.rs | 2 +- vm/src/builtins/module.rs | 2 +- vm/src/builtins/set.rs | 6 +- vm/src/builtins/tuple.rs | 2 +- vm/src/builtins/type.rs | 4 +- vm/src/{bytesinner.rs => bytes_inner.rs} | 1 + vm/src/{dictdatatype.rs => dict_inner.rs} | 4 +- vm/src/function/fspath.rs | 8 +- vm/src/intern.rs | 2 +- vm/src/lib.rs | 4 +- vm/src/object/core.rs | 4 +- vm/src/object/traverse.rs | 2 +- vm/src/protocol/object.rs | 4 +- vm/src/protocol/sequence.rs | 4 +- vm/src/py_io.rs | 8 +- vm/src/scope.rs | 2 +- vm/src/stdlib/ast/python.rs | 6 +- vm/src/stdlib/io.rs | 6 +- vm/src/stdlib/itertools.rs | 8 +- vm/src/stdlib/marshal.rs | 1 + vm/src/stdlib/operator.rs | 6 +- vm/src/stdlib/os.rs | 10 +-- vm/src/stdlib/sre.rs | 16 ++-- vm/src/stdlib/sys.rs | 16 ++-- vm/src/stdlib/thread.rs | 16 ++-- vm/src/stdlib/time.rs | 4 +- vm/src/types/slot.rs | 90 +++++++++++------------ vm/src/vm/mod.rs | 6 +- 
vm/sre_engine/benches/benches.rs | 10 +-- vm/sre_engine/src/engine.rs | 2 +- vm/sre_engine/tests/tests.rs | 15 ++-- wasm/lib/src/convert.rs | 8 +- wasm/lib/src/js_module.rs | 6 +- wasm/lib/src/vm_class.rs | 2 +- 57 files changed, 268 insertions(+), 204 deletions(-) rename vm/src/{bytesinner.rs => bytes_inner.rs} (99%) rename vm/src/{dictdatatype.rs => dict_inner.rs} (99%) diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt index f7e282e4bc..1840918a4d 100644 --- a/.cspell.dict/cpython.txt +++ b/.cspell.dict/cpython.txt @@ -6,15 +6,14 @@ badsyntax basetype boolop bxor +cached_tsver cellarg cellvar cellvars cmpop -weakreflist -XXPRIME +denom dictoffset elts -xstat excepthandler fileutils finalbody @@ -30,17 +29,25 @@ linearise maxdepth mult nkwargs +numer orelse +pathconfig patma posonlyarg posonlyargs prec +PYTHREAD_NAME +SA_ONSTACK stackdepth +tok_oldval unaryop unparse unparser VARKEYWORDS varkwarg wbits +weakreflist withitem -withs \ No newline at end of file +withs +xstat +XXPRIME \ No newline at end of file diff --git a/.cspell.dict/python-more.txt b/.cspell.dict/python-more.txt index 526f5ba166..2edfe95bdf 100644 --- a/.cspell.dict/python-more.txt +++ b/.cspell.dict/python-more.txt @@ -80,6 +80,7 @@ getpip getrandom getrecursionlimit getrefcount +getsizeof getweakrefcount getweakrefs getwindowsversion @@ -90,6 +91,7 @@ hamt hostnames idfunc idiv +idxs impls indexgroup infj @@ -99,6 +101,7 @@ irepeat isabstractmethod isbytes iscased +isfinal istext itemiterator itemsize @@ -111,9 +114,11 @@ kwargs kwdefaults kwonlyargcount lastgroup +lastindex linearization linearize listcomp +longrange lvalue mappingproxy maskpri @@ -137,25 +142,39 @@ nbytes ncallbacks ndigits ndim +nldecoder nlocals +NOARGS nonbytes +Nonprintable origname +ospath +pendingcr +phello +platlibdir posixsubprocess posonly posonlyargcount +prepending profilefunc +pycache pycodecs pycs pyexpat +PYTHONBREAKPOINT PYTHONDEBUG +PYTHONHASHSEED PYTHONHOME PYTHONINSPECT PYTHONOPTIMIZE PYTHONPATH PYTHONPATH +PYTHONSAFEPATH PYTHONVERBOSE +PYTHONWARNDEFAULTENCODING PYTHONWARNINGS pytraverse +PYVENV qualname quotetabs radd @@ -164,6 +183,7 @@ rdivmod readall readbuffer reconstructor +refcnt releaselevel reverseitemiterator reverseiterator @@ -178,23 +198,37 @@ rsub rtruediv rvalue scproxy +seennl setattro setcomp +setrecursionlimit showwarnmsg signum slotnames +STACKLESS stacklevel stacksize startpos +subclassable subclasscheck subclasshook suboffset +suboffsets +SUBPATTERN sumprod surrogateescape surrogatepass +sysconf sysconfigdata sysvars +teedata +thisclass titlecased +tobytes +tolist +toreadonly +TPFLAGS +tracefunc unimportable unionable unraisablehook @@ -208,7 +242,9 @@ warningregistry warnmsg warnoptions warnopts +weaklist weakproxy +weakrefs winver withdata xmlcharrefreplace diff --git a/.cspell.dict/rust-more.txt b/.cspell.dict/rust-more.txt index d75529789f..99e87e532c 100644 --- a/.cspell.dict/rust-more.txt +++ b/.cspell.dict/rust-more.txt @@ -1,4 +1,5 @@ ahash +arrayvec bidi biguint bindgen @@ -14,6 +15,8 @@ cranelift cstring datelike deserializer +fdiv +flamescope flate2 fract getres @@ -44,16 +47,21 @@ objclass peekable powc powf +powi prepended punct puruspe replacen +rmatch +rposition rsplitn rustc rustfmt rustyline +seedable seekfrom siphash +siphasher splitn subsec thiserror @@ -63,8 +71,10 @@ trai ulonglong unic unistd +unraw unsync wasmbind +wasmtime widestring winapi winsock diff --git a/.cspell.json b/.cspell.json index 99718a6515..caa53a2879 100644 --- a/.cspell.json +++ b/.cspell.json @@ -46,6 
+46,7 @@ ], // words - list of words to be always considered correct "words": [ + "RUSTPYTHONPATH", // RustPython "aiterable", "alnum", @@ -69,9 +70,11 @@ "makeunicodedata", "miri", "notrace", + "openat", "pyarg", "pyarg", "pyargs", + "pyast", "PyAttr", "pyc", "PyClass", @@ -80,6 +83,7 @@ "PyFunction", "pygetset", "pyimpl", + "pylib", "pymember", "PyMethod", "PyModule", @@ -100,6 +104,7 @@ "richcompare", "RustPython", "struc", + "sysmodule", "tracebacks", "typealiases", "Unconstructible", @@ -112,11 +117,14 @@ "CLOEXEC", "codeset", "endgrent", + "gethrvtime", "getrusage", "nanosleep", + "sigaction", "WRLCK", // win32 "birthtime", + "IFEXEC", ], // flagWords - list of words to be always considered incorrect "flagWords": [ diff --git a/common/src/format.rs b/common/src/format.rs index 75d0996796..4c1ce6c5c2 100644 --- a/common/src/format.rs +++ b/common/src/format.rs @@ -1,3 +1,4 @@ +// cspell:ignore ddfe use itertools::{Itertools, PeekingNext}; use malachite_bigint::{BigInt, Sign}; use num_traits::FromPrimitive; diff --git a/common/src/os.rs b/common/src/os.rs index 06ea1432e9..d37f28d28a 100644 --- a/common/src/os.rs +++ b/common/src/os.rs @@ -62,13 +62,13 @@ pub fn last_posix_errno() -> i32 { } #[cfg(unix)] -pub fn bytes_as_osstr(b: &[u8]) -> Result<&std::ffi::OsStr, Utf8Error> { +pub fn bytes_as_os_str(b: &[u8]) -> Result<&std::ffi::OsStr, Utf8Error> { use std::os::unix::ffi::OsStrExt; Ok(std::ffi::OsStr::from_bytes(b)) } #[cfg(not(unix))] -pub fn bytes_as_osstr(b: &[u8]) -> Result<&std::ffi::OsStr, Utf8Error> { +pub fn bytes_as_os_str(b: &[u8]) -> Result<&std::ffi::OsStr, Utf8Error> { Ok(std::str::from_utf8(b)?.as_ref()) } diff --git a/common/src/static_cell.rs b/common/src/static_cell.rs index 407b83ae0a..30e34f187f 100644 --- a/common/src/static_cell.rs +++ b/common/src/static_cell.rs @@ -76,7 +76,7 @@ mod threading { impl StaticCell { #[doc(hidden)] - pub const fn _from_oncecell(inner: OnceCell) -> Self { + pub const fn _from_once_cell(inner: OnceCell) -> Self { Self { inner } } @@ -108,7 +108,7 @@ mod threading { ($($(#[$attr:meta])* $vis:vis static $name:ident: $t:ty;)+) => { $($(#[$attr])* $vis static $name: $crate::static_cell::StaticCell<$t> = - $crate::static_cell::StaticCell::_from_oncecell($crate::lock::OnceCell::new());)+ + $crate::static_cell::StaticCell::_from_once_cell($crate::lock::OnceCell::new());)+ }; } } diff --git a/common/src/str.rs b/common/src/str.rs index ca1723e7ef..fa26959e0b 100644 --- a/common/src/str.rs +++ b/common/src/str.rs @@ -360,8 +360,8 @@ pub fn get_chars(s: &str, range: impl RangeBounds) -> &str { } #[inline] -pub fn char_range_end(s: &str, nchars: usize) -> Option { - let i = match nchars.checked_sub(1) { +pub fn char_range_end(s: &str, n_chars: usize) -> Option { + let i = match n_chars.checked_sub(1) { Some(last_char_index) => { let (index, c) = s.char_indices().nth(last_char_index)?; index + c.len_utf8() @@ -395,8 +395,8 @@ pub fn get_codepoints(w: &Wtf8, range: impl RangeBounds) -> &Wtf8 { } #[inline] -pub fn codepoint_range_end(s: &Wtf8, nchars: usize) -> Option { - let i = match nchars.checked_sub(1) { +pub fn codepoint_range_end(s: &Wtf8, n_chars: usize) -> Option { + let i = match n_chars.checked_sub(1) { Some(last_char_index) => { let (index, c) = s.code_point_indices().nth(last_char_index)?; index + c.len_wtf8() diff --git a/src/lib.rs b/src/lib.rs index 3fa5292e94..e415d847cf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -233,14 +233,14 @@ fn write_profile(settings: &Settings) -> Result<(), Box> enum ProfileFormat { Html, Text, - 
Speedscope, + SpeedScore, } let profile_output = settings.profile_output.as_deref(); let profile_format = match settings.profile_format.as_deref() { Some("html") => ProfileFormat::Html, Some("text") => ProfileFormat::Text, None if profile_output == Some("-".as_ref()) => ProfileFormat::Text, - Some("speedscope") | None => ProfileFormat::Speedscope, + Some("speedscope") | None => ProfileFormat::SpeedScore, Some(other) => { error!("Unknown profile format {}", other); // TODO: Need to change to ExitCode or Termination @@ -251,7 +251,7 @@ fn write_profile(settings: &Settings) -> Result<(), Box> let profile_output = profile_output.unwrap_or_else(|| match profile_format { ProfileFormat::Html => "flame-graph.html".as_ref(), ProfileFormat::Text => "flame.txt".as_ref(), - ProfileFormat::Speedscope => "flamescope.json".as_ref(), + ProfileFormat::SpeedScore => "flamescope.json".as_ref(), }); let profile_output: Box = if profile_output == "-" { @@ -265,7 +265,7 @@ fn write_profile(settings: &Settings) -> Result<(), Box> match profile_format { ProfileFormat::Html => flame::dump_html(profile_output)?, ProfileFormat::Text => flame::dump_text_to_writer(profile_output)?, - ProfileFormat::Speedscope => flamescope::dump(profile_output)?, + ProfileFormat::SpeedScore => flamescope::dump(profile_output)?, } Ok(()) diff --git a/stdlib/src/csv.rs b/stdlib/src/csv.rs index 39c15fd952..730d3b2feb 100644 --- a/stdlib/src/csv.rs +++ b/stdlib/src/csv.rs @@ -277,7 +277,7 @@ mod _csv { .map_err(|_| vm.new_type_error("argument 1 must be a dialect object".to_owned()))?, OptionalArg::Missing => opts.result(vm)?, }; - let dialect = opts.update_pydialect(dialect); + let dialect = opts.update_py_dialect(dialect); GLOBAL_HASHMAP .lock() .insert(name.as_str().to_owned(), dialect); @@ -665,7 +665,7 @@ mod _csv { } impl FormatOptions { - fn update_pydialect(&self, mut res: PyDialect) -> PyDialect { + fn update_py_dialect(&self, mut res: PyDialect) -> PyDialect { macro_rules! check_and_fill { ($res:ident, $e:ident) => {{ if let Some(t) = self.$e { @@ -699,18 +699,18 @@ mod _csv { DialectItem::Str(name) => { let g = GLOBAL_HASHMAP.lock(); if let Some(dialect) = g.get(name) { - Ok(self.update_pydialect(*dialect)) + Ok(self.update_py_dialect(*dialect)) } else { - Err(new_csv_error(vm, format!("{} is not registed.", name))) + Err(new_csv_error(vm, format!("{} is not registered.", name))) } // TODO // Maybe need to update the obj from HashMap } - DialectItem::Obj(o) => Ok(self.update_pydialect(*o)), + DialectItem::Obj(o) => Ok(self.update_py_dialect(*o)), DialectItem::None => { let g = GLOBAL_HASHMAP.lock(); let res = *g.get("excel").unwrap(); - Ok(self.update_pydialect(res)) + Ok(self.update_py_dialect(res)) } } } @@ -1084,8 +1084,8 @@ mod _csv { macro_rules! 
handle_res { ($x:expr) => {{ - let (res, nwritten) = $x; - buffer_offset += nwritten; + let (res, n_written) = $x; + buffer_offset += n_written; match res { csv_core::WriteResult::InputEmpty => break, csv_core::WriteResult::OutputFull => resize_buf(buffer), @@ -1118,10 +1118,10 @@ mod _csv { } loop { - let (res, nread, nwritten) = + let (res, n_read, n_written) = writer.field(&data[input_offset..], &mut buffer[buffer_offset..]); - input_offset += nread; - handle_res!((res, nwritten)); + input_offset += n_read; + handle_res!((res, n_written)); } } diff --git a/stdlib/src/json/machinery.rs b/stdlib/src/json/machinery.rs index 4612b5263d..a4344e363c 100644 --- a/stdlib/src/json/machinery.rs +++ b/stdlib/src/json/machinery.rs @@ -1,3 +1,4 @@ +// cspell:ignore LOJKINE // derived from https://github.com/lovasoa/json_in_type // BSD 2-Clause License diff --git a/stdlib/src/locale.rs b/stdlib/src/locale.rs index dfc6c93497..6cde173fb1 100644 --- a/stdlib/src/locale.rs +++ b/stdlib/src/locale.rs @@ -1,4 +1,4 @@ -// cspell:ignore abday abmon yesexpr +// cspell:ignore abday abmon yesexpr noexpr CRNCYSTR RADIXCHAR AMPM THOUSEP pub(crate) use _locale::make_module; diff --git a/stdlib/src/math.rs b/stdlib/src/math.rs index 93929e3566..6665ee8b49 100644 --- a/stdlib/src/math.rs +++ b/stdlib/src/math.rs @@ -652,7 +652,7 @@ mod math { partials.truncate(i); if x != 0.0 { if !x.is_finite() { - // a nonfinite x could arise either as + // a non-finite x could arise either as // a result of intermediate overflow, or // as a result of a nan or inf in the // summands diff --git a/stdlib/src/multiprocessing.rs b/stdlib/src/multiprocessing.rs index 2db922e16b..4a98c1afad 100644 --- a/stdlib/src/multiprocessing.rs +++ b/stdlib/src/multiprocessing.rs @@ -19,12 +19,12 @@ mod _multiprocessing { #[pyfunction] fn recv(socket: usize, size: usize, vm: &VirtualMachine) -> PyResult { let mut buf = vec![0; size]; - let nread = + let n_read = unsafe { WinSock::recv(socket as SOCKET, buf.as_mut_ptr() as *mut _, size as i32, 0) }; - if nread < 0 { + if n_read < 0 { Err(os::errno_err(vm)) } else { - Ok(nread) + Ok(n_read) } } diff --git a/stdlib/src/overlapped.rs b/stdlib/src/overlapped.rs index 007fa67423..6fd8a1516d 100644 --- a/stdlib/src/overlapped.rs +++ b/stdlib/src/overlapped.rs @@ -184,14 +184,14 @@ mod _overlapped { buf: buf.as_ptr() as *mut _, len: buf.len() as _, }; - let mut nread: u32 = 0; + let mut n_read: u32 = 0; // TODO: optimization with MaybeUninit let ret = unsafe { windows_sys::Win32::Networking::WinSock::WSARecv( handle as _, &wsabuf, 1, - &mut nread, + &mut n_read, &mut flags, &mut inner.overlapped, None, diff --git a/vm/src/anystr.rs b/vm/src/anystr.rs index 79b15b6a3f..03582215ba 100644 --- a/vm/src/anystr.rs +++ b/vm/src/anystr.rs @@ -167,7 +167,7 @@ pub trait AnyStr { full_obj: impl FnOnce() -> PyObjectRef, split: SP, splitn: SN, - splitw: SW, + split_whitespace: SW, ) -> PyResult> where T: TryFromObject + AnyStrWrapper, @@ -188,7 +188,7 @@ pub trait AnyStr { splitn(self, pattern, (args.maxsplit + 1) as usize, vm) } } else { - splitw(self, args.maxsplit, vm) + split_whitespace(self, args.maxsplit, vm) }; Ok(splits) } diff --git a/vm/src/builtins/bytearray.rs b/vm/src/builtins/bytearray.rs index 3d4822cf48..ce2232d8eb 100644 --- a/vm/src/builtins/bytearray.rs +++ b/vm/src/builtins/bytearray.rs @@ -9,7 +9,7 @@ use crate::{ anystr::{self, AnyStr}, atomic_func, byte::{bytes_from_object, value_from_object}, - bytesinner::{ + bytes_inner::{ ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, 
ByteInnerSplitOptions, ByteInnerTranslateOptions, DecodeArgs, PyBytesInner, bytes_decode, }, diff --git a/vm/src/builtins/bytes.rs b/vm/src/builtins/bytes.rs index 434de6a76c..77b9f9d526 100644 --- a/vm/src/builtins/bytes.rs +++ b/vm/src/builtins/bytes.rs @@ -6,7 +6,7 @@ use crate::{ TryFromBorrowedObject, TryFromObject, VirtualMachine, anystr::{self, AnyStr}, atomic_func, - bytesinner::{ + bytes_inner::{ ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, ByteInnerSplitOptions, ByteInnerTranslateOptions, DecodeArgs, PyBytesInner, bytes_decode, }, diff --git a/vm/src/builtins/dict.rs b/vm/src/builtins/dict.rs index fc2f206dd0..f78543a5f5 100644 --- a/vm/src/builtins/dict.rs +++ b/vm/src/builtins/dict.rs @@ -12,7 +12,7 @@ use crate::{ }, class::{PyClassDef, PyClassImpl}, common::ascii, - dictdatatype::{self, DictKey}, + dict_inner::{self, DictKey}, function::{ArgIterable, KwArgs, OptionalArg, PyArithmeticValue::*, PyComparisonValue}, iter::PyExactSizeIterator, protocol::{PyIterIter, PyIterReturn, PyMappingMethods, PyNumberMethods, PySequenceMethods}, @@ -27,7 +27,7 @@ use rustpython_common::lock::PyMutex; use std::fmt; use std::sync::LazyLock; -pub type DictContentType = dictdatatype::Dict; +pub type DictContentType = dict_inner::Dict; #[pyclass(module = false, name = "dict", unhashable = true, traverse)] #[derive(Default)] @@ -154,7 +154,7 @@ impl PyDict { self.entries.contains(vm, key).unwrap() } - pub fn size(&self) -> dictdatatype::DictSize { + pub fn size(&self) -> dict_inner::DictSize { self.entries.size() } } @@ -811,7 +811,7 @@ macro_rules! dict_view { #[pyclass(module = false, name = $iter_class_name)] #[derive(Debug)] pub(crate) struct $iter_name { - pub size: dictdatatype::DictSize, + pub size: dict_inner::DictSize, pub internal: PyMutex>, } @@ -884,7 +884,7 @@ macro_rules! dict_view { #[pyclass(module = false, name = $reverse_iter_class_name)] #[derive(Debug)] pub(crate) struct $reverse_iter_name { - pub size: dictdatatype::DictSize, + pub size: dict_inner::DictSize, internal: PyMutex>, } diff --git a/vm/src/builtins/float.rs b/vm/src/builtins/float.rs index 27f1f3273f..85f2a07bb9 100644 --- a/vm/src/builtins/float.rs +++ b/vm/src/builtins/float.rs @@ -1,5 +1,3 @@ -// spell-checker:ignore numer denom - use super::{ PyByteArray, PyBytes, PyInt, PyIntRef, PyStr, PyStrRef, PyType, PyTypeRef, try_bigint_to_f64, }; diff --git a/vm/src/builtins/function.rs b/vm/src/builtins/function.rs index 3181f1068f..e054ac4348 100644 --- a/vm/src/builtins/function.rs +++ b/vm/src/builtins/function.rs @@ -90,13 +90,13 @@ impl PyFunction { ) -> PyResult<()> { let code = &*self.code; let nargs = func_args.args.len(); - let nexpected_args = code.arg_count as usize; + let n_expected_args = code.arg_count as usize; let total_args = code.arg_count as usize + code.kwonlyarg_count as usize; // let arg_names = self.code.arg_names(); // This parses the arguments from args and kwargs into // the proper variables keeping into account default values - // and starargs and kwargs. + // and star-args and kwargs. 
// See also: PyEval_EvalCodeWithName in cpython: // https://github.com/python/cpython/blob/main/Python/ceval.c#L3681 @@ -108,7 +108,7 @@ impl PyFunction { // zip short-circuits if either iterator returns None, which is the behavior we want -- // only fill as much as there is to fill with as much as we have for (local, arg) in Iterator::zip( - fastlocals.iter_mut().take(nexpected_args), + fastlocals.iter_mut().take(n_expected_args), args_iter.by_ref().take(nargs), ) { *local = Some(arg); @@ -122,11 +122,11 @@ impl PyFunction { vararg_offset += 1; } else { // Check the number of positional arguments - if nargs > nexpected_args { + if nargs > n_expected_args { return Err(vm.new_type_error(format!( "{}() takes {} positional arguments but {} were given", self.qualname(), - nexpected_args, + n_expected_args, nargs ))); } @@ -141,7 +141,7 @@ impl PyFunction { None }; - let argpos = |range: std::ops::Range<_>, name: &str| { + let arg_pos = |range: std::ops::Range<_>, name: &str| { code.varnames .iter() .enumerate() @@ -155,7 +155,7 @@ impl PyFunction { // Handle keyword arguments for (name, value) in func_args.kwargs { // Check if we have a parameter with this name: - if let Some(pos) = argpos(code.posonlyarg_count as usize..total_args, &name) { + if let Some(pos) = arg_pos(code.posonlyarg_count as usize..total_args, &name) { let slot = &mut fastlocals[pos]; if slot.is_some() { return Err(vm.new_type_error(format!( @@ -167,7 +167,7 @@ impl PyFunction { *slot = Some(value); } else if let Some(kwargs) = kwargs.as_ref() { kwargs.set_item(&name, value, vm)?; - } else if argpos(0..code.posonlyarg_count as usize, &name).is_some() { + } else if arg_pos(0..code.posonlyarg_count as usize, &name).is_some() { posonly_passed_as_kwarg.push(name); } else { return Err(vm.new_type_error(format!( @@ -196,15 +196,15 @@ impl PyFunction { // Add missing positional arguments, if we have fewer positional arguments than the // function definition calls for - if nargs < nexpected_args { + if nargs < n_expected_args { let defaults = get_defaults!().0.as_ref().map(|tup| tup.as_slice()); let n_defs = defaults.map_or(0, |d| d.len()); - let nrequired = code.arg_count as usize - n_defs; + let n_required = code.arg_count as usize - n_defs; // Given the number of defaults available, check all the arguments for which we // _don't_ have defaults; if any are missing, raise an exception - let mut missing: Vec<_> = (nargs..nrequired) + let mut missing: Vec<_> = (nargs..n_required) .filter_map(|i| { if fastlocals[i].is_none() { Some(&code.varnames[i]) @@ -247,13 +247,13 @@ impl PyFunction { } if let Some(defaults) = defaults { - let n = std::cmp::min(nargs, nexpected_args); - let i = n.saturating_sub(nrequired); + let n = std::cmp::min(nargs, n_expected_args); + let i = n.saturating_sub(n_required); // We have sufficient defaults, so iterate over the corresponding names and use // the default if we don't already have a value for i in i..defaults.len() { - let slot = &mut fastlocals[nrequired + i]; + let slot = &mut fastlocals[n_required + i]; if slot.is_none() { *slot = Some(defaults[i].clone()); } diff --git a/vm/src/builtins/genericalias.rs b/vm/src/builtins/genericalias.rs index 549985bcfb..18649718dd 100644 --- a/vm/src/builtins/genericalias.rs +++ b/vm/src/builtins/genericalias.rs @@ -253,7 +253,7 @@ fn tuple_index(tuple: &PyTupleRef, item: &PyObjectRef) -> Option { fn subs_tvars( obj: PyObjectRef, params: &PyTupleRef, - argitems: &[PyObjectRef], + arg_items: &[PyObjectRef], vm: &VirtualMachine, ) -> PyResult { 
obj.get_attr(identifier!(vm, __parameters__), vm) @@ -267,7 +267,7 @@ fn subs_tvars( .iter() .map(|arg| { if let Some(idx) = tuple_index(params, arg) { - argitems[idx].clone() + arg_items[idx].clone() } else { arg.clone() } diff --git a/vm/src/builtins/int.rs b/vm/src/builtins/int.rs index 5f12f2490e..80aaae03eb 100644 --- a/vm/src/builtins/int.rs +++ b/vm/src/builtins/int.rs @@ -3,7 +3,7 @@ use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, PyResult, TryFromBorrowedObject, VirtualMachine, builtins::PyStrRef, - bytesinner::PyBytesInner, + bytes_inner::PyBytesInner, class::PyClassImpl, common::{ format::FormatSpec, diff --git a/vm/src/builtins/memory.rs b/vm/src/builtins/memory.rs index 09239e3e49..801d94fb36 100644 --- a/vm/src/builtins/memory.rs +++ b/vm/src/builtins/memory.rs @@ -6,7 +6,7 @@ use crate::{ AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromBorrowedObject, TryFromObject, VirtualMachine, atomic_func, buffer::FormatSpec, - bytesinner::bytes_to_hex, + bytes_inner::bytes_to_hex, class::PyClassImpl, common::{ borrow::{BorrowedValue, BorrowedValueMut}, diff --git a/vm/src/builtins/module.rs b/vm/src/builtins/module.rs index 8c8f22cf58..2cdc13a59c 100644 --- a/vm/src/builtins/module.rs +++ b/vm/src/builtins/module.rs @@ -122,7 +122,7 @@ impl Py { name.downcast::().ok() } - // TODO: to be replaced by the commented-out dict method above once dictoffsets land + // TODO: to be replaced by the commented-out dict method above once dictoffset land pub fn dict(&self) -> PyDictRef { self.as_object().dict().unwrap() } diff --git a/vm/src/builtins/set.rs b/vm/src/builtins/set.rs index 3e10e5c6b7..43e6ee1f7d 100644 --- a/vm/src/builtins/set.rs +++ b/vm/src/builtins/set.rs @@ -11,7 +11,7 @@ use crate::{ class::PyClassImpl, common::{ascii, hash::PyHash, lock::PyMutex, rc::PyRc}, convert::ToPyResult, - dictdatatype::{self, DictSize}, + dict_inner::{self, DictSize}, function::{ArgIterable, OptionalArg, PosArgs, PyArithmeticValue, PyComparisonValue}, protocol::{PyIterReturn, PyNumberMethods, PySequenceMethods}, recursion::ReprGuard, @@ -30,7 +30,7 @@ use rustpython_common::{ use std::sync::LazyLock; use std::{fmt, ops::Deref}; -pub type SetContentType = dictdatatype::Dict<()>; +pub type SetContentType = dict_inner::Dict<()>; #[pyclass(module = false, name = "set", unhashable = true, traverse)] #[derive(Default)] @@ -460,7 +460,7 @@ impl PySetInner { hash = self.content.try_fold_keys(hash, |h, element| { Ok(h ^ _shuffle_bits(element.hash(vm)? as u64)) })?; - // Disperse patterns arising in nested frozensets + // Disperse patterns arising in nested frozen-sets hash ^= (hash >> 11) ^ (hash >> 25); hash = hash.wrapping_mul(69069).wrapping_add(907133923); // -1 is reserved as an error code diff --git a/vm/src/builtins/tuple.rs b/vm/src/builtins/tuple.rs index 1b6e281657..1dc7861071 100644 --- a/vm/src/builtins/tuple.rs +++ b/vm/src/builtins/tuple.rs @@ -332,7 +332,7 @@ impl PyTuple { #[pymethod(magic)] fn getnewargs(zelf: PyRef, vm: &VirtualMachine) -> (PyTupleRef,) { // the arguments to pass to tuple() is just one tuple - so we'll be doing tuple(tup), which - // should just return tup, or tuplesubclass(tup), which'll copy/validate (e.g. for a + // should just return tup, or tuple_subclass(tup), which'll copy/validate (e.g. 
for a // structseq) let tup_arg = if zelf.class().is(vm.ctx.types.tuple_type) { zelf diff --git a/vm/src/builtins/type.rs b/vm/src/builtins/type.rs index 776c777cb3..a0c7ee58ff 100644 --- a/vm/src/builtins/type.rs +++ b/vm/src/builtins/type.rs @@ -320,7 +320,7 @@ impl PyType { } } - // This is used for class initialisation where the vm is not yet available. + // This is used for class initialization where the vm is not yet available. pub fn set_str_attr>( &self, attr_name: &str, @@ -451,7 +451,7 @@ impl Py { F: Fn(&Self) -> Option, { // the hot path will be primitive types which usually hit the result from itself. - // try std::intrinsics::likely once it is stablized + // try std::intrinsics::likely once it is stabilized if let Some(r) = f(self) { Some(r) } else { diff --git a/vm/src/bytesinner.rs b/vm/src/bytes_inner.rs similarity index 99% rename from vm/src/bytesinner.rs rename to vm/src/bytes_inner.rs index 88d2b9744f..10394721e7 100644 --- a/vm/src/bytesinner.rs +++ b/vm/src/bytes_inner.rs @@ -1,3 +1,4 @@ +// cspell:ignore unchunked use crate::{ AsObject, PyObject, PyObjectRef, PyPayload, PyResult, TryFromBorrowedObject, VirtualMachine, anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper}, diff --git a/vm/src/dictdatatype.rs b/vm/src/dict_inner.rs similarity index 99% rename from vm/src/dictdatatype.rs rename to vm/src/dict_inner.rs index ab37b7dc85..c49ab752de 100644 --- a/vm/src/dictdatatype.rs +++ b/vm/src/dict_inner.rs @@ -20,7 +20,7 @@ use num_traits::ToPrimitive; use std::{fmt, mem::size_of, ops::ControlFlow}; // HashIndex is intended to be same size with hash::PyHash -// but it doesn't mean the values are compatible with actual pyhash value +// but it doesn't mean the values are compatible with actual PyHash value /// hash value of an object returned by __hash__ type HashValue = hash::PyHash; @@ -691,7 +691,7 @@ impl Dict { type LookupResult = (IndexEntry, IndexIndex); /// Types implementing this trait can be used to index -/// the dictionary. Typical usecases are: +/// the dictionary. 
Typical use-cases are: /// - PyObjectRef -> arbitrary python type used as key /// - str -> string reference used as key, this is often used internally pub trait DictKey { diff --git a/vm/src/function/fspath.rs b/vm/src/function/fspath.rs index 74051644e0..28145e490a 100644 --- a/vm/src/function/fspath.rs +++ b/vm/src/function/fspath.rs @@ -62,7 +62,7 @@ impl FsPath { // TODO: FS encodings match self { FsPath::Str(s) => vm.fsencode(s), - FsPath::Bytes(b) => Self::bytes_as_osstr(b.as_bytes(), vm).map(Cow::Borrowed), + FsPath::Bytes(b) => Self::bytes_as_os_str(b.as_bytes(), vm).map(Cow::Borrowed), } } @@ -84,7 +84,7 @@ impl FsPath { pub fn to_path_buf(&self, vm: &VirtualMachine) -> PyResult { let path = match self { FsPath::Str(s) => PathBuf::from(s.as_str()), - FsPath::Bytes(b) => PathBuf::from(Self::bytes_as_osstr(b, vm)?), + FsPath::Bytes(b) => PathBuf::from(Self::bytes_as_os_str(b, vm)?), }; Ok(path) } @@ -99,8 +99,8 @@ impl FsPath { .map_err(|err| err.into_pyexception(vm)) } - pub fn bytes_as_osstr<'a>(b: &'a [u8], vm: &VirtualMachine) -> PyResult<&'a std::ffi::OsStr> { - rustpython_common::os::bytes_as_osstr(b) + pub fn bytes_as_os_str<'a>(b: &'a [u8], vm: &VirtualMachine) -> PyResult<&'a std::ffi::OsStr> { + rustpython_common::os::bytes_as_os_str(b) .map_err(|_| vm.new_unicode_decode_error("can't decode path for utf-8".to_owned())) } } diff --git a/vm/src/intern.rs b/vm/src/intern.rs index bb9220d069..08e41bb5b5 100644 --- a/vm/src/intern.rs +++ b/vm/src/intern.rs @@ -281,7 +281,7 @@ impl InternableString for PyRefExact { } pub trait MaybeInternedString: - AsRef + crate::dictdatatype::DictKey + sealed::SealedMaybeInterned + AsRef + crate::dict_inner::DictKey + sealed::SealedMaybeInterned { fn as_interned(&self) -> Option<&'static PyStrInterned>; } diff --git a/vm/src/lib.rs b/vm/src/lib.rs index 2e4afa3ea1..9561a9cc23 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -43,14 +43,14 @@ mod anystr; pub mod buffer; pub mod builtins; pub mod byte; -mod bytesinner; +mod bytes_inner; pub mod cformat; pub mod class; mod codecs; pub mod compiler; pub mod convert; mod coroutine; -mod dictdatatype; +mod dict_inner; #[cfg(feature = "rustpython-compiler")] pub mod eval; pub mod exceptions; diff --git a/vm/src/object/core.rs b/vm/src/object/core.rs index bbe900f7cd..a6049884d8 100644 --- a/vm/src/object/core.rs +++ b/vm/src/object/core.rs @@ -370,11 +370,11 @@ impl PyWeak { let dealloc = { let mut guard = unsafe { self.parent.as_ref().lock() }; let offset = std::mem::offset_of!(PyInner, payload); - let pyinner = (self as *const Self) + let py_inner = (self as *const Self) .cast::() .wrapping_sub(offset) .cast::>(); - let node_ptr = unsafe { NonNull::new_unchecked(pyinner as *mut Py) }; + let node_ptr = unsafe { NonNull::new_unchecked(py_inner as *mut Py) }; // the list doesn't have ownership over its PyRef! we're being dropped // right now so that should be obvious!! std::mem::forget(unsafe { guard.list.remove(node_ptr) }); diff --git a/vm/src/object/traverse.rs b/vm/src/object/traverse.rs index 9ff0f88343..c105d23feb 100644 --- a/vm/src/object/traverse.rs +++ b/vm/src/object/traverse.rs @@ -26,7 +26,7 @@ pub unsafe trait Traverse { /// If some field is not called, the worst results is just memory leak, /// but if some field is called repeatedly, panic and deadlock can happen. 
/// - /// - _**DO NOT**_ clone a `PyObjectRef` or `Pyef` in `traverse()` + /// - _**DO NOT**_ clone a `PyObjectRef` or `PyRef` in `traverse()` fn traverse(&self, traverse_fn: &mut TraverseFn<'_>); } diff --git a/vm/src/protocol/object.rs b/vm/src/protocol/object.rs index 256baa0fdf..1e972eb540 100644 --- a/vm/src/protocol/object.rs +++ b/vm/src/protocol/object.rs @@ -7,10 +7,10 @@ use crate::{ PyAsyncGen, PyBytes, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyStrRef, PyTuple, PyTupleRef, PyType, PyTypeRef, pystr::AsPyStr, }, - bytesinner::ByteInnerNewOptions, + bytes_inner::ByteInnerNewOptions, common::{hash::PyHash, str::to_ascii}, convert::{ToPyObject, ToPyResult}, - dictdatatype::DictKey, + dict_inner::DictKey, function::{Either, OptionalArg, PyArithmeticValue, PySetterValue}, object::PyPayload, protocol::{PyIter, PyMapping, PySequence}, diff --git a/vm/src/protocol/sequence.rs b/vm/src/protocol/sequence.rs index 5d5622c156..0681c3e664 100644 --- a/vm/src/protocol/sequence.rs +++ b/vm/src/protocol/sequence.rs @@ -118,7 +118,7 @@ impl PySequence<'_> { return f(self, other, vm); } - // if both arguments apear to be sequences, try fallback to __add__ + // if both arguments appear to be sequences, try fallback to __add__ if self.check() && other.to_sequence().check() { let ret = vm.binary_op1(self.obj, other, PyNumberBinaryOp::Add)?; if let PyArithmeticValue::Implemented(ret) = PyArithmeticValue::from_object(vm, ret) { @@ -156,7 +156,7 @@ impl PySequence<'_> { return f(self, other, vm); } - // if both arguments apear to be sequences, try fallback to __iadd__ + // if both arguments appear to be sequences, try fallback to __iadd__ if self.check() && other.to_sequence().check() { let ret = vm._iadd(self.obj, other)?; if let PyArithmeticValue::Implemented(ret) = PyArithmeticValue::from_object(vm, ret) { diff --git a/vm/src/py_io.rs b/vm/src/py_io.rs index c50f09e2bf..87df9a73d8 100644 --- a/vm/src/py_io.rs +++ b/vm/src/py_io.rs @@ -70,12 +70,12 @@ pub fn file_readline(obj: &PyObject, size: Option, vm: &VirtualMachine) - }; let ret = match_class!(match ret { s @ PyStr => { - let sval = s.as_str(); - if sval.is_empty() { + let s_val = s.as_str(); + if s_val.is_empty() { return Err(eof_err()); } - if let Some(nonl) = sval.strip_suffix('\n') { - vm.ctx.new_str(nonl).into() + if let Some(no_nl) = s_val.strip_suffix('\n') { + vm.ctx.new_str(no_nl).into() } else { s.into() } diff --git a/vm/src/scope.rs b/vm/src/scope.rs index e01209857c..7515468d78 100644 --- a/vm/src/scope.rs +++ b/vm/src/scope.rs @@ -141,7 +141,7 @@ impl Scope { // impl Sealed for super::PyStrRef {} // } // pub trait PyName: -// sealed::Sealed + crate::dictdatatype::DictKey + Clone + ToPyObject +// sealed::Sealed + crate::dict_inner::DictKey + Clone + ToPyObject // { // } // impl PyName for str {} diff --git a/vm/src/stdlib/ast/python.rs b/vm/src/stdlib/ast/python.rs index 50f8294c76..74c4db888a 100644 --- a/vm/src/stdlib/ast/python.rs +++ b/vm/src/stdlib/ast/python.rs @@ -19,8 +19,8 @@ pub(crate) mod _ast { fn init(zelf: PyObjectRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult<()> { let fields = zelf.get_attr("_fields", vm)?; let fields: Vec = fields.try_to_value(vm)?; - let numargs = args.args.len(); - if numargs > fields.len() { + let n_args = args.args.len(); + if n_args > fields.len() { return Err(vm.new_type_error(format!( "{} constructor takes at most {} positional argument{}", zelf.class().name(), @@ -33,7 +33,7 @@ pub(crate) mod _ast { } for (key, value) in args.kwargs { if let Some(pos) = 
fields.iter().position(|f| f.as_str() == key) { - if pos < numargs { + if pos < n_args { return Err(vm.new_type_error(format!( "{} got multiple values for argument '{}'", zelf.class().name(), diff --git a/vm/src/stdlib/io.rs b/vm/src/stdlib/io.rs index 6f13e0878d..3e1979e3d0 100644 --- a/vm/src/stdlib/io.rs +++ b/vm/src/stdlib/io.rs @@ -1360,7 +1360,7 @@ mod _io { }) } - pub fn repr_fileobj_name(obj: &PyObject, vm: &VirtualMachine) -> PyResult> { + pub fn repr_file_obj_name(obj: &PyObject, vm: &VirtualMachine) -> PyResult> { let name = match obj.get_attr("name", vm) { Ok(name) => Some(name), Err(e) @@ -1549,7 +1549,7 @@ mod _io { #[pyslot] fn slot_repr(zelf: &PyObject, vm: &VirtualMachine) -> PyResult { - let name_repr = repr_fileobj_name(zelf, vm)?; + let name_repr = repr_file_obj_name(zelf, vm)?; let cls = zelf.class(); let slot_name = cls.slot_name(); let repr = if let Some(name_repr) = name_repr { @@ -4293,7 +4293,7 @@ mod fileio { if fd < 0 { return Ok("<_io.FileIO [closed]>".to_owned()); } - let name_repr = repr_fileobj_name(zelf.as_object(), vm)?; + let name_repr = repr_file_obj_name(zelf.as_object(), vm)?; let mode = zelf.mode(); let closefd = if zelf.closefd.load() { "True" } else { "False" }; let repr = if let Some(name_repr) = name_repr { diff --git a/vm/src/stdlib/itertools.rs b/vm/src/stdlib/itertools.rs index 18641ac3b6..addfc991ff 100644 --- a/vm/src/stdlib/itertools.rs +++ b/vm/src/stdlib/itertools.rs @@ -87,7 +87,7 @@ mod decl { fn setstate(zelf: PyRef, state: PyTupleRef, vm: &VirtualMachine) -> PyResult<()> { let args = state.as_slice(); if args.is_empty() { - let msg = String::from("function takes at leat 1 arguments (0 given)"); + let msg = String::from("function takes at least 1 arguments (0 given)"); return Err(vm.new_type_error(msg)); } if args.len() > 2 { @@ -1892,14 +1892,14 @@ mod decl { return Ok(PyIterReturn::StopIteration(None)); } let mut result: Vec = Vec::new(); - let mut numactive = zelf.iterators.len(); + let mut num_active = zelf.iterators.len(); for idx in 0..zelf.iterators.len() { let next_obj = match zelf.iterators[idx].next(vm)? { PyIterReturn::Return(obj) => obj, PyIterReturn::StopIteration(v) => { - numactive -= 1; - if numactive == 0 { + num_active -= 1; + if num_active == 0 { return Ok(PyIterReturn::StopIteration(v)); } zelf.fillvalue.read().clone() diff --git a/vm/src/stdlib/marshal.rs b/vm/src/stdlib/marshal.rs index 564ee5bf6c..17d8ccd3e1 100644 --- a/vm/src/stdlib/marshal.rs +++ b/vm/src/stdlib/marshal.rs @@ -1,3 +1,4 @@ +// cspell:ignore pyfrozen pycomplex pub(crate) use decl::make_module; #[pymodule(name = "marshal")] diff --git a/vm/src/stdlib/operator.rs b/vm/src/stdlib/operator.rs index 38f931b0e7..fbb8147e9f 100644 --- a/vm/src/stdlib/operator.rs +++ b/vm/src/stdlib/operator.rs @@ -532,9 +532,9 @@ mod _operator { fn reduce(zelf: PyRef, vm: &VirtualMachine) -> PyResult { // With no kwargs, return (type(obj), (name, *args)) tuple. if zelf.args.kwargs.is_empty() { - let mut pargs = vec![zelf.name.as_object().to_owned()]; - pargs.append(&mut zelf.args.args.clone()); - Ok(vm.new_tuple((zelf.class().to_owned(), vm.ctx.new_tuple(pargs)))) + let mut py_args = vec![zelf.name.as_object().to_owned()]; + py_args.append(&mut zelf.args.args.clone()); + Ok(vm.new_tuple((zelf.class().to_owned(), vm.ctx.new_tuple(py_args)))) } else { // If we have kwargs, create a partial function that contains them and pass back that // along with the args. 
diff --git a/vm/src/stdlib/os.rs b/vm/src/stdlib/os.rs index 48e16ad41f..08a5051fe7 100644 --- a/vm/src/stdlib/os.rs +++ b/vm/src/stdlib/os.rs @@ -120,8 +120,8 @@ pub(super) struct FollowSymlinks( #[pyarg(named, name = "follow_symlinks", default = true)] pub bool, ); -fn bytes_as_osstr<'a>(b: &'a [u8], vm: &VirtualMachine) -> PyResult<&'a ffi::OsStr> { - rustpython_common::os::bytes_as_osstr(b) +fn bytes_as_os_str<'a>(b: &'a [u8], vm: &VirtualMachine) -> PyResult<&'a ffi::OsStr> { + rustpython_common::os::bytes_as_os_str(b) .map_err(|_| vm.new_unicode_decode_error("can't decode path for utf-8".to_owned())) } @@ -393,8 +393,8 @@ pub(super) mod _os { if key.is_empty() || key.contains(&b'=') { return Err(vm.new_value_error("illegal environment variable name".to_string())); } - let key = super::bytes_as_osstr(key, vm)?; - let value = super::bytes_as_osstr(value, vm)?; + let key = super::bytes_as_os_str(key, vm)?; + let value = super::bytes_as_os_str(value, vm)?; // SAFETY: requirements forwarded from the caller unsafe { env::set_var(key, value) }; Ok(()) @@ -415,7 +415,7 @@ pub(super) mod _os { ), )); } - let key = super::bytes_as_osstr(key, vm)?; + let key = super::bytes_as_os_str(key, vm)?; // SAFETY: requirements forwarded from the caller unsafe { env::remove_var(key) }; Ok(()) diff --git a/vm/src/stdlib/sre.rs b/vm/src/stdlib/sre.rs index fd41aa2b7d..fdb48c7524 100644 --- a/vm/src/stdlib/sre.rs +++ b/vm/src/stdlib/sre.rs @@ -228,7 +228,7 @@ mod _sre { } #[pymethod(name = "match")] - fn pymatch( + fn py_match( zelf: PyRef, string_args: StringArgs, vm: &VirtualMachine, @@ -242,7 +242,7 @@ mod _sre { let req = x.create_request(&zelf, pos, endpos); let mut state = State::default(); Ok(state - .pymatch(&req) + .py_match(&req) .then(|| Match::new(&mut state, zelf.clone(), string).into_ref(&vm.ctx))) }) } @@ -257,7 +257,7 @@ mod _sre { let mut req = x.create_request(&zelf, string_args.pos, string_args.endpos); req.match_all = true; let mut state = State::default(); - Ok(state.pymatch(&req).then(|| { + Ok(state.py_match(&req).then(|| { Match::new(&mut state, zelf.clone(), string_args.string).into_ref(&vm.ctx) })) }) @@ -346,11 +346,11 @@ mod _sre { #[pymethod] fn sub(zelf: PyRef, sub_args: SubArgs, vm: &VirtualMachine) -> PyResult { - Self::subx(zelf, sub_args, false, vm) + Self::sub_impl(zelf, sub_args, false, vm) } #[pymethod] fn subn(zelf: PyRef, sub_args: SubArgs, vm: &VirtualMachine) -> PyResult { - Self::subx(zelf, sub_args, true, vm) + Self::sub_impl(zelf, sub_args, true, vm) } #[pymethod] @@ -407,7 +407,7 @@ mod _sre { self.pattern.clone() } - fn subx( + fn sub_impl( zelf: PyRef, sub_args: SubArgs, subn: bool, @@ -860,12 +860,12 @@ mod _sre { } #[pymethod(name = "match")] - fn pymatch(&self, vm: &VirtualMachine) -> PyResult>> { + fn py_match(&self, vm: &VirtualMachine) -> PyResult>> { with_sre_str!(self.pattern, &self.string.clone(), vm, |s| { let mut req = s.create_request(&self.pattern, self.start.load(), self.end); let mut state = State::default(); req.must_advance = self.must_advance.load(); - let has_matched = state.pymatch(&req); + let has_matched = state.py_match(&req); self.must_advance .store(state.cursor.position == state.start); diff --git a/vm/src/stdlib/sys.rs b/vm/src/stdlib/sys.rs index fdfe2faf69..befa0f8dff 100644 --- a/vm/src/stdlib/sys.rs +++ b/vm/src/stdlib/sys.rs @@ -513,19 +513,19 @@ mod sys { } // Get the size of the version information block - let verblock_size = + let ver_block_size = GetFileVersionInfoSizeW(kernel32_path.as_ptr(), std::ptr::null_mut()); - if 
verblock_size == 0 { + if ver_block_size == 0 { return Err(std::io::Error::last_os_error()); } // Allocate a buffer to hold the version information - let mut verblock = vec![0u8; verblock_size as usize]; + let mut ver_block = vec![0u8; ver_block_size as usize]; if GetFileVersionInfoW( kernel32_path.as_ptr(), 0, - verblock_size, - verblock.as_mut_ptr() as *mut _, + ver_block_size, + ver_block.as_mut_ptr() as *mut _, ) == 0 { return Err(std::io::Error::last_os_error()); @@ -540,7 +540,7 @@ mod sys { let mut ffi_ptr: *mut VS_FIXEDFILEINFO = std::ptr::null_mut(); let mut ffi_len: u32 = 0; if VerQueryValueW( - verblock.as_ptr() as *const _, + ver_block.as_ptr() as *const _, sub_block.as_ptr(), &mut ffi_ptr as *mut *mut VS_FIXEDFILEINFO as *mut *mut _, &mut ffi_len as *mut u32, @@ -572,10 +572,10 @@ mod sys { let mut version: OSVERSIONINFOEXW = unsafe { std::mem::zeroed() }; version.dwOSVersionInfoSize = std::mem::size_of::() as u32; let result = unsafe { - let osvi = &mut version as *mut OSVERSIONINFOEXW as *mut OSVERSIONINFOW; + let os_vi = &mut version as *mut OSVERSIONINFOEXW as *mut OSVERSIONINFOW; // SAFETY: GetVersionExW accepts a pointer of OSVERSIONINFOW, but windows-sys crate's type currently doesn't allow to do so. // https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getversionexw#parameters - GetVersionExW(osvi) + GetVersionExW(os_vi) }; if result == 0 { diff --git a/vm/src/stdlib/thread.rs b/vm/src/stdlib/thread.rs index ad80f1f1e1..0ee087e5e4 100644 --- a/vm/src/stdlib/thread.rs +++ b/vm/src/stdlib/thread.rs @@ -334,8 +334,8 @@ pub(crate) mod _thread { ); } } - SENTINELS.with(|sents| { - for lock in sents.replace(Default::default()) { + SENTINELS.with(|sentinels| { + for lock in sentinels.replace(Default::default()) { if lock.mu.is_locked() { unsafe { lock.mu.unlock() }; } @@ -360,7 +360,7 @@ pub(crate) mod _thread { #[pyfunction] fn _set_sentinel(vm: &VirtualMachine) -> PyRef { let lock = Lock { mu: RawMutex::INIT }.into_ref(&vm.ctx); - SENTINELS.with(|sents| sents.borrow_mut().push(lock.clone())); + SENTINELS.with(|sentinels| sentinels.borrow_mut().push(lock.clone())); lock } @@ -385,7 +385,7 @@ pub(crate) mod _thread { #[pyclass(with(GetAttr, SetAttr), flags(BASETYPE))] impl Local { - fn ldict(&self, vm: &VirtualMachine) -> PyDictRef { + fn l_dict(&self, vm: &VirtualMachine) -> PyDictRef { self.data.get_or(|| vm.ctx.new_dict()).clone() } @@ -401,12 +401,12 @@ pub(crate) mod _thread { impl GetAttr for Local { fn getattro(zelf: &Py, attr: &Py, vm: &VirtualMachine) -> PyResult { - let ldict = zelf.ldict(vm); + let l_dict = zelf.l_dict(vm); if attr.as_str() == "__dict__" { - Ok(ldict.into()) + Ok(l_dict.into()) } else { zelf.as_object() - .generic_getattr_opt(attr, Some(ldict), vm)? + .generic_getattr_opt(attr, Some(l_dict), vm)? 
.ok_or_else(|| { vm.new_attribute_error(format!( "{} has no attribute '{}'", @@ -431,7 +431,7 @@ pub(crate) mod _thread { zelf.class().name() ))) } else { - let dict = zelf.ldict(vm); + let dict = zelf.l_dict(vm); if let PySetterValue::Assign(value) = value { dict.set_item(attr, value, vm)?; } else { diff --git a/vm/src/stdlib/time.rs b/vm/src/stdlib/time.rs index 5f41304c19..cc543e9249 100644 --- a/vm/src/stdlib/time.rs +++ b/vm/src/stdlib/time.rs @@ -243,8 +243,8 @@ mod decl { let timestamp = match value { Either::A(float) => { let secs = float.trunc() as i64; - let nsecs = (float.fract() * 1e9) as u32; - DateTime::::from_timestamp(secs, nsecs) + let nano_secs = (float.fract() * 1e9) as u32; + DateTime::::from_timestamp(secs, nano_secs) } Either::B(int) => DateTime::::from_timestamp(int, 0), }; diff --git a/vm/src/types/slot.rs b/vm/src/types/slot.rs index 2d8c825817..e2121973ec 100644 --- a/vm/src/types/slot.rs +++ b/vm/src/types/slot.rs @@ -390,7 +390,7 @@ impl PyType { }}; } - macro_rules! toggle_subslot { + macro_rules! toggle_sub_slot { ($group:ident, $name:ident, $func:expr) => { self.slots .$group @@ -520,90 +520,90 @@ impl PyType { toggle_slot!(del, del_wrapper); } _ if name == identifier!(ctx, __int__) => { - toggle_subslot!(as_number, int, number_unary_op_wrapper!(__int__)); + toggle_sub_slot!(as_number, int, number_unary_op_wrapper!(__int__)); } _ if name == identifier!(ctx, __index__) => { - toggle_subslot!(as_number, index, number_unary_op_wrapper!(__index__)); + toggle_sub_slot!(as_number, index, number_unary_op_wrapper!(__index__)); } _ if name == identifier!(ctx, __float__) => { - toggle_subslot!(as_number, float, number_unary_op_wrapper!(__float__)); + toggle_sub_slot!(as_number, float, number_unary_op_wrapper!(__float__)); } _ if name == identifier!(ctx, __add__) => { - toggle_subslot!(as_number, add, number_binary_op_wrapper!(__add__)); + toggle_sub_slot!(as_number, add, number_binary_op_wrapper!(__add__)); } _ if name == identifier!(ctx, __radd__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_add, number_binary_right_op_wrapper!(__radd__) ); } _ if name == identifier!(ctx, __iadd__) => { - toggle_subslot!(as_number, inplace_add, number_binary_op_wrapper!(__iadd__)); + toggle_sub_slot!(as_number, inplace_add, number_binary_op_wrapper!(__iadd__)); } _ if name == identifier!(ctx, __sub__) => { - toggle_subslot!(as_number, subtract, number_binary_op_wrapper!(__sub__)); + toggle_sub_slot!(as_number, subtract, number_binary_op_wrapper!(__sub__)); } _ if name == identifier!(ctx, __rsub__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_subtract, number_binary_right_op_wrapper!(__rsub__) ); } _ if name == identifier!(ctx, __isub__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, inplace_subtract, number_binary_op_wrapper!(__isub__) ); } _ if name == identifier!(ctx, __mul__) => { - toggle_subslot!(as_number, multiply, number_binary_op_wrapper!(__mul__)); + toggle_sub_slot!(as_number, multiply, number_binary_op_wrapper!(__mul__)); } _ if name == identifier!(ctx, __rmul__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_multiply, number_binary_right_op_wrapper!(__rmul__) ); } _ if name == identifier!(ctx, __imul__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, inplace_multiply, number_binary_op_wrapper!(__imul__) ); } _ if name == identifier!(ctx, __mod__) => { - toggle_subslot!(as_number, remainder, number_binary_op_wrapper!(__mod__)); + toggle_sub_slot!(as_number, remainder, number_binary_op_wrapper!(__mod__)); } _ if 
name == identifier!(ctx, __rmod__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_remainder, number_binary_right_op_wrapper!(__rmod__) ); } _ if name == identifier!(ctx, __imod__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, inplace_remainder, number_binary_op_wrapper!(__imod__) ); } _ if name == identifier!(ctx, __divmod__) => { - toggle_subslot!(as_number, divmod, number_binary_op_wrapper!(__divmod__)); + toggle_sub_slot!(as_number, divmod, number_binary_op_wrapper!(__divmod__)); } _ if name == identifier!(ctx, __rdivmod__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_divmod, number_binary_right_op_wrapper!(__rdivmod__) ); } _ if name == identifier!(ctx, __pow__) => { - toggle_subslot!(as_number, power, |a, b, c, vm| { + toggle_sub_slot!(as_number, power, |a, b, c, vm| { let args = if vm.is_none(c) { vec![b.to_owned()] } else { @@ -613,7 +613,7 @@ impl PyType { }); } _ if name == identifier!(ctx, __rpow__) => { - toggle_subslot!(as_number, right_power, |a, b, c, vm| { + toggle_sub_slot!(as_number, right_power, |a, b, c, vm| { let args = if vm.is_none(c) { vec![a.to_owned()] } else { @@ -623,141 +623,141 @@ impl PyType { }); } _ if name == identifier!(ctx, __ipow__) => { - toggle_subslot!(as_number, inplace_power, |a, b, _, vm| { + toggle_sub_slot!(as_number, inplace_power, |a, b, _, vm| { vm.call_special_method(a, identifier!(vm, __ipow__), (b.to_owned(),)) }); } _ if name == identifier!(ctx, __lshift__) => { - toggle_subslot!(as_number, lshift, number_binary_op_wrapper!(__lshift__)); + toggle_sub_slot!(as_number, lshift, number_binary_op_wrapper!(__lshift__)); } _ if name == identifier!(ctx, __rlshift__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_lshift, number_binary_right_op_wrapper!(__rlshift__) ); } _ if name == identifier!(ctx, __ilshift__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, inplace_lshift, number_binary_op_wrapper!(__ilshift__) ); } _ if name == identifier!(ctx, __rshift__) => { - toggle_subslot!(as_number, rshift, number_binary_op_wrapper!(__rshift__)); + toggle_sub_slot!(as_number, rshift, number_binary_op_wrapper!(__rshift__)); } _ if name == identifier!(ctx, __rrshift__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_rshift, number_binary_right_op_wrapper!(__rrshift__) ); } _ if name == identifier!(ctx, __irshift__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, inplace_rshift, number_binary_op_wrapper!(__irshift__) ); } _ if name == identifier!(ctx, __and__) => { - toggle_subslot!(as_number, and, number_binary_op_wrapper!(__and__)); + toggle_sub_slot!(as_number, and, number_binary_op_wrapper!(__and__)); } _ if name == identifier!(ctx, __rand__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_and, number_binary_right_op_wrapper!(__rand__) ); } _ if name == identifier!(ctx, __iand__) => { - toggle_subslot!(as_number, inplace_and, number_binary_op_wrapper!(__iand__)); + toggle_sub_slot!(as_number, inplace_and, number_binary_op_wrapper!(__iand__)); } _ if name == identifier!(ctx, __xor__) => { - toggle_subslot!(as_number, xor, number_binary_op_wrapper!(__xor__)); + toggle_sub_slot!(as_number, xor, number_binary_op_wrapper!(__xor__)); } _ if name == identifier!(ctx, __rxor__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_xor, number_binary_right_op_wrapper!(__rxor__) ); } _ if name == identifier!(ctx, __ixor__) => { - toggle_subslot!(as_number, inplace_xor, number_binary_op_wrapper!(__ixor__)); + toggle_sub_slot!(as_number, inplace_xor, 
number_binary_op_wrapper!(__ixor__)); } _ if name == identifier!(ctx, __or__) => { - toggle_subslot!(as_number, or, number_binary_op_wrapper!(__or__)); + toggle_sub_slot!(as_number, or, number_binary_op_wrapper!(__or__)); } _ if name == identifier!(ctx, __ror__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_or, number_binary_right_op_wrapper!(__ror__) ); } _ if name == identifier!(ctx, __ior__) => { - toggle_subslot!(as_number, inplace_or, number_binary_op_wrapper!(__ior__)); + toggle_sub_slot!(as_number, inplace_or, number_binary_op_wrapper!(__ior__)); } _ if name == identifier!(ctx, __floordiv__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, floor_divide, number_binary_op_wrapper!(__floordiv__) ); } _ if name == identifier!(ctx, __rfloordiv__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_floor_divide, number_binary_right_op_wrapper!(__rfloordiv__) ); } _ if name == identifier!(ctx, __ifloordiv__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, inplace_floor_divide, number_binary_op_wrapper!(__ifloordiv__) ); } _ if name == identifier!(ctx, __truediv__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, true_divide, number_binary_op_wrapper!(__truediv__) ); } _ if name == identifier!(ctx, __rtruediv__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_true_divide, number_binary_right_op_wrapper!(__rtruediv__) ); } _ if name == identifier!(ctx, __itruediv__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, inplace_true_divide, number_binary_op_wrapper!(__itruediv__) ); } _ if name == identifier!(ctx, __matmul__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, matrix_multiply, number_binary_op_wrapper!(__matmul__) ); } _ if name == identifier!(ctx, __rmatmul__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, right_matrix_multiply, number_binary_right_op_wrapper!(__rmatmul__) ); } _ if name == identifier!(ctx, __imatmul__) => { - toggle_subslot!( + toggle_sub_slot!( as_number, inplace_matrix_multiply, number_binary_op_wrapper!(__imatmul__) diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index 752943319d..4b1cece8c5 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -648,7 +648,7 @@ impl VirtualMachine { list_borrow = value.payload::().unwrap().borrow_vec(); &list_borrow } else { - return self.map_pyiter(value, func); + return self.map_py_iter(value, func); }; slice.iter().map(|obj| func(obj.clone())).collect() } @@ -682,12 +682,12 @@ impl VirtualMachine { ref t @ PyTuple => Ok(t.iter().cloned().map(f).collect()), // TODO: put internal iterable type obj => { - Ok(self.map_pyiter(obj, f)) + Ok(self.map_py_iter(obj, f)) } }) } - fn map_pyiter(&self, value: &PyObject, mut f: F) -> PyResult> + fn map_py_iter(&self, value: &PyObject, mut f: F) -> PyResult> where F: FnMut(PyObjectRef) -> PyResult, { diff --git a/vm/sre_engine/benches/benches.rs b/vm/sre_engine/benches/benches.rs index ee49b036de..e2372d783e 100644 --- a/vm/sre_engine/benches/benches.rs +++ b/vm/sre_engine/benches/benches.rs @@ -92,20 +92,20 @@ fn basic(c: &mut Criterion) { let (req, mut state) = p.state(s); assert!(state.search(req)); let (req, mut state) = p.state(s); - assert!(state.pymatch(&req)); + assert!(state.py_match(&req)); let (mut req, mut state) = p.state(s); req.match_all = true; - assert!(state.pymatch(&req)); + assert!(state.py_match(&req)); let s2 = format!("{}{}{}", " ".repeat(10000), s, " ".repeat(10000)); let (req, mut state) = p.state_range(s2.as_str(), 0..usize::MAX); assert!(state.search(req)); let (req, mut state) = 
p.state_range(s2.as_str(), 10000..usize::MAX); - assert!(state.pymatch(&req)); + assert!(state.py_match(&req)); let (req, mut state) = p.state_range(s2.as_str(), 10000..10000 + s.len()); - assert!(state.pymatch(&req)); + assert!(state.py_match(&req)); let (mut req, mut state) = p.state_range(s2.as_str(), 10000..10000 + s.len()); req.match_all = true; - assert!(state.pymatch(&req)); + assert!(state.py_match(&req)); }); }); } diff --git a/vm/sre_engine/src/engine.rs b/vm/sre_engine/src/engine.rs index 9b27f55031..1e0b15fd01 100644 --- a/vm/sre_engine/src/engine.rs +++ b/vm/sre_engine/src/engine.rs @@ -129,7 +129,7 @@ impl State { req.string.adjust_cursor(&mut self.cursor, start); } - pub fn pymatch(&mut self, req: &Request<'_, S>) -> bool { + pub fn py_match(&mut self, req: &Request<'_, S>) -> bool { self.start = req.start; req.string.adjust_cursor(&mut self.cursor, self.start); diff --git a/vm/sre_engine/tests/tests.rs b/vm/sre_engine/tests/tests.rs index 5499afa281..0ada32e5db 100644 --- a/vm/sre_engine/tests/tests.rs +++ b/vm/sre_engine/tests/tests.rs @@ -1,3 +1,4 @@ +// cspell:disable use rustpython_sre_engine::{Request, State, StrDrive}; struct Pattern { @@ -21,7 +22,7 @@ fn test_2427() { #[rustfmt::skip] let lookbehind = Pattern { pattern: "(?x)++x", code: &[14, 4, 0, 2, 4294967295, 28, 8, 1, 4294967295, 27, 4, 16, 120, 1, 1, 16, 120, 1] }; // END GENERATED let (req, mut state) = p.state("xxx"); - assert!(!state.pymatch(&req)); + assert!(!state.py_match(&req)); } #[test] @@ -162,7 +163,7 @@ fn test_bug_20998() { // END GENERATED let (mut req, mut state) = p.state("ABC"); req.match_all = true; - assert!(state.pymatch(&req)); + assert!(state.py_match(&req)); assert_eq!(state.cursor.position, 3); } @@ -173,7 +174,7 @@ fn test_bigcharset() { #[rustfmt::skip] let p = Pattern { pattern: "[a-z]*", code: &[14, 4, 0, 0, 4294967295, 24, 97, 0, 4294967295, 39, 92, 10, 3, 33685760, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 33686018, 0, 0, 0, 134217726, 0, 0, 0, 0, 0, 131072, 0, 2147483648, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1] }; // END GENERATED let (req, mut state) = p.state("x "); - assert!(state.pymatch(&req)); + assert!(state.py_match(&req)); assert_eq!(state.cursor.position, 1); } diff --git a/wasm/lib/src/convert.rs b/wasm/lib/src/convert.rs index 4f6e4db35c..bccf5564fa 100644 --- a/wasm/lib/src/convert.rs +++ b/wasm/lib/src/convert.rs @@ -33,8 +33,8 @@ extern "C" { } pub fn py_err_to_js_err(vm: &VirtualMachine, py_err: &PyBaseExceptionRef) -> JsValue { - let jserr = vm.try_class("_js", "JSError").ok(); - let js_arg = if jserr.is_some_and(|jserr| py_err.fast_isinstance(&jserr)) { + let js_err = vm.try_class("_js", "JSError").ok(); + let js_arg = if js_err.is_some_and(|js_err| py_err.fast_isinstance(&js_err)) { py_err.get_arg(0) } else { None @@ -116,7 +116,7 @@ pub fn py_to_js(vm: &VirtualMachine, py_obj: PyObjectRef) -> JsValue { } } let result = py_obj.call(py_func_args, vm); - pyresult_to_jsresult(vm, result) + 
pyresult_to_js_result(vm, result) }) }; let closure = Closure::wrap(Box::new(closure) @@ -164,7 +164,7 @@ pub fn object_entries(obj: &Object) -> impl Iterator Result { +pub fn pyresult_to_js_result(vm: &VirtualMachine, result: PyResult) -> Result { result .map(|value| py_to_js(vm, value)) .map_err(|err| py_err_to_js_err(vm, &err)) diff --git a/wasm/lib/src/js_module.rs b/wasm/lib/src/js_module.rs index a5b7281481..f159c467d0 100644 --- a/wasm/lib/src/js_module.rs +++ b/wasm/lib/src/js_module.rs @@ -326,7 +326,7 @@ mod _js { .map(|arg| PyJsValue::new(arg).into_pyobject(vm)), ); let res = py_obj.call(pyargs, vm); - convert::pyresult_to_jsresult(vm, res) + convert::pyresult_to_js_result(vm, res) }) }; let closure: ClosureType = if once { @@ -500,7 +500,7 @@ mod _js { Some(on_fulfill) => stored_vm.interp.enter(move |vm| { let val = convert::js_to_py(vm, val); let res = on_fulfill.invoke((val,), vm); - convert::pyresult_to_jsresult(vm, res) + convert::pyresult_to_js_result(vm, res) }), None => Ok(val), }, @@ -508,7 +508,7 @@ mod _js { Some(on_reject) => stored_vm.interp.enter(move |vm| { let err = new_js_error(vm, err); let res = on_reject.invoke((err,), vm); - convert::pyresult_to_jsresult(vm, res) + convert::pyresult_to_js_result(vm, res) }), None => Err(err), }, diff --git a/wasm/lib/src/vm_class.rs b/wasm/lib/src/vm_class.rs index c04877f7e3..d84603e986 100644 --- a/wasm/lib/src/vm_class.rs +++ b/wasm/lib/src/vm_class.rs @@ -340,7 +340,7 @@ impl WASMVirtualMachine { let code = vm.compile(source, mode, source_path); let code = code.map_err(convert::syntax_err)?; let result = vm.run_code_obj(code, scope.clone()); - convert::pyresult_to_jsresult(vm, result) + convert::pyresult_to_js_result(vm, result) })? } From 883e0cab292383a6e70db7a3150f36c62e478988 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Thu, 3 Apr 2025 09:23:06 -0700 Subject: [PATCH 011/126] build docs on CI and deny warnings --- .github/workflows/ci.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 05516c9270..b728c81bc2 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -325,6 +325,8 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} - name: install ruff run: python -m pip install ruff==0.0.291 # astral-sh/ruff#7778 + - name: Ensure docs generate no warnings + run: cargo doc -- -Dwarnings - name: run python lint run: ruff extra_tests wasm examples --exclude='./.*',./Lib,./vm/Lib,./benches/ --select=E9,F63,F7,F82 --show-source - name: install prettier From 5c854fc6902177f81dc8bb662b36b713d67615ea Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Thu, 3 Apr 2025 09:23:22 -0700 Subject: [PATCH 012/126] clear out warnings --- common/src/linked_list.rs | 2 +- src/lib.rs | 4 ++-- src/settings.rs | 4 ++++ src/shell.rs | 2 ++ stdlib/src/pystruct.rs | 4 ++-- vm/src/builtins/mod.rs | 4 ++-- vm/src/dict_inner.rs | 6 +++--- vm/src/function/builtin.rs | 2 +- vm/src/import.rs | 5 ++--- vm/src/lib.rs | 7 ++++--- vm/src/object/core.rs | 16 ++++++++-------- vm/src/object/traverse.rs | 6 +++--- vm/src/prelude.rs | 4 ++++ vm/src/protocol/buffer.rs | 2 +- vm/src/protocol/object.rs | 2 +- vm/src/readline.rs | 5 +++++ vm/src/suggestion.rs | 3 +++ vm/src/version.rs | 3 +-- vm/src/vm/interpreter.rs | 13 ++++++------- vm/src/vm/mod.rs | 4 ++-- vm/src/vm/vm_ops.rs | 4 ++-- wtf8/src/lib.rs | 2 +- 22 files changed, 60 insertions(+), 44 deletions(-) diff --git a/common/src/linked_list.rs b/common/src/linked_list.rs index 7f55d727fb..83577b71d1 100644 --- 
a/common/src/linked_list.rs +++ b/common/src/linked_list.rs @@ -1,4 +1,4 @@ -//! This module is modified from tokio::util::linked_list: https://github.com/tokio-rs/tokio/blob/master/tokio/src/util/linked_list.rs +//! This module is modified from tokio::util::linked_list: //! Tokio is licensed under the MIT license: //! //! Copyright (c) 2021 Tokio Contributors diff --git a/src/lib.rs b/src/lib.rs index e415d847cf..98de3de5c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,9 @@ //! This is the `rustpython` binary. If you're looking to embed RustPython into your application, -//! you're likely looking for the [`rustpython-vm`](https://docs.rs/rustpython-vm) crate. +//! you're likely looking for the [`rustpython_vm`] crate. //! //! You can install `rustpython` with `cargo install rustpython`, or if you'd like to inject your //! own native modules you can make a binary crate that depends on the `rustpython` crate (and -//! probably `rustpython-vm`, too), and make a `main.rs` that looks like: +//! probably [`rustpython_vm`], too), and make a `main.rs` that looks like: //! //! ```no_run //! use rustpython_vm::{pymodule, py_freeze}; diff --git a/src/settings.rs b/src/settings.rs index 76c46ac43a..00ee55bddd 100644 --- a/src/settings.rs +++ b/src/settings.rs @@ -13,7 +13,11 @@ pub enum RunMode { } pub enum InstallPipMode { + /// Install pip using the ensurepip pip module. This has a higher chance of + /// success, but may not install the latest version of pip. Ensurepip, + /// Install pip using the get-pip.py script, which retrieves the latest pip version. + /// This can be broken due to incompatibilities with cpython. GetPip, } diff --git a/src/shell.rs b/src/shell.rs index 98ee6eee21..04090a49fc 100644 --- a/src/shell.rs +++ b/src/shell.rs @@ -71,6 +71,8 @@ fn shell_exec( } } + +/// Enter a repl loop pub fn run_shell(vm: &VirtualMachine, scope: Scope) -> PyResult<()> { let mut repl = Readline::new(helper::ShellHelper::new(vm, scope.globals.clone())); let mut full_input = String::new(); diff --git a/stdlib/src/pystruct.rs b/stdlib/src/pystruct.rs index 220970dd20..9426470911 100644 --- a/stdlib/src/pystruct.rs +++ b/stdlib/src/pystruct.rs @@ -1,9 +1,9 @@ //! Python struct module. //! -//! Docs: https://docs.python.org/3/library/struct.html +//! Docs: //! //! Use this rust module to do byte packing: -//! https://docs.rs/byteorder/1.2.6/byteorder/ +//! pub(crate) use _struct::make_module; diff --git a/vm/src/builtins/mod.rs b/vm/src/builtins/mod.rs index ae3b7eea2a..8540e6887c 100644 --- a/vm/src/builtins/mod.rs +++ b/vm/src/builtins/mod.rs @@ -1,6 +1,6 @@ //! This package contains the python basic/builtin types -//! 7 common PyRef type aliases are exposed - PyBytesRef, PyDictRef, PyIntRef, PyListRef, PyStrRef, PyTypeRef, PyTupleRef -//! Do not add more PyRef type aliases. They will be rare enough to use directly PyRef. +//! 7 common PyRef type aliases are exposed - [`PyBytesRef`], [`PyDictRef`], [`PyIntRef`], [`PyListRef`], [`PyStrRef`], [`PyTypeRef`], [`PyTupleRef`] +//! Do not add more PyRef type aliases. They will be rare enough to use directly `PyRef`. pub(crate) mod asyncgenerator; pub use asyncgenerator::PyAsyncGen; diff --git a/vm/src/dict_inner.rs b/vm/src/dict_inner.rs index c49ab752de..56ebc5ebaf 100644 --- a/vm/src/dict_inner.rs +++ b/vm/src/dict_inner.rs @@ -1,7 +1,7 @@ //! Ordered dictionary implementation. -//! Inspired by: https://morepypy.blogspot.com/2015/01/faster-more-memory-efficient-and-more.html -//! And: https://www.youtube.com/watch?v=p33CVV29OG8 -//! 
And: http://code.activestate.com/recipes/578375/ +//! Inspired by: +//! And: +//! And: use crate::{ AsObject, Py, PyExact, PyObject, PyObjectRef, PyRefExact, PyResult, VirtualMachine, diff --git a/vm/src/function/builtin.rs b/vm/src/function/builtin.rs index b8a408453d..186dc7aeb8 100644 --- a/vm/src/function/builtin.rs +++ b/vm/src/function/builtin.rs @@ -65,7 +65,7 @@ const fn zst_ref_out_of_thin_air(x: T) -> &'static T { } } -/// Get the [`STATIC_FUNC`](IntoPyNativeFn::STATIC_FUNC) of the passed function. The same +/// Get the STATIC_FUNC of the passed function. The same /// requirements of zero-sizedness apply, see that documentation for details. /// /// Equivalent to [`IntoPyNativeFn::into_func()`], but usable in a const context. This is only diff --git a/vm/src/import.rs b/vm/src/import.rs index 416c40a844..90aadbdbf2 100644 --- a/vm/src/import.rs +++ b/vm/src/import.rs @@ -1,6 +1,5 @@ -/* - * Import mechanics - */ +//! Import mechanics + use crate::{ AsObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromObject, builtins::{PyBaseExceptionRef, PyCode, list, traceback::PyTraceback}, diff --git a/vm/src/lib.rs b/vm/src/lib.rs index 9561a9cc23..8ae5ff15db 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -1,9 +1,10 @@ -//! This crate contains most python logic. +//! This crate contains most of the python logic. //! -//! - Compilation -//! - Bytecode +//! - Interpreter //! - Import mechanics //! - Base objects +//! +//! Some stdlib modules are implemented here, but most of them are in the `rustpython-stdlib` module. The // to allow `mod foo {}` in foo.rs; clippy thinks this is a mistake/misunderstanding of // how `mod` works, but we want this sometimes for pymodule declarations diff --git a/vm/src/object/core.rs b/vm/src/object/core.rs index a6049884d8..8edcb4dfd6 100644 --- a/vm/src/object/core.rs +++ b/vm/src/object/core.rs @@ -1,14 +1,14 @@ //! Essential types for object models //! -//! +-------------------------+--------------+---------------+ -//! | Management | Typed | Untyped | -//! +-------------------------+--------------+---------------+ -//! | Interpreter-independent | Py | PyObject | -//! | Reference-counted | PyRef | PyObjectRef | -//! | Weak | PyWeakRef | PyRef | -//! +-------------------------+--------------+---------------+ +//! +-------------------------+--------------+-----------------------+ +//! | Management | Typed | Untyped | +//! +-------------------------+------------------+-------------------+ +//! | Interpreter-independent | [`Py`] | [`PyObject`] | +//! | Reference-counted | [`PyRef`] | [`PyObjectRef`] | +//! | Weak | [`PyWeakRef`] | [`PyRef`] | +//! +-------------------------+--------------+-----------------------+ //! -//! PyRef may looking like to be called as PyObjectWeak by the rule, +//! [`PyRef`] may looking like to be called as PyObjectWeak by the rule, //! but not to do to remember it is a PyRef object. 
use super::{ PyAtomicRef, diff --git a/vm/src/object/traverse.rs b/vm/src/object/traverse.rs index c105d23feb..46e5daff05 100644 --- a/vm/src/object/traverse.rs +++ b/vm/src/object/traverse.rs @@ -17,16 +17,16 @@ pub trait MaybeTraverse { fn try_traverse(&self, traverse_fn: &mut TraverseFn<'_>); } -/// Type that need traverse it's children should impl `Traverse`(Not `MaybeTraverse`) +/// Type that need traverse it's children should impl [`Traverse`] (not [`MaybeTraverse`]) /// # Safety -/// Please carefully read [`traverse()`] and follow the guideline +/// Please carefully read [`Traverse::traverse()`] and follow the guideline pub unsafe trait Traverse { /// impl `traverse()` with caution! Following those guideline so traverse doesn't cause memory error!: /// - Make sure that every owned object(Every PyObjectRef/PyRef) is called with traverse_fn **at most once**. /// If some field is not called, the worst results is just memory leak, /// but if some field is called repeatedly, panic and deadlock can happen. /// - /// - _**DO NOT**_ clone a `PyObjectRef` or `PyRef` in `traverse()` + /// - _**DO NOT**_ clone a [`PyObjectRef`] or [`PyRef`] in [`Traverse::traverse()`] fn traverse(&self, traverse_fn: &mut TraverseFn<'_>); } diff --git a/vm/src/prelude.rs b/vm/src/prelude.rs index 0bd0fe88be..b277f1468a 100644 --- a/vm/src/prelude.rs +++ b/vm/src/prelude.rs @@ -1,3 +1,7 @@ +//! The prelude imports the various objects and traits. +//! +//! The intention is that one can include `use rustpython_vm::prelude::*`. + pub use crate::{ object::{ AsObject, Py, PyExact, PyObject, PyObjectRef, PyPayload, PyRef, PyRefExact, PyResult, diff --git a/vm/src/protocol/buffer.rs b/vm/src/protocol/buffer.rs index a3b7f125f5..fcd44c11d3 100644 --- a/vm/src/protocol/buffer.rs +++ b/vm/src/protocol/buffer.rs @@ -1,5 +1,5 @@ //! Buffer protocol -//! https://docs.python.org/3/c-api/buffer.html +//! use crate::{ Py, PyObject, PyObjectRef, PyPayload, PyRef, PyResult, TryFromBorrowedObject, VirtualMachine, diff --git a/vm/src/protocol/object.rs b/vm/src/protocol/object.rs index 1e972eb540..eab24f82d0 100644 --- a/vm/src/protocol/object.rs +++ b/vm/src/protocol/object.rs @@ -1,5 +1,5 @@ //! Object Protocol -//! https://docs.python.org/3/c-api/object.html +//! use crate::{ AsObject, Py, PyObject, PyObjectRef, PyResult, TryFromObject, VirtualMachine, diff --git a/vm/src/readline.rs b/vm/src/readline.rs index 53647270e1..54a77f1289 100644 --- a/vm/src/readline.rs +++ b/vm/src/readline.rs @@ -1,3 +1,8 @@ +//! Readline interface for REPLs +//! +//! This module provides a common interface for reading lines from the console, with support for history and completion. +//! It uses the [`rustyline`] crate on non-WASM platforms and a custom implementation on WASM platforms. + use std::{io, path::Path}; type OtherError = Box; diff --git a/vm/src/suggestion.rs b/vm/src/suggestion.rs index 2bc9992d43..01f53d70ca 100644 --- a/vm/src/suggestion.rs +++ b/vm/src/suggestion.rs @@ -1,3 +1,6 @@ +//! This module provides functionality to suggest similar names for attributes or variables. +//! This is used during tracebacks. + use crate::{ AsObject, Py, PyObjectRef, VirtualMachine, builtins::{PyStr, PyStrRef}, diff --git a/vm/src/version.rs b/vm/src/version.rs index 7413f8f139..9d472e8be0 100644 --- a/vm/src/version.rs +++ b/vm/src/version.rs @@ -1,5 +1,4 @@ -/* Several function to retrieve version information. - */ +//! Several function to retrieve version information. 
use chrono::{Local, prelude::DateTime}; use std::time::{Duration, UNIX_EPOCH}; diff --git a/vm/src/vm/interpreter.rs b/vm/src/vm/interpreter.rs index cc669e0661..02c71bf136 100644 --- a/vm/src/vm/interpreter.rs +++ b/vm/src/vm/interpreter.rs @@ -64,7 +64,7 @@ impl Interpreter { /// /// To finalize the vm once all desired `enter`s are called, calling `finalize` will be helpful. /// - /// See also [`run`] for managed way to run the interpreter. + /// See also [`Interpreter::run`] for managed way to run the interpreter. pub fn enter(&self, f: F) -> R where F: FnOnce(&VirtualMachine) -> R, @@ -72,13 +72,12 @@ impl Interpreter { thread::enter_vm(&self.vm, || f(&self.vm)) } - /// Run [`enter`] and call `expect_pyresult` for the result. + /// Run [`Interpreter::enter`] and call [`VirtualMachine::expect_pyresult`] for the result. /// /// This function is useful when you want to expect a result from the function, /// but also print useful panic information when exception raised. /// - /// See [`enter`] for more information. - /// See [`expect_pyresult`] for more information. + /// See also [`Interpreter::enter`] and [`VirtualMachine::expect_pyresult`] for more information. pub fn enter_and_expect(&self, f: F, msg: &str) -> R where F: FnOnce(&VirtualMachine) -> PyResult, @@ -92,11 +91,11 @@ impl Interpreter { /// Run a function with the main virtual machine and return exit code. /// /// To enter vm context only once and safely terminate the vm, this function is preferred. - /// Unlike [`enter`], `run` calls finalize and returns exit code. + /// Unlike [`Interpreter::enter`], `run` calls finalize and returns exit code. /// You will not be able to obtain Python exception in this way. /// - /// See [`finalize`] for the finalization steps. - /// See also [`enter`] for pure function call to obtain Python exception. + /// See [`Interpreter::finalize`] for the finalization steps. + /// See also [`Interpreter::enter`] for pure function call to obtain Python exception. pub fn run(self, f: F) -> u8 where F: FnOnce(&VirtualMachine) -> PyResult<()>, diff --git a/vm/src/vm/mod.rs b/vm/src/vm/mod.rs index 4b1cece8c5..7baaae7770 100644 --- a/vm/src/vm/mod.rs +++ b/vm/src/vm/mod.rs @@ -61,7 +61,7 @@ pub const MAX_MEMORY_SIZE: usize = isize::MAX as usize; /// Top level container of a python virtual machine. In theory you could /// create more instances of this struct and have them operate fully isolated. /// -/// To construct this, please refer to the [`Interpreter`](Interpreter) +/// To construct this, please refer to the [`Interpreter`] pub struct VirtualMachine { pub builtins: PyRef, pub sys_module: PyRef, @@ -564,7 +564,7 @@ impl VirtualMachine { /// Call Python __import__ function without from_list. /// Roughly equivalent to `import module_name` or `import top.submodule`. /// - /// See also [`import_from`] for more advanced import. + /// See also [`VirtualMachine::import_from`] for more advanced import. /// See also [`rustpython_vm::import::import_source`] and other primitive import functions. 
#[inline] pub fn import<'a>(&self, module_name: impl AsPyStr<'a>, level: usize) -> PyResult { diff --git a/vm/src/vm/vm_ops.rs b/vm/src/vm/vm_ops.rs index c6be959a60..df33e822aa 100644 --- a/vm/src/vm/vm_ops.rs +++ b/vm/src/vm/vm_ops.rs @@ -152,9 +152,9 @@ impl VirtualMachine { /// Calling scheme used for binary operations: /// /// Order operations are tried until either a valid result or error: - /// b.rop(b,a)[*], a.op(a,b), b.rop(b,a) + /// `b.rop(b,a)[*], a.op(a,b), b.rop(b,a)` /// - /// [*] only when Py_TYPE(a) != Py_TYPE(b) && Py_TYPE(b) is a subclass of Py_TYPE(a) + /// `[*]` - only when Py_TYPE(a) != Py_TYPE(b) && Py_TYPE(b) is a subclass of Py_TYPE(a) pub fn binary_op1(&self, a: &PyObject, b: &PyObject, op_slot: PyNumberBinaryOp) -> PyResult { let class_a = a.class(); let class_b = b.class(); diff --git a/wtf8/src/lib.rs b/wtf8/src/lib.rs index 64ea42d06e..3ba28a5146 100644 --- a/wtf8/src/lib.rs +++ b/wtf8/src/lib.rs @@ -19,7 +19,7 @@ //! //! We use WTF-8 over something more similar to CPython's string implementation //! because of its compatibility with UTF-8, meaning that in the case where a -//! string has no surrogates, it can be viewed as a UTF-8 Rust [`str`] without +//! string has no surrogates, it can be viewed as a UTF-8 Rust [`prim@str`] without //! needing any copies or re-encoding. //! //! This implementation is mostly copied from the WTF-8 implementation in the From 36cce6b17464bb7744daf5e0d44eb24b223174ba Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Sat, 5 Apr 2025 00:14:03 +0900 Subject: [PATCH 013/126] run fmt --- src/shell.rs | 1 - vm/src/lib.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/shell.rs b/src/shell.rs index 04090a49fc..f733e03ddc 100644 --- a/src/shell.rs +++ b/src/shell.rs @@ -71,7 +71,6 @@ fn shell_exec( } } - /// Enter a repl loop pub fn run_shell(vm: &VirtualMachine, scope: Scope) -> PyResult<()> { let mut repl = Readline::new(helper::ShellHelper::new(vm, scope.globals.clone())); diff --git a/vm/src/lib.rs b/vm/src/lib.rs index 8ae5ff15db..de0042c619 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -3,7 +3,7 @@ //! - Interpreter //! - Import mechanics //! - Base objects -//! +//! //! Some stdlib modules are implemented here, but most of them are in the `rustpython-stdlib` module. The // to allow `mod foo {}` in foo.rs; clippy thinks this is a mistake/misunderstanding of From ab4dffb53ce2bb80a37efc000328c6d596296b18 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Fri, 4 Apr 2025 11:37:40 -0700 Subject: [PATCH 014/126] this should just fail if warnings happen because of RUSTFLAGS --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b728c81bc2..afd3201e28 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -326,7 +326,7 @@ jobs: - name: install ruff run: python -m pip install ruff==0.0.291 # astral-sh/ruff#7778 - name: Ensure docs generate no warnings - run: cargo doc -- -Dwarnings + run: cargo doc - name: run python lint run: ruff extra_tests wasm examples --exclude='./.*',./Lib,./vm/Lib,./benches/ --select=E9,F63,F7,F82 --show-source - name: install prettier From c2665e38ba636919d8deff103eef2d5215f5c05d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 4 Apr 2025 20:54:25 +0000 Subject: [PATCH 015/126] Bump openssl from 0.10.71 to 0.10.72 Bumps [openssl](https://github.com/sfackler/rust-openssl) from 0.10.71 to 0.10.72. 
- [Release notes](https://github.com/sfackler/rust-openssl/releases) - [Commits](https://github.com/sfackler/rust-openssl/compare/openssl-v0.10.71...openssl-v0.10.72) --- updated-dependencies: - dependency-name: openssl dependency-version: 0.10.72 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- Cargo.lock | 10 +++++----- stdlib/Cargo.toml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 577ef516bc..706f70f2d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1336,7 +1336,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -1659,9 +1659,9 @@ checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "openssl" -version = "0.10.71" +version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e14130c6a98cd258fdcb0fb6d744152343ff729cbfcb28c656a9d12b999fbcd" +checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ "bitflags 2.8.0", "cfg-if", @@ -1700,9 +1700,9 @@ dependencies = [ [[package]] name = "openssl-sys" -version = "0.9.106" +version = "0.9.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb61ea9811cc39e3c2069f40b8b8e2e70d8569b361f879786cc7ed48b777cdd" +checksum = "8288979acd84749c744a9014b4382d42b8f7b2592847b5afb2ed29e5d16ede07" dependencies = [ "cc", "libc", diff --git a/stdlib/Cargo.toml b/stdlib/Cargo.toml index 0ec23bf132..a8393206d6 100644 --- a/stdlib/Cargo.toml +++ b/stdlib/Cargo.toml @@ -106,7 +106,7 @@ rustix = { workspace = true } gethostname = "1.0.0" socket2 = { version = "0.5.8", features = ["all"] } dns-lookup = "2" -openssl = { version = "0.10.66", optional = true } +openssl = { version = "0.10.72", optional = true } openssl-sys = { version = "0.9.80", optional = true } openssl-probe = { version = "0.1.5", optional = true } foreign-types-shared = { version = "0.1.1", optional = true } From d800a6bb9852bdf5c8cadd291d7a8969a9a81301 Mon Sep 17 00:00:00 2001 From: Hanif Ariffin Date: Sat, 5 Apr 2025 13:53:40 +0800 Subject: [PATCH 016/126] Update test_math from CPython 3.13.2 (#5610) Implemnted fma in math module. 
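For readers unfamiliar with fused multiply-add, here is a minimal usage sketch of the new function. It is illustrative only and not part of the patch; it assumes the binding behaves as exercised by the updated test suite below (single rounding, ValueError on an inf * 0 product, OverflowError when a finite computation overflows).

    import math

    a = float.fromhex('0x1p-50')
    # fma computes a*b + c with a single rounding, so the tiny result survives:
    print(math.fma(a - 1.0, a + 1.0, 1.0))   # 2**-100, i.e. a*a exactly
    # The naive two-step expression rounds the product to -1.0 first and loses it:
    print((a - 1.0) * (a + 1.0) + 1.0)       # 0.0

    # Special cases follow IEEE 754-2008, matching the new tests:
    # math.fma(math.inf, 0.0, 2.3)   -> raises ValueError
    # math.fma(1e300, 1e300, 0.0)    -> raises OverflowError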
--- Lib/test/{ => mathdata}/cmath_testcases.txt | 3 + Lib/test/{ => mathdata}/ieee754.txt | 0 Lib/test/{ => mathdata}/math_testcases.txt | 0 Lib/test/test_math.py | 244 +++++++++++++++++++- stdlib/src/math.rs | 24 ++ 5 files changed, 268 insertions(+), 3 deletions(-) rename Lib/test/{ => mathdata}/cmath_testcases.txt (99%) rename Lib/test/{ => mathdata}/ieee754.txt (100%) rename Lib/test/{ => mathdata}/math_testcases.txt (100%) diff --git a/Lib/test/cmath_testcases.txt b/Lib/test/mathdata/cmath_testcases.txt similarity index 99% rename from Lib/test/cmath_testcases.txt rename to Lib/test/mathdata/cmath_testcases.txt index dd7e458ddc..0165e17634 100644 --- a/Lib/test/cmath_testcases.txt +++ b/Lib/test/mathdata/cmath_testcases.txt @@ -1536,6 +1536,7 @@ sqrt0141 sqrt -1.797e+308 -9.9999999999999999e+306 -> 3.7284476432057307e+152 -1 sqrt0150 sqrt 1.7976931348623157e+308 0.0 -> 1.3407807929942596355e+154 0.0 sqrt0151 sqrt 2.2250738585072014e-308 0.0 -> 1.4916681462400413487e-154 0.0 sqrt0152 sqrt 5e-324 0.0 -> 2.2227587494850774834e-162 0.0 +sqrt0153 sqrt 5e-324 1.0 -> 0.7071067811865476 0.7071067811865476 -- special values sqrt1000 sqrt 0.0 0.0 -> 0.0 0.0 @@ -1744,6 +1745,7 @@ cosh0023 cosh 2.218885944363501 2.0015727395883687 -> -1.94294321081968 4.129026 -- large real part cosh0030 cosh 710.5 2.3519999999999999 -> -1.2967465239355998e+308 1.3076707908857333e+308 cosh0031 cosh -710.5 0.69999999999999996 -> 1.4085466381392499e+308 -1.1864024666450239e+308 +cosh0032 cosh 720.0 0.0 -> inf 0.0 overflow -- Additional real values (mpmath) cosh0050 cosh 1e-150 0.0 -> 1.0 0.0 @@ -1853,6 +1855,7 @@ sinh0023 sinh 0.043713693678420068 0.22512549887532657 -> 0.042624198673416713 0 -- large real part sinh0030 sinh 710.5 -2.3999999999999999 -> -1.3579970564885919e+308 -1.24394470907798e+308 sinh0031 sinh -710.5 0.80000000000000004 -> -1.2830671601735164e+308 1.3210954193997678e+308 +sinh0032 sinh 720.0 0.0 -> inf 0.0 overflow -- Additional real values (mpmath) sinh0050 sinh 1e-100 0.0 -> 1.00000000000000002e-100 0.0 diff --git a/Lib/test/ieee754.txt b/Lib/test/mathdata/ieee754.txt similarity index 100% rename from Lib/test/ieee754.txt rename to Lib/test/mathdata/ieee754.txt diff --git a/Lib/test/math_testcases.txt b/Lib/test/mathdata/math_testcases.txt similarity index 100% rename from Lib/test/math_testcases.txt rename to Lib/test/mathdata/math_testcases.txt diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index fa79456ed4..bb02041644 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -33,8 +33,8 @@ else: file = __file__ test_dir = os.path.dirname(file) or os.curdir -math_testcases = os.path.join(test_dir, 'math_testcases.txt') -test_file = os.path.join(test_dir, 'cmath_testcases.txt') +math_testcases = os.path.join(test_dir, 'mathdata', 'math_testcases.txt') +test_file = os.path.join(test_dir, 'mathdata', 'cmath_testcases.txt') def to_ulps(x): @@ -2628,9 +2628,247 @@ def test_fractions(self): self.assertAllNotClose(fraction_examples, rel_tol=1e-9) +class FMATests(unittest.TestCase): + """ Tests for math.fma. """ + + def test_fma_nan_results(self): + # Selected representative values. + values = [ + -math.inf, -1e300, -2.3, -1e-300, -0.0, + 0.0, 1e-300, 2.3, 1e300, math.inf, math.nan + ] + + # If any input is a NaN, the result should be a NaN, too. 
+ for a, b in itertools.product(values, repeat=2): + self.assertIsNaN(math.fma(math.nan, a, b)) + self.assertIsNaN(math.fma(a, math.nan, b)) + self.assertIsNaN(math.fma(a, b, math.nan)) + + def test_fma_infinities(self): + # Cases involving infinite inputs or results. + positives = [1e-300, 2.3, 1e300, math.inf] + finites = [-1e300, -2.3, -1e-300, -0.0, 0.0, 1e-300, 2.3, 1e300] + non_nans = [-math.inf, -2.3, -0.0, 0.0, 2.3, math.inf] + + # ValueError due to inf * 0 computation. + for c in non_nans: + for infinity in [math.inf, -math.inf]: + for zero in [0.0, -0.0]: + with self.assertRaises(ValueError): + math.fma(infinity, zero, c) + with self.assertRaises(ValueError): + math.fma(zero, infinity, c) + + # ValueError when a*b and c both infinite of opposite signs. + for b in positives: + with self.assertRaises(ValueError): + math.fma(math.inf, b, -math.inf) + with self.assertRaises(ValueError): + math.fma(math.inf, -b, math.inf) + with self.assertRaises(ValueError): + math.fma(-math.inf, -b, -math.inf) + with self.assertRaises(ValueError): + math.fma(-math.inf, b, math.inf) + with self.assertRaises(ValueError): + math.fma(b, math.inf, -math.inf) + with self.assertRaises(ValueError): + math.fma(-b, math.inf, math.inf) + with self.assertRaises(ValueError): + math.fma(-b, -math.inf, -math.inf) + with self.assertRaises(ValueError): + math.fma(b, -math.inf, math.inf) + + # Infinite result when a*b and c both infinite of the same sign. + for b in positives: + self.assertEqual(math.fma(math.inf, b, math.inf), math.inf) + self.assertEqual(math.fma(math.inf, -b, -math.inf), -math.inf) + self.assertEqual(math.fma(-math.inf, -b, math.inf), math.inf) + self.assertEqual(math.fma(-math.inf, b, -math.inf), -math.inf) + self.assertEqual(math.fma(b, math.inf, math.inf), math.inf) + self.assertEqual(math.fma(-b, math.inf, -math.inf), -math.inf) + self.assertEqual(math.fma(-b, -math.inf, math.inf), math.inf) + self.assertEqual(math.fma(b, -math.inf, -math.inf), -math.inf) + + # Infinite result when a*b finite, c infinite. + for a, b in itertools.product(finites, finites): + self.assertEqual(math.fma(a, b, math.inf), math.inf) + self.assertEqual(math.fma(a, b, -math.inf), -math.inf) + + # Infinite result when a*b infinite, c finite. + for b, c in itertools.product(positives, finites): + self.assertEqual(math.fma(math.inf, b, c), math.inf) + self.assertEqual(math.fma(-math.inf, b, c), -math.inf) + self.assertEqual(math.fma(-math.inf, -b, c), math.inf) + self.assertEqual(math.fma(math.inf, -b, c), -math.inf) + + self.assertEqual(math.fma(b, math.inf, c), math.inf) + self.assertEqual(math.fma(b, -math.inf, c), -math.inf) + self.assertEqual(math.fma(-b, -math.inf, c), math.inf) + self.assertEqual(math.fma(-b, math.inf, c), -math.inf) + + # gh-73468: On some platforms, libc fma() doesn't implement IEE 754-2008 + # properly: it doesn't use the right sign when the result is zero. + @unittest.skipIf( + sys.platform.startswith(("freebsd", "wasi", "netbsd")) + or (sys.platform == "android" and platform.machine() == "x86_64"), + f"this platform doesn't implement IEE 754-2008 properly") + def test_fma_zero_result(self): + nonnegative_finites = [0.0, 1e-300, 2.3, 1e300] + + # Zero results from exact zero inputs. 
+ for b in nonnegative_finites: + self.assertIsPositiveZero(math.fma(0.0, b, 0.0)) + self.assertIsPositiveZero(math.fma(0.0, b, -0.0)) + self.assertIsNegativeZero(math.fma(0.0, -b, -0.0)) + self.assertIsPositiveZero(math.fma(0.0, -b, 0.0)) + self.assertIsPositiveZero(math.fma(-0.0, -b, 0.0)) + self.assertIsPositiveZero(math.fma(-0.0, -b, -0.0)) + self.assertIsNegativeZero(math.fma(-0.0, b, -0.0)) + self.assertIsPositiveZero(math.fma(-0.0, b, 0.0)) + + self.assertIsPositiveZero(math.fma(b, 0.0, 0.0)) + self.assertIsPositiveZero(math.fma(b, 0.0, -0.0)) + self.assertIsNegativeZero(math.fma(-b, 0.0, -0.0)) + self.assertIsPositiveZero(math.fma(-b, 0.0, 0.0)) + self.assertIsPositiveZero(math.fma(-b, -0.0, 0.0)) + self.assertIsPositiveZero(math.fma(-b, -0.0, -0.0)) + self.assertIsNegativeZero(math.fma(b, -0.0, -0.0)) + self.assertIsPositiveZero(math.fma(b, -0.0, 0.0)) + + # Exact zero result from nonzero inputs. + self.assertIsPositiveZero(math.fma(2.0, 2.0, -4.0)) + self.assertIsPositiveZero(math.fma(2.0, -2.0, 4.0)) + self.assertIsPositiveZero(math.fma(-2.0, -2.0, -4.0)) + self.assertIsPositiveZero(math.fma(-2.0, 2.0, 4.0)) + + # Underflow to zero. + tiny = 1e-300 + self.assertIsPositiveZero(math.fma(tiny, tiny, 0.0)) + self.assertIsNegativeZero(math.fma(tiny, -tiny, 0.0)) + self.assertIsPositiveZero(math.fma(-tiny, -tiny, 0.0)) + self.assertIsNegativeZero(math.fma(-tiny, tiny, 0.0)) + self.assertIsPositiveZero(math.fma(tiny, tiny, -0.0)) + self.assertIsNegativeZero(math.fma(tiny, -tiny, -0.0)) + self.assertIsPositiveZero(math.fma(-tiny, -tiny, -0.0)) + self.assertIsNegativeZero(math.fma(-tiny, tiny, -0.0)) + + # Corner case where rounding the multiplication would + # give the wrong result. + x = float.fromhex('0x1p-500') + y = float.fromhex('0x1p-550') + z = float.fromhex('0x1p-1000') + self.assertIsNegativeZero(math.fma(x-y, x+y, -z)) + self.assertIsPositiveZero(math.fma(y-x, x+y, z)) + self.assertIsNegativeZero(math.fma(y-x, -(x+y), -z)) + self.assertIsPositiveZero(math.fma(x-y, -(x+y), z)) + + def test_fma_overflow(self): + a = b = float.fromhex('0x1p512') + c = float.fromhex('0x1p1023') + # Overflow from multiplication. + with self.assertRaises(OverflowError): + math.fma(a, b, 0.0) + self.assertEqual(math.fma(a, b/2.0, 0.0), c) + # Overflow from the addition. + with self.assertRaises(OverflowError): + math.fma(a, b/2.0, c) + # No overflow, even though a*b overflows a float. + self.assertEqual(math.fma(a, b, -c), c) + + # Extreme case: a * b is exactly at the overflow boundary, so the + # tiniest offset makes a difference between overflow and a finite + # result. + a = float.fromhex('0x1.ffffffc000000p+511') + b = float.fromhex('0x1.0000002000000p+512') + c = float.fromhex('0x0.0000000000001p-1022') + with self.assertRaises(OverflowError): + math.fma(a, b, 0.0) + with self.assertRaises(OverflowError): + math.fma(a, b, c) + self.assertEqual(math.fma(a, b, -c), + float.fromhex('0x1.fffffffffffffp+1023')) + + # Another extreme case: here a*b is about as large as possible subject + # to math.fma(a, b, c) being finite. + a = float.fromhex('0x1.ae565943785f9p+512') + b = float.fromhex('0x1.3094665de9db8p+512') + c = float.fromhex('0x1.fffffffffffffp+1023') + self.assertEqual(math.fma(a, b, -c), c) + + def test_fma_single_round(self): + a = float.fromhex('0x1p-50') + self.assertEqual(math.fma(a - 1.0, a + 1.0, 1.0), a*a) + + def test_random(self): + # A collection of randomly generated inputs for which the naive FMA + # (with two rounds) gives a different result from a singly-rounded FMA. 
+ + # tuples (a, b, c, expected) + test_values = [ + ('0x1.694adde428b44p-1', '0x1.371b0d64caed7p-1', + '0x1.f347e7b8deab8p-4', '0x1.19f10da56c8adp-1'), + ('0x1.605401ccc6ad6p-2', '0x1.ce3a40bf56640p-2', + '0x1.96e3bf7bf2e20p-2', '0x1.1af6d8aa83101p-1'), + ('0x1.e5abd653a67d4p-2', '0x1.a2e400209b3e6p-1', + '0x1.a90051422ce13p-1', '0x1.37d68cc8c0fbbp+0'), + ('0x1.f94e8efd54700p-2', '0x1.123065c812cebp-1', + '0x1.458f86fb6ccd0p-1', '0x1.ccdcee26a3ff3p-1'), + ('0x1.bd926f1eedc96p-1', '0x1.eee9ca68c5740p-1', + '0x1.960c703eb3298p-2', '0x1.3cdcfb4fdb007p+0'), + ('0x1.27348350fbccdp-1', '0x1.3b073914a53f1p-1', + '0x1.e300da5c2b4cbp-1', '0x1.4c51e9a3c4e29p+0'), + ('0x1.2774f00b3497bp-1', '0x1.7038ec336bff0p-2', + '0x1.2f6f2ccc3576bp-1', '0x1.99ad9f9c2688bp-1'), + ('0x1.51d5a99300e5cp-1', '0x1.5cd74abd445a1p-1', + '0x1.8880ab0bbe530p-1', '0x1.3756f96b91129p+0'), + ('0x1.73cb965b821b8p-2', '0x1.218fd3d8d5371p-1', + '0x1.d1ea966a1f758p-2', '0x1.5217b8fd90119p-1'), + ('0x1.4aa98e890b046p-1', '0x1.954d85dff1041p-1', + '0x1.122b59317ebdfp-1', '0x1.0bf644b340cc5p+0'), + ('0x1.e28f29e44750fp-1', '0x1.4bcc4fdcd18fep-1', + '0x1.fd47f81298259p-1', '0x1.9b000afbc9995p+0'), + ('0x1.d2e850717fe78p-3', '0x1.1dd7531c303afp-1', + '0x1.e0869746a2fc2p-2', '0x1.316df6eb26439p-1'), + ('0x1.cf89c75ee6fbap-2', '0x1.b23decdc66825p-1', + '0x1.3d1fe76ac6168p-1', '0x1.00d8ea4c12abbp+0'), + ('0x1.3265ae6f05572p-2', '0x1.16d7ec285f7a2p-1', + '0x1.0b8405b3827fbp-1', '0x1.5ef33c118a001p-1'), + ('0x1.c4d1bf55ec1a5p-1', '0x1.bc59618459e12p-2', + '0x1.ce5b73dc1773dp-1', '0x1.496cf6164f99bp+0'), + ('0x1.d350026ac3946p-1', '0x1.9a234e149a68cp-2', + '0x1.f5467b1911fd6p-2', '0x1.b5cee3225caa5p-1'), + ] + for a_hex, b_hex, c_hex, expected_hex in test_values: + a = float.fromhex(a_hex) + b = float.fromhex(b_hex) + c = float.fromhex(c_hex) + expected = float.fromhex(expected_hex) + self.assertEqual(math.fma(a, b, c), expected) + self.assertEqual(math.fma(b, a, c), expected) + + # Custom assertions. 
+ def assertIsNaN(self, value): + self.assertTrue( + math.isnan(value), + msg="Expected a NaN, got {!r}".format(value) + ) + + def assertIsPositiveZero(self, value): + self.assertTrue( + value == 0 and math.copysign(1, value) > 0, + msg="Expected a positive zero, got {!r}".format(value) + ) + + def assertIsNegativeZero(self, value): + self.assertTrue( + value == 0 and math.copysign(1, value) < 0, + msg="Expected a negative zero, got {!r}".format(value) + ) + + def load_tests(loader, tests, pattern): from doctest import DocFileSuite - tests.addTest(DocFileSuite("ieee754.txt")) + tests.addTest(DocFileSuite(os.path.join("mathdata", "ieee754.txt"))) return tests if __name__ == '__main__': diff --git a/stdlib/src/math.rs b/stdlib/src/math.rs index 6665ee8b49..a7da60949c 100644 --- a/stdlib/src/math.rs +++ b/stdlib/src/math.rs @@ -975,4 +975,28 @@ mod math { Ok(result) } + + #[pyfunction] + fn fma( + x: ArgIntoFloat, + y: ArgIntoFloat, + z: ArgIntoFloat, + vm: &VirtualMachine, + ) -> PyResult { + let result = (*x).mul_add(*y, *z); + + if result.is_finite() { + return Ok(result); + } + + if result.is_nan() { + if !x.is_nan() && !y.is_nan() && !z.is_nan() { + return Err(vm.new_value_error("invalid operation in fma".to_string())); + } + } else if x.is_finite() && y.is_finite() && z.is_finite() { + return Err(vm.new_overflow_error("overflow in fma".to_string())); + } + + Ok(result) + } } From 2230d6c751eb8fdd2d7554df529574ea53b906b8 Mon Sep 17 00:00:00 2001 From: Hanif Ariffin Date: Sat, 5 Apr 2025 14:33:33 +0800 Subject: [PATCH 017/126] Fix not throwing the same error as CPython in test_pathlib.test_expanduser (#5578) * Fix not throwing the same error as CPython when trying to expanduser of a non-existent user Signed-off-by: Hanif Ariffin * add pwd test * Skip pwd test on windows --------- Signed-off-by: Hanif Ariffin Co-authored-by: Jeong YunWon Co-authored-by: Jeong, YunWon <69878+youknowone@users.noreply.github.com> --- extra_tests/snippets/stdlib_pwd.py | 12 ++++++++++++ vm/src/stdlib/pwd.rs | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 extra_tests/snippets/stdlib_pwd.py diff --git a/extra_tests/snippets/stdlib_pwd.py b/extra_tests/snippets/stdlib_pwd.py new file mode 100644 index 0000000000..6ef3a64d02 --- /dev/null +++ b/extra_tests/snippets/stdlib_pwd.py @@ -0,0 +1,12 @@ +import sys +# windows doesn't support pwd +if sys.platform.startswith("win"): + exit(0) + +from testutils import assert_raises +import pwd + +with assert_raises(KeyError): + fake_name = 'fake_user' + while pwd.getpwnam(fake_name): + fake_name += '1' diff --git a/vm/src/stdlib/pwd.rs b/vm/src/stdlib/pwd.rs index b95910c73f..20b4edb448 100644 --- a/vm/src/stdlib/pwd.rs +++ b/vm/src/stdlib/pwd.rs @@ -59,7 +59,7 @@ mod pwd { if pw_name.contains('\0') { return Err(exceptions::cstring_error(vm)); } - let user = User::from_name(name.as_str()).map_err(|err| err.into_pyexception(vm))?; + let user = User::from_name(name.as_str()).ok().flatten(); let user = user.ok_or_else(|| { vm.new_key_error( vm.ctx From 98137eb79c8813ad3b6de3e583d05572a0c89b35 Mon Sep 17 00:00:00 2001 From: Noa Date: Sun, 21 Aug 2022 15:10:50 -0500 Subject: [PATCH 018/126] Switch to const-initialized thread_local variables where appropriate --- common/src/static_cell.rs | 4 +++- common/src/str.rs | 5 ++++- vm/src/stdlib/thread.rs | 4 +++- wasm/lib/src/vm_class.rs | 4 +++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/common/src/static_cell.rs b/common/src/static_cell.rs index 30e34f187f..a8beee0820 100644 --- 
a/common/src/static_cell.rs +++ b/common/src/static_cell.rs @@ -56,7 +56,9 @@ mod non_threading { $($(#[$attr])* $vis static $name: $crate::static_cell::StaticCell<$t> = { ::std::thread_local! { - $vis static $name: $crate::lock::OnceCell<&'static $t> = $crate::lock::OnceCell::new(); + $vis static $name: $crate::lock::OnceCell<&'static $t> = const { + $crate::lock::OnceCell::new() + }; } $crate::static_cell::StaticCell::_from_local_key(&$name) };)+ diff --git a/common/src/str.rs b/common/src/str.rs index fa26959e0b..ca5e0d117f 100644 --- a/common/src/str.rs +++ b/common/src/str.rs @@ -486,7 +486,10 @@ pub mod levenshtein { pub fn levenshtein_distance(a: &str, b: &str, max_cost: usize) -> usize { thread_local! { - static BUFFER: RefCell<[usize; MAX_STRING_SIZE]> = const { RefCell::new([0usize; MAX_STRING_SIZE]) }; + #[allow(clippy::declare_interior_mutable_const)] + static BUFFER: RefCell<[usize; MAX_STRING_SIZE]> = const { + RefCell::new([0usize; MAX_STRING_SIZE]) + }; } if a == b { diff --git a/vm/src/stdlib/thread.rs b/vm/src/stdlib/thread.rs index 0ee087e5e4..b3e345b20a 100644 --- a/vm/src/stdlib/thread.rs +++ b/vm/src/stdlib/thread.rs @@ -355,7 +355,9 @@ pub(crate) mod _thread { Err(vm.new_exception_empty(vm.ctx.exceptions.system_exit.to_owned())) } - thread_local!(static SENTINELS: RefCell>> = RefCell::default()); + thread_local! { + static SENTINELS: RefCell>> = const { RefCell::new(Vec::new()) }; + } #[pyfunction] fn _set_sentinel(vm: &VirtualMachine) -> PyRef { diff --git a/wasm/lib/src/vm_class.rs b/wasm/lib/src/vm_class.rs index d84603e986..bbd895c989 100644 --- a/wasm/lib/src/vm_class.rs +++ b/wasm/lib/src/vm_class.rs @@ -86,7 +86,9 @@ pub fn add_init_func(f: fn(&mut VirtualMachine)) { // https://rustwasm.github.io/2018/10/24/multithreading-rust-and-wasm.html#atomic-instructions thread_local! { static STORED_VMS: RefCell>> = RefCell::default(); - static VM_INIT_FUNCS: RefCell> = RefCell::default(); + static VM_INIT_FUNCS: RefCell> = const { + RefCell::new(Vec::new()) + }; } pub fn get_vm_id(vm: &VirtualMachine) -> &str { From be56911598b2cbe0719b655a6861bf317ca2aa8a Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Sun, 6 Apr 2025 01:21:28 -0700 Subject: [PATCH 019/126] _tkinter pt. 
2 (#5640) --- .cspell.dict/python-more.txt | 1 + Cargo.lock | 791 +++++++++++++---------------------- Lib/tkinter/__init__.py | 4 +- flamegraph.svg | 1 + stdlib/Cargo.toml | 6 +- stdlib/src/tkinter.rs | 487 +++++++++++++++++++-- 6 files changed, 745 insertions(+), 545 deletions(-) create mode 100644 flamegraph.svg diff --git a/.cspell.dict/python-more.txt b/.cspell.dict/python-more.txt index 2edfe95bdf..32d13f59b9 100644 --- a/.cspell.dict/python-more.txt +++ b/.cspell.dict/python-more.txt @@ -224,6 +224,7 @@ sysvars teedata thisclass titlecased +tkapp tobytes tolist toreadonly diff --git a/Cargo.lock b/Cargo.lock index 706f70f2d9..f3a8f59af3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -59,9 +59,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.96" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4" +checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" [[package]] name = "approx" @@ -116,37 +116,24 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" -[[package]] -name = "bind_syn" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d6608ba072b4bc847774fac76963956592b5cdfa3751afcefa252fb61cb85b9" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", -] - [[package]] name = "bindgen" -version = "0.64.0" +version = "0.71.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.9.0", "cexpr", "clang-sys", - "lazy_static 1.5.0", - "lazycell", + "itertools 0.11.0", "log", - "peeking_take_while", + "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash 1.1.0", + "rustc-hash", "shlex", - "syn 1.0.109", - "which 4.4.2", + "syn 2.0.100", ] [[package]] @@ -157,9 +144,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] name = "blake2" @@ -201,15 +188,9 @@ dependencies = [ [[package]] name = "bytemuck" -version = "1.21.0" +version = "1.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" - -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +checksum = "b6b1fc10dbac614ebc03540c9dbd60e83887fda27794998c6528f1782047d540" [[package]] name = "bzip2" @@ -223,12 +204,11 @@ dependencies = [ [[package]] name = "bzip2-sys" -version = "0.1.12+1.0.8" +version = "0.1.13+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" dependencies = [ "cc", - "libc", "pkg-config", ] @@ -258,35 +238,13 @@ dependencies = [ [[package]] name = "cc" 
-version = "1.2.14" +version = "1.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9" +checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a" dependencies = [ "shlex", ] -[[package]] -name = "cex" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b0114a3f232423fadbbdbb692688e3e68c3b58b4b063ac3a7d0190d561080da" -dependencies = [ - "cex_derive", - "enumx", -] - -[[package]] -name = "cex_derive" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b5048cd656d7d2e739960fa33d9f95693005792dc8aad0af8b8f0b7d76c938d" -dependencies = [ - "indexmap 1.9.3", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "cexpr" version = "0.6.0" @@ -310,16 +268,16 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.39" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +checksum = "1a7964611d71df112cb1730f2ee67324fcf4d0fc6606acbbe9bfe06df124637c" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-targets 0.52.6", + "windows-link", ] [[package]] @@ -344,19 +302,6 @@ dependencies = [ "unicode-width 0.1.14", ] -[[package]] -name = "clib" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fda1a698cd341f055d3ae1fdbfb1cc441e7f9bacce795356ecc685e69134e957" -dependencies = [ - "anyhow", - "bindgen", - "inwelling", - "pkg-config", - "toml", -] - [[package]] name = "clipboard-win" version = "5.4.0" @@ -382,9 +327,9 @@ dependencies = [ [[package]] name = "console" -version = "0.15.10" +version = "0.15.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" dependencies = [ "encode_unicode", "libc", @@ -484,10 +429,10 @@ dependencies = [ "cranelift-entity", "cranelift-isle", "gimli", - "hashbrown 0.15.2", + "hashbrown", "log", "regalloc2", - "rustc-hash 2.1.1", + "rustc-hash", "serde", "smallvec", "target-lexicon 0.13.2", @@ -711,7 +656,7 @@ checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "unicode-xid", ] @@ -761,15 +706,15 @@ dependencies = [ [[package]] name = "dyn-clone" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "feeef44e73baff3a26d371801df019877a9866a8c493d315ab00177843314f35" +checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "encode_unicode" @@ -783,27 +728,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" -[[package]] -name = "enumx" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "32875abeb14f7fe2c2b8ad15e58f41701f455d124d0a03bc88132d5face2663f" -dependencies = [ - "enumx_derive", -] - -[[package]] -name = "enumx_derive" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa5d66efdd1eab6ea85ba31bdb58bed1e4ce218c1361061384ece88f40ebeb49" -dependencies = [ - "indexmap 1.9.3", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "env_logger" version = "0.9.3" @@ -851,13 +775,13 @@ checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" [[package]] name = "fd-lock" -version = "4.0.2" +version = "4.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e5768da2206272c81ef0b5e951a41862938a6070da63bcea197899942d3b947" +checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", - "rustix", - "windows-sys 0.52.0", + "rustix 1.0.3", + "windows-sys 0.59.0", ] [[package]] @@ -891,7 +815,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8168cbad48fdda10be94de9c6319f9e8ac5d3cf0a1abda1864269dfcca3d302a" dependencies = [ "flame", - "indexmap 2.7.1", + "indexmap", "serde", "serde_json", ] @@ -946,11 +870,11 @@ dependencies = [ [[package]] name = "gethostname" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fd4b8790c0792e3b11895efdf5f289ebe8b59107a6624f1cce68f24ff8c7035" +checksum = "ed7131e57abbde63513e0e6636f76668a1ca9798dcae2df4e283cae9ee83859e" dependencies = [ - "rustix", + "rustix 1.0.3", "windows-targets 0.52.6", ] @@ -978,16 +902,16 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.13.3+wasi-0.2.2", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", "wasm-bindgen", - "windows-targets 0.52.6", ] [[package]] @@ -997,7 +921,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" dependencies = [ "fallible-iterator", - "indexmap 2.7.1", + "indexmap", "stable_deref_trait", ] @@ -1015,20 +939,14 @@ checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" [[package]] name = "half" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1" dependencies = [ "cfg-if", "crunchy", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.15.2" @@ -1038,30 +956,12 @@ dependencies = [ "foldhash", ] -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 
-[[package]] -name = "heredom" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7a4d76fa670b51cfb56e908ad8bfd44a14fee853ea764790e46634d3fcdf4d" -dependencies = [ - "tuplex", -] - [[package]] name = "hermit-abi" version = "0.1.19" @@ -1100,16 +1000,17 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", - "windows-core", + "windows-core 0.61.0", ] [[package]] @@ -1123,35 +1024,25 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - -[[package]] -name = "indexmap" -version = "2.7.1" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" dependencies = [ "equivalent", - "hashbrown 0.15.2", + "hashbrown", ] [[package]] name = "indoc" -version = "2.0.5" +version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" [[package]] name = "insta" -version = "1.42.1" +version = "1.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c1b125e30d93896b365e156c33dadfffab45ee8400afcbba4752f59de08a86" +checksum = "50259abbaa67d11d2bcafc7ba1d094ed7a0c70e3ce893f0d0997f73558cb3084" dependencies = [ "console", "linked-hash-map", @@ -1160,26 +1051,16 @@ dependencies = [ "similar", ] -[[package]] -name = "inwelling" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f6292c68ffca1fa94ca8f95ca5ad2885d79d96377f1d37ced6a47cd26cfaf8c" -dependencies = [ - "toml", - "walkdir", -] - [[package]] name = "is-macro" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -1211,9 +1092,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "js-sys" @@ -1246,9 +1127,13 @@ dependencies = [ [[package]] name = "lambert_w" -version = "1.0.17" +version = "1.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45bf98425154bfe790a47b72ac452914f6df9ebfb202bc59e089e29db00258cf" +checksum = "0eeec1be8d026f51b1cf70ed28442b9f0ece61ff196cd3a99d8b4492a83a864b" +dependencies = [ + "num-complex", + "num-traits", +] [[package]] name = "lazy_static" @@ -1262,12 +1147,6 @@ version = "1.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "lexical-parse-float" version = "1.0.5" @@ -1300,15 +1179,15 @@ dependencies = [ [[package]] name = "lexopt" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baff4b617f7df3d896f97fe922b64817f6cd9a756bb81d40f8883f2f66dcb401" +checksum = "9fa0e2a1fcbe2f6be6c42e342259976206b383122fc152e872795338b5a3f3a7" [[package]] name = "libc" -version = "0.2.169" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libffi" @@ -1351,7 +1230,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "libc", ] @@ -1387,6 +1266,12 @@ version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +[[package]] +name = "linux-raw-sys" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe7db12097d22ec582439daf8618b8fdd1a7bef6270e9af3b1ebcd30893cf413" + [[package]] name = "lock_api" version = "0.4.12" @@ -1399,9 +1284,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.25" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "lz4_flex" @@ -1437,7 +1322,7 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5063891d2cec8fd20cabccbd3fc277bf8d5666f481fb3f79d999559b39a62713" dependencies = [ - "hashbrown 0.15.2", + "hashbrown", "itertools 0.11.0", "libm", "ryu", @@ -1533,9 +1418,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b" +checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" dependencies = [ "adler2", ] @@ -1549,12 +1434,6 @@ dependencies = [ "rand_core 0.9.3", ] -[[package]] -name = "mutf8" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b444426a4c188e9ad33560853ebd52309ab72811f536a9e6f37907fd12cf45" - [[package]] name = "nibble_vec" version = "0.1.0" @@ -1570,7 +1449,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "cfg-if", "cfg_aliases", "libc", @@ -1639,23 +1518,22 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" 
dependencies = [ - "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "once_cell" -version = "1.20.3" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "oorandom" -version = "11.1.4" +version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "openssl" @@ -1663,7 +1541,7 @@ version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "cfg-if", "foreign-types", "libc", @@ -1680,7 +1558,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -1745,7 +1623,7 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.8", + "redox_syscall 0.5.10", "smallvec", "windows-targets 0.52.6", ] @@ -1756,12 +1634,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - [[package]] name = "phf" version = "0.11.3" @@ -1802,29 +1674,29 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.9" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.9" +version = "1.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "plotters" @@ -1862,38 +1734,39 @@ checksum = "52a40bc70c2c58040d2d8b167ba9a5ff59fc9dab7ad44771cfde3dcfde7a09c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "portable-atomic" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy 0.7.35", + "zerocopy 0.8.24", ] [[package]] -name = "proc-macro-crate" -version = "3.3.0" +name = "prettyplease" +version = "0.2.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" +checksum = "5316f57387668042f561aae71480de936257848f9c43ce528e311d89a07cadeb" dependencies = [ - "toml_edit 0.22.24", + "proc-macro2", + "syn 2.0.100", ] [[package]] name = "proc-macro2" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" dependencies = [ "unicode-ident", ] @@ -1955,7 +1828,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -1964,22 +1837,28 @@ version = "0.22.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "radium" version = "0.7.0" @@ -2015,7 +1894,7 @@ checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", - "zerocopy 0.8.20", + "zerocopy 0.8.24", ] [[package]] @@ -2053,7 +1932,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.1", + "getrandom 0.3.2", ] [[package]] @@ -2084,11 +1963,11 @@ checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" [[package]] name = "redox_syscall" -version = "0.5.8" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" +checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", ] [[package]] @@ -2110,9 +1989,9 @@ checksum = "145c1c267e14f20fb0f88aa76a1c5ffec42d592c1d28b3cd9148ae35916158d3" dependencies = [ "allocator-api2", "bumpalo", - "hashbrown 0.15.2", + "hashbrown", "log", - "rustc-hash 2.1.1", + "rustc-hash", "smallvec", ] @@ -2175,7 +2054,7 @@ dependencies = [ "pmutil", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2184,7 +2063,7 @@ version = "0.0.0" source = "git+https://github.com/astral-sh/ruff.git?tag=0.11.0#2cd25ef6410fb5fca96af1578728a3d828d2d53a" dependencies = [ "aho-corasick", - "bitflags 2.8.0", + "bitflags 2.9.0", "compact_str", "is-macro", "itertools 0.14.0", @@ -2192,7 +2071,7 @@ dependencies = [ 
"ruff_python_trivia", "ruff_source_file", "ruff_text_size", - "rustc-hash 2.1.1", + "rustc-hash", ] [[package]] @@ -2200,14 +2079,14 @@ name = "ruff_python_parser" version = "0.0.0" source = "git+https://github.com/astral-sh/ruff.git?tag=0.11.0#2cd25ef6410fb5fca96af1578728a3d828d2d53a" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "bstr", "compact_str", "memchr", "ruff_python_ast", "ruff_python_trivia", "ruff_text_size", - "rustc-hash 2.1.1", + "rustc-hash", "static_assertions", "unicode-ident", "unicode-normalization", @@ -2239,12 +2118,6 @@ name = "ruff_text_size" version = "0.0.0" source = "git+https://github.com/astral-sh/ruff.git?tag=0.11.0#2cd25ef6410fb5fca96af1578728a3d828d2d53a" -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hash" version = "2.1.1" @@ -2257,10 +2130,23 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.15", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustix" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e56a18552996ac8d29ecc3b190b4fdbb2d91ca4ec396de7bbffaf43f3d637e96" +dependencies = [ + "bitflags 2.9.0", + "errno", + "libc", + "linux-raw-sys 0.9.3", "windows-sys 0.59.0", ] @@ -2291,8 +2177,8 @@ name = "rustpython-codegen" version = "0.4.0" dependencies = [ "ahash", - "bitflags 2.8.0", - "indexmap 2.7.1", + "bitflags 2.9.0", + "indexmap", "insta", "itertools 0.14.0", "log", @@ -2307,7 +2193,7 @@ dependencies = [ "rustpython-compiler-source", "rustpython-literal", "rustpython-wtf8", - "thiserror 2.0.11", + "thiserror 2.0.12", "unicode_names2", ] @@ -2316,10 +2202,10 @@ name = "rustpython-common" version = "0.4.0" dependencies = [ "ascii", - "bitflags 2.8.0", + "bitflags 2.9.0", "bstr", "cfg-if", - "getrandom 0.3.1", + "getrandom 0.3.2", "itertools 0.14.0", "libc", "lock_api", @@ -2352,14 +2238,14 @@ dependencies = [ "rustpython-codegen", "rustpython-compiler-core", "rustpython-compiler-source", - "thiserror 2.0.11", + "thiserror 2.0.12", ] [[package]] name = "rustpython-compiler-core" version = "0.4.0" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "itertools 0.14.0", "lz4_flex", "malachite-bigint", @@ -2384,7 +2270,7 @@ dependencies = [ "proc-macro2", "rustpython-compiler", "rustpython-derive-impl", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2397,9 +2283,9 @@ dependencies = [ "quote", "rustpython-compiler-core", "rustpython-doc", - "syn 2.0.98", + "syn 2.0.100", "syn-ext", - "textwrap 0.16.1", + "textwrap 0.16.2", ] [[package]] @@ -2422,7 +2308,7 @@ dependencies = [ "num-traits", "rustpython-compiler-core", "rustpython-derive", - "thiserror 2.0.11", + "thiserror 2.0.12", ] [[package]] @@ -2451,7 +2337,7 @@ dependencies = [ name = "rustpython-sre_engine" version = "0.4.0" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "criterion", "num_enum", "optional", @@ -2479,7 +2365,7 @@ dependencies = [ "foreign-types-shared", "gethostname", "hex", - "indexmap 2.7.1", + "indexmap", "itertools 0.14.0", "junction", "libc", @@ -2504,7 +2390,7 @@ dependencies = [ "paste", "puruspe", "rand_core 0.9.3", - "rustix", + "rustix 0.38.44", "rustpython-common", "rustpython-derive", 
"rustpython-vm", @@ -2514,9 +2400,9 @@ dependencies = [ "sha3", "socket2", "system-configuration", - "tcl", + "tcl-sys", "termios", - "tk", + "tk-sys", "ucd", "unic-char-property", "unic-normal", @@ -2538,7 +2424,7 @@ version = "0.4.0" dependencies = [ "ahash", "ascii", - "bitflags 2.8.0", + "bitflags 2.9.0", "bstr", "caseless", "cfg-if", @@ -2548,11 +2434,11 @@ dependencies = [ "exitcode", "flame", "flamer", - "getrandom 0.3.1", + "getrandom 0.3.2", "glob", - "half 2.4.1", + "half 2.5.0", "hex", - "indexmap 2.7.1", + "indexmap", "is-macro", "itertools 0.14.0", "junction", @@ -2578,7 +2464,7 @@ dependencies = [ "ruff_python_parser", "ruff_source_file", "ruff_text_size", - "rustix", + "rustix 0.38.44", "rustpython-codegen", "rustpython-common", "rustpython-compiler", @@ -2592,9 +2478,9 @@ dependencies = [ "schannel", "serde", "static_assertions", - "strum 0.27.1", - "strum_macros 0.27.1", - "thiserror 2.0.11", + "strum", + "strum_macros", + "thiserror 2.0.12", "thread_local", "timsort", "uname", @@ -2604,7 +2490,7 @@ dependencies = [ "unicode-casing", "unicode_names2", "wasm-bindgen", - "which 6.0.3", + "which", "widestring", "windows", "windows-sys 0.59.0", @@ -2642,9 +2528,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "rustyline" @@ -2652,7 +2538,7 @@ version = "15.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2ee1e066dc922e513bda599c6ccb5f3bb2b0ea5870a579448f2622993f0a9a2f" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", "cfg-if", "clipboard-win", "fd-lock", @@ -2670,9 +2556,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" @@ -2700,9 +2586,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] @@ -2731,20 +2617,20 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "serde_json" -version = "1.0.139" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", "memchr", @@ -2752,15 +2638,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_spanned" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" -dependencies = [ - "serde", -] - [[package]] name = "sha-1" version = "0.10.1" @@ -2819,9 +2696,9 @@ checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" [[package]] name = "socket2" -version = "0.5.8" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" dependencies = [ "libc", "windows-sys 0.52.0", @@ -2839,41 +2716,23 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "strum" -version = "0.19.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b89a286a7e3b5720b9a477b23253bc50debac207c8d21505f8e70b36792f11b5" - [[package]] name = "strum" version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32" -[[package]] -name = "strum_macros" -version = "0.19.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e61bb0be289045cb80bfce000512e32d09f8337e54c186725da381377ad1f8d5" -dependencies = [ - "heck 0.3.3", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "strum_macros" version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "rustversion", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2895,9 +2754,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.98" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -2912,7 +2771,7 @@ checksum = "b126de4ef6c2a628a68609dd00733766c3b015894698a438ebdf374933fc31d1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -2949,33 +2808,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" [[package]] -name = "tcl" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d0928e8b4dca8ebd485f687f725bb34e454c7a28c1d353bf7d1b8060581bf" -dependencies = [ - "cex", - "clib", - "enumx", - "inwelling", - "mutf8", - "serde", - "serde_derive", - "tcl_derive", - "tuplex", -] - -[[package]] -name = "tcl_derive" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "625d95e672231bbf31dead6861b0ad72bcb71a2891b26b0c4924cd1cc9687b93" +name = "tcl-sys" +version = "0.1.0" +source = "git+https://github.com/arihant2math/tkinter.git?tag=v0.1.0#09a4f62e894df64692b34e6c7f81af1e1ae376dd" dependencies = [ - "bind_syn", - "proc-macro2", - "quote", - "syn 2.0.98", - "uuid", + "bindgen", + "pkg-config", ] [[package]] @@ -3007,9 +2845,9 @@ dependencies = [ [[package]] name = "textwrap" -version = "0.16.1" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" +checksum = 
"c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" [[package]] name = "thiserror" @@ -3022,11 +2860,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ - "thiserror-impl 2.0.11", + "thiserror-impl 2.0.12", ] [[package]] @@ -3037,18 +2875,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "thiserror-impl" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] @@ -3090,9 +2928,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8" +checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" dependencies = [ "tinyvec_macros", ] @@ -3104,78 +2942,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] -name = "tk" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fbe29c813c9eee5e0d4d996a4a615f6538220f0ad181269a413f21c13eb077" -dependencies = [ - "bitflags 1.3.2", - "cex", - "clib", - "enumx", - "heredom", - "inwelling", - "num_enum", - "once_cell", - "serde", - "strum 0.19.5", - "strum_macros 0.19.4", - "tcl", - "tcl_derive", - "tuplex", -] - -[[package]] -name = "toml" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257" -dependencies = [ - "serde", - "serde_spanned", - "toml_datetime", - "toml_edit 0.19.15", -] - -[[package]] -name = "toml_datetime" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" -dependencies = [ - "serde", -] - -[[package]] -name = "toml_edit" -version = "0.19.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" -dependencies = [ - "indexmap 2.7.1", - "serde", - "serde_spanned", - "toml_datetime", - "winnow 0.5.40", -] - -[[package]] -name = "toml_edit" -version = "0.22.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" +name = "tk-sys" +version = "0.1.0" +source = "git+https://github.com/arihant2math/tkinter.git?tag=v0.1.0#09a4f62e894df64692b34e6c7f81af1e1ae376dd" dependencies = [ - "indexmap 2.7.1", - "toml_datetime", - "winnow 0.7.4", + "bindgen", + "pkg-config", ] -[[package]] -name = "tuplex" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "676ac81d5454c4dcf37955d34fa8626ede3490f744b86ca14a7b90168d2a08aa" - [[package]] name = "twox-hash" version = 
"1.6.3" @@ -3320,9 +3094,9 @@ checksum = "623f59e6af2a98bdafeb93fa277ac8e1e40440973001ca15cf4ae1541cd16d56" [[package]] name = "unicode-ident" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-normalization" @@ -3381,9 +3155,9 @@ dependencies = [ [[package]] name = "unindent" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" [[package]] name = "utf8parse" @@ -3393,12 +3167,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.13.2" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c1f41ffb7cf259f1ecc2876861a17e7142e63ead296f671f81f6ae85903e0d6" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" dependencies = [ "atomic", - "getrandom 0.3.1", ] [[package]] @@ -3437,9 +3210,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi" -version = "0.13.3+wasi-0.2.2" +version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] @@ -3466,7 +3239,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "wasm-bindgen-shared", ] @@ -3501,7 +3274,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3537,18 +3310,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - [[package]] name = "which" version = "6.0.3" @@ -3557,15 +3318,15 @@ checksum = "b4ee928febd44d98f2f459a4a79bd4d928591333a494a10a868418ac1b39cf1f" dependencies = [ "either", "home", - "rustix", + "rustix 0.38.44", "winsafe", ] [[package]] name = "widestring" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7219d36b6eac893fa81e84ebe06485e7dcbb616177469b142df14f1f4deb1311" +checksum = "dd7cf3379ca1aac9eea11fba24fd7e315d621f8dfe35c8d7d2be8b793726e07d" [[package]] name = "winapi" @@ -3604,7 +3365,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ - "windows-core", + "windows-core 0.52.0", "windows-targets 0.52.6", ] @@ -3617,6 +3378,65 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-core" +version = "0.61.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4763c1de310c86d75a878046489e2e5ba02c649d185f21c67d4cf8a56d098980" +dependencies = [ + "windows-implement", + "windows-interface", + 
"windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + +[[package]] +name = "windows-result" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ba9642430ee452d5a7aa78d72907ebe8cfda358e8cb7918a2050581322f97" +dependencies = [ + "windows-link", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -3765,24 +3585,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "winnow" -version = "0.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" -dependencies = [ - "memchr", -] - -[[package]] -name = "winnow" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e97b544156e9bebe1a0ffbc03484fc1ffe3100cbce3ffb17eac35f7cdd7ab36" -dependencies = [ - "memchr", -] - [[package]] name = "winreg" version = "0.55.0" @@ -3801,11 +3603,11 @@ checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" [[package]] name = "wit-bindgen-rt" -version = "0.33.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.8.0", + "bitflags 2.9.0", ] [[package]] @@ -3820,17 +3622,16 @@ version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ - "byteorder", "zerocopy-derive 0.7.35", ] [[package]] name = "zerocopy" -version = "0.8.20" +version = "0.8.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dde3bb8c68a8f3f1ed4ac9221aad6b10cece3e60a8e2ea54a6a2dec806d0084c" +checksum = "2586fea28e186957ef732a5f8b3be2da217d65c5969d4b1e17f973ebbe876879" dependencies = [ - "zerocopy-derive 0.8.20", + "zerocopy-derive 0.8.24", ] [[package]] @@ -3841,18 +3642,18 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] name = "zerocopy-derive" -version = "0.8.20" +version = "0.8.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eea57037071898bf96a6da35fd626f4f27e9cee3ead2a6c703cf09d472b2e700" +checksum = 
"a996a8f63c5c4448cd959ac1bab0aaa3306ccfd060472f85943ee0750f0169be" dependencies = [ "proc-macro2", "quote", - "syn 2.0.98", + "syn 2.0.100", ] [[package]] diff --git a/Lib/tkinter/__init__.py b/Lib/tkinter/__init__.py index 8b2502b4c0..df3b936ccd 100644 --- a/Lib/tkinter/__init__.py +++ b/Lib/tkinter/__init__.py @@ -2451,7 +2451,9 @@ def __init__(self, screenName=None, baseName=None, className='Tk', self.tk = None if baseName is None: import os - baseName = os.path.basename(sys.argv[0]) + # TODO: RUSTPYTHON + # baseName = os.path.basename(sys.argv[0]) + baseName = "" # sys.argv[0] baseName, ext = os.path.splitext(baseName) if ext not in ('.py', '.pyc'): baseName = baseName + ext diff --git a/flamegraph.svg b/flamegraph.svg new file mode 100644 index 0000000000..a196068928 --- /dev/null +++ b/flamegraph.svg @@ -0,0 +1 @@ +ERROR: No valid input provided to flamegraph \ No newline at end of file diff --git a/stdlib/Cargo.toml b/stdlib/Cargo.toml index a8393206d6..26c85103e5 100644 --- a/stdlib/Cargo.toml +++ b/stdlib/Cargo.toml @@ -18,7 +18,7 @@ bz2 = ["bzip2"] sqlite = ["dep:libsqlite3-sys"] ssl = ["openssl", "openssl-sys", "foreign-types-shared", "openssl-probe"] ssl-vendor = ["ssl", "openssl/vendored"] -tkinter = ["dep:tk", "dep:tcl"] +tkinter = ["dep:tk-sys", "dep:tcl-sys"] [dependencies] # rustpython crates @@ -83,8 +83,8 @@ libz-sys = { package = "libz-rs-sys", version = "0.4" } bzip2 = { version = "0.4", optional = true } # tkinter -tk = { version = "0.1.10", optional = true } -tcl = { version = "0.1.9", optional = true } +tk-sys = { git = "https://github.com/arihant2math/tkinter.git", tag = "v0.1.0", optional = true } +tcl-sys = { git = "https://github.com/arihant2math/tkinter.git", tag = "v0.1.0", optional = true } # uuid [target.'cfg(not(any(target_os = "ios", target_os = "android", target_os = "windows", target_arch = "wasm32", target_os = "redox")))'.dependencies] diff --git a/stdlib/src/tkinter.rs b/stdlib/src/tkinter.rs index 907dc55002..242570b410 100644 --- a/stdlib/src/tkinter.rs +++ b/stdlib/src/tkinter.rs @@ -4,83 +4,478 @@ pub(crate) use self::_tkinter::make_module; #[pymodule] mod _tkinter { - use crate::builtins::PyTypeRef; - use rustpython_vm::function::{Either, FuncArgs}; - use rustpython_vm::{PyResult, VirtualMachine, function::OptionalArg}; + use rustpython_vm::types::Constructor; + use rustpython_vm::{PyObjectRef, PyPayload, PyRef, PyResult, VirtualMachine}; - use crate::common::lock::PyRwLock; - use std::sync::Arc; - use tk::cmd::*; - use tk::*; + use rustpython_vm::builtins::{PyInt, PyStr, PyType}; + use std::{ffi, ptr}; - #[pyattr] - const TK_VERSION: &str = "8.6"; - #[pyattr] - const TCL_VERSION: &str = "8.6"; - #[pyattr] - const READABLE: i32 = 2; - #[pyattr] - const WRITABLE: i32 = 4; - #[pyattr] - const EXCEPTION: i32 = 8; - - fn demo() -> tk::TkResult<()> { - let tk = make_tk!()?; - let root = tk.root(); - root.add_label(-text("constructs widgets and layout step by step"))? - .pack(())?; - let f = root.add_frame(())?.pack(())?; - let _btn = f - .add_button("btn" - text("quit") - command("destroy ."))? 
- .pack(())?; - Ok(main_loop()) + use crate::builtins::PyTypeRef; + use rustpython_common::atomic::AtomicBool; + use rustpython_common::atomic::Ordering; + + #[cfg(windows)] + fn _get_tcl_lib_path() -> String { + // TODO: fix packaging + String::from(r"C:\ActiveTcl\lib") } - #[pyattr(once, name = "TclError")] + #[pyattr(name = "TclError", once)] fn tcl_error(vm: &VirtualMachine) -> PyTypeRef { vm.ctx.new_exception_type( - "zlib", + "_tkinter", "TclError", Some(vec![vm.ctx.exceptions.exception_type.to_owned()]), ) } - #[pyfunction] - fn create(args: FuncArgs, _vm: &VirtualMachine) -> PyResult { - // TODO: handle arguments - // TODO: this means creating 2 tk instances is not possible. - let tk = Tk::new(()).unwrap(); - Ok(TkApp { - tk: Arc::new(PyRwLock::new(tk)), - }) + #[pyattr(name = "TkError", once)] + fn tk_error(vm: &VirtualMachine) -> PyTypeRef { + vm.ctx.new_exception_type( + "_tkinter", + "TkError", + Some(vec![vm.ctx.exceptions.exception_type.to_owned()]), + ) + } + + #[pyattr(once, name = "TK_VERSION")] + fn tk_version(_vm: &VirtualMachine) -> String { + format!("{}.{}", 8, 6) + } + + #[pyattr(once, name = "TCL_VERSION")] + fn tcl_version(_vm: &VirtualMachine) -> String { + format!( + "{}.{}", + tk_sys::TCL_MAJOR_VERSION, + tk_sys::TCL_MINOR_VERSION + ) + } + + #[pyattr] + #[pyclass(name = "TclObject")] + #[derive(PyPayload)] + struct TclObject { + value: *mut tk_sys::Tcl_Obj, } + impl std::fmt::Debug for TclObject { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "TclObject") + } + } + + unsafe impl Send for TclObject {} + unsafe impl Sync for TclObject {} + + #[pyclass] + impl TclObject {} + + static QUIT_MAIN_LOOP: AtomicBool = AtomicBool::new(false); + #[pyattr] #[pyclass(name = "tkapp")] #[derive(PyPayload)] struct TkApp { - tk: Arc>>, + // Tcl_Interp *interp; + interpreter: *mut tk_sys::Tcl_Interp, + // int wantobjects; + want_objects: bool, + // int threaded; /* True if tcl_platform[threaded] */ + threaded: bool, + // Tcl_ThreadId thread_id; + thread_id: Option, + // int dispatching; + dispatching: bool, + // PyObject *trace; + trace: Option<()>, + // /* We cannot include tclInt.h, as this is internal. + // So we cache interesting types here. 
*/ + old_boolean_type: *const tk_sys::Tcl_ObjType, + boolean_type: *const tk_sys::Tcl_ObjType, + byte_array_type: *const tk_sys::Tcl_ObjType, + double_type: *const tk_sys::Tcl_ObjType, + int_type: *const tk_sys::Tcl_ObjType, + wide_int_type: *const tk_sys::Tcl_ObjType, + bignum_type: *const tk_sys::Tcl_ObjType, + list_type: *const tk_sys::Tcl_ObjType, + string_type: *const tk_sys::Tcl_ObjType, + utf32_string_type: *const tk_sys::Tcl_ObjType, + pixel_type: *const tk_sys::Tcl_ObjType, } unsafe impl Send for TkApp {} - unsafe impl Sync for TkApp {} impl std::fmt::Debug for TkApp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("TkApp").finish() + write!(f, "TkApp") } } - #[pyclass] + #[derive(FromArgs, Debug)] + struct TkAppConstructorArgs { + #[pyarg(any)] + screen_name: Option, + #[pyarg(any)] + _base_name: Option, + #[pyarg(any)] + class_name: String, + #[pyarg(any)] + interactive: i32, + #[pyarg(any)] + wantobjects: i32, + #[pyarg(any, default = "true")] + want_tk: bool, + #[pyarg(any)] + sync: i32, + #[pyarg(any)] + use_: Option, + } + + impl Constructor for TkApp { + type Args = TkAppConstructorArgs; + + fn py_new( + _zelf: PyRef, + args: Self::Args, + vm: &VirtualMachine, + ) -> PyResult { + create(args, vm) + } + } + + fn varname_converter(obj: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // if let Ok(bytes) = obj.bytes(vm) { + // todo!() + // } + + // str + if let Some(str) = obj.downcast_ref::() { + return Ok(str.as_str().to_string()); + } + + if let Some(_tcl_obj) = obj.downcast_ref::() { + // Assume that the Tcl object has a method to retrieve a string. + // return tcl_obj. + todo!(); + } + + // Construct an error message using the type name (truncated to 50 characters). + Err(vm.new_type_error(format!( + "must be str, bytes or Tcl_Obj, not {:.50}", + obj.obj_type().str(vm)?.as_str() + ))) + } + + // TODO: DISALLOW_INSTANTIATION + #[pyclass(with(Constructor))] impl TkApp { + fn from_bool(&self, obj: *mut tk_sys::Tcl_Obj) -> bool { + let mut res = -1; + unsafe { + if tk_sys::Tcl_GetBooleanFromObj(self.interpreter, obj, &mut res) + != tk_sys::TCL_OK as i32 + { + panic!("Tcl_GetBooleanFromObj failed"); + } + } + assert!(res == 0 || res == 1); + res != 0 + } + + fn from_object( + &self, + obj: *mut tk_sys::Tcl_Obj, + vm: &VirtualMachine, + ) -> PyResult { + let type_ptr = unsafe { (*obj).typePtr }; + if type_ptr == ptr::null() { + return self.unicode_from_object(obj, vm); + } else if type_ptr == self.old_boolean_type || type_ptr == self.boolean_type { + return Ok(vm.ctx.new_bool(self.from_bool(obj)).into()); + } else if type_ptr == self.string_type + || type_ptr == self.utf32_string_type + || type_ptr == self.pixel_type + { + return self.unicode_from_object(obj, vm); + } + // TODO: handle other types + + return Ok(TclObject { value: obj }.into_pyobject(vm)); + } + + fn unicode_from_string( + s: *mut ffi::c_char, + size: usize, + vm: &VirtualMachine, + ) -> PyResult { + // terribly unsafe + let s = unsafe { std::slice::from_raw_parts(s, size) } + .to_vec() + .into_iter() + .map(|c| c as u8) + .collect::>(); + let s = String::from_utf8(s).unwrap(); + Ok(PyObjectRef::from(vm.ctx.new_str(s))) + } + + fn unicode_from_object( + &self, + obj: *mut tk_sys::Tcl_Obj, + vm: &VirtualMachine, + ) -> PyResult { + let type_ptr = unsafe { (*obj).typePtr }; + if type_ptr != ptr::null() + && self.interpreter != ptr::null_mut() + && (type_ptr == self.string_type || type_ptr == self.utf32_string_type) + { + let len = ptr::null_mut(); + let data = unsafe { 
tk_sys::Tcl_GetUnicodeFromObj(obj, len) }; + return if size_of::() == 2 { + let v = unsafe { std::slice::from_raw_parts(data as *const u16, len as usize) }; + let s = String::from_utf16(v).unwrap(); + Ok(PyObjectRef::from(vm.ctx.new_str(s))) + } else { + let v = unsafe { std::slice::from_raw_parts(data as *const u32, len as usize) }; + let s = widestring::U32String::from_vec(v).to_string_lossy(); + Ok(PyObjectRef::from(vm.ctx.new_str(s))) + }; + } + let len = ptr::null_mut(); + let s = unsafe { tk_sys::Tcl_GetStringFromObj(obj, len) }; + Self::unicode_from_string(s, len as _, vm) + } + #[pymethod] - fn getvar(&self, name: &str) -> PyResult { - let tk = self.tk.read().unwrap(); - Ok(tk.getvar(name).unwrap()) + fn getvar(&self, arg: PyObjectRef, vm: &VirtualMachine) -> PyResult { + // TODO: technically not thread safe + let name = varname_converter(arg, vm)?; + + let res = unsafe { + tk_sys::Tcl_GetVar2Ex( + self.interpreter, + ptr::null(), + name.as_ptr() as _, + tk_sys::TCL_LEAVE_ERR_MSG as _, + ) + }; + if res == ptr::null_mut() { + todo!(); + } + let res = if self.want_objects { + self.from_object(res, vm) + } else { + self.unicode_from_object(res, vm) + }?; + Ok(res) + } + + #[pymethod] + fn getint(&self, arg: PyObjectRef, vm: &VirtualMachine) -> PyResult { + if let Some(int) = arg.downcast_ref::() { + return Ok(PyObjectRef::from(vm.ctx.new_int(int.as_bigint().clone()))); + } + + if let Some(obj) = arg.downcast_ref::() { + let value = obj.value; + unsafe { tk_sys::Tcl_IncrRefCount(value) }; + } else { + todo!(); + } + todo!(); + } + // TODO: Fix arguments + #[pymethod] + fn mainloop(&self, threshold: Option) -> PyResult<()> { + let threshold = threshold.unwrap_or(0); + todo!(); } #[pymethod] - fn createcommand(&self, name: String, callback: PyObjectRef) {} + fn quit(&self) { + QUIT_MAIN_LOOP.store(true, Ordering::Relaxed); + } } + + #[pyfunction] + fn create(args: TkAppConstructorArgs, vm: &VirtualMachine) -> PyResult { + unsafe { + let interp = tk_sys::Tcl_CreateInterp(); + let want_objects = args.wantobjects != 0; + let threaded = { + let part1 = String::from("tcl_platform"); + let part2 = String::from("threaded"); + let part1_ptr = part1.as_ptr(); + let part2_ptr = part2.as_ptr(); + tk_sys::Tcl_GetVar2Ex( + interp, + part1_ptr as _, + part2_ptr as _, + tk_sys::TCL_GLOBAL_ONLY as ffi::c_int, + ) + } != ptr::null_mut(); + let thread_id = tk_sys::Tcl_GetCurrentThread(); + let dispatching = false; + let trace = None; + // TODO: Handle threaded build + let bool_str = String::from("oldBoolean"); + let old_boolean_type = tk_sys::Tcl_GetObjType(bool_str.as_ptr() as _); + let (boolean_type, byte_array_type) = { + let true_str = String::from("true"); + let mut value = *tk_sys::Tcl_NewStringObj(true_str.as_ptr() as _, -1); + let mut bool_value = 0; + tk_sys::Tcl_GetBooleanFromObj(interp, &mut value, &mut bool_value); + let boolean_type = value.typePtr; + tk_sys::Tcl_DecrRefCount(&mut value); + + let mut value = + *tk_sys::Tcl_NewByteArrayObj(&bool_value as *const i32 as *const u8, 1); + let byte_array_type = value.typePtr; + tk_sys::Tcl_DecrRefCount(&mut value); + (boolean_type, byte_array_type) + }; + let double_str = String::from("double"); + let double_type = tk_sys::Tcl_GetObjType(double_str.as_ptr() as _); + let int_str = String::from("int"); + let int_type = tk_sys::Tcl_GetObjType(int_str.as_ptr() as _); + let int_type = if int_type == ptr::null() { + let mut value = *tk_sys::Tcl_NewIntObj(0); + let res = value.typePtr; + tk_sys::Tcl_DecrRefCount(&mut value); + res + } else { + int_type + 
}; + let wide_int_str = String::from("wideInt"); + let wide_int_type = tk_sys::Tcl_GetObjType(wide_int_str.as_ptr() as _); + let bignum_str = String::from("bignum"); + let bignum_type = tk_sys::Tcl_GetObjType(bignum_str.as_ptr() as _); + let list_str = String::from("list"); + let list_type = tk_sys::Tcl_GetObjType(list_str.as_ptr() as _); + let string_str = String::from("string"); + let string_type = tk_sys::Tcl_GetObjType(string_str.as_ptr() as _); + let utf32_str = String::from("utf32"); + let utf32_string_type = tk_sys::Tcl_GetObjType(utf32_str.as_ptr() as _); + let pixel_str = String::from("pixel"); + let pixel_type = tk_sys::Tcl_GetObjType(pixel_str.as_ptr() as _); + + let exit_str = String::from("exit"); + tk_sys::Tcl_DeleteCommand(interp, exit_str.as_ptr() as _); + + if let Some(name) = args.screen_name { + tk_sys::Tcl_SetVar2( + interp, + "env".as_ptr() as _, + "DISPLAY".as_ptr() as _, + name.as_ptr() as _, + tk_sys::TCL_GLOBAL_ONLY as i32, + ); + } + + if args.interactive != 0 { + tk_sys::Tcl_SetVar( + interp, + "tcl_interactive".as_ptr() as _, + "1".as_ptr() as _, + tk_sys::TCL_GLOBAL_ONLY as i32, + ); + } else { + tk_sys::Tcl_SetVar( + interp, + "tcl_interactive".as_ptr() as _, + "0".as_ptr() as _, + tk_sys::TCL_GLOBAL_ONLY as i32, + ); + } + + let argv0 = args.class_name.clone().to_lowercase(); + tk_sys::Tcl_SetVar( + interp, + "argv0".as_ptr() as _, + argv0.as_ptr() as _, + tk_sys::TCL_GLOBAL_ONLY as i32, + ); + + if !args.want_tk { + tk_sys::Tcl_SetVar( + interp, + "_tkinter_skip_tk_init".as_ptr() as _, + "1".as_ptr() as _, + tk_sys::TCL_GLOBAL_ONLY as i32, + ); + } + + if args.sync != 0 || args.use_.is_some() { + let mut argv = String::with_capacity(4); + if args.sync != 0 { + argv.push_str("-sync"); + } + if args.use_.is_some() { + if args.sync != 0 { + argv.push(' '); + } + argv.push_str("-use "); + argv.push_str(&args.use_.unwrap()); + } + argv.push_str("\0"); + let argv_ptr = argv.as_ptr() as *mut *mut i8; + tk_sys::Tcl_SetVar( + interp, + "argv".as_ptr() as _, + argv_ptr as *const i8, + tk_sys::TCL_GLOBAL_ONLY as i32, + ); + } + + #[cfg(windows)] + { + let ret = std::env::var("TCL_LIBRARY"); + if ret.is_err() { + let loc = _get_tcl_lib_path(); + std::env::set_var("TCL_LIBRARY", loc); + } + } + + // Bindgen cannot handle Tcl_AppInit + if tk_sys::Tcl_Init(interp) != tk_sys::TCL_OK as ffi::c_int { + todo!("Tcl_Init failed"); + } + + Ok(TkApp { + interpreter: interp, + want_objects, + threaded, + thread_id: Some(thread_id), + dispatching, + trace, + old_boolean_type, + boolean_type, + byte_array_type, + double_type, + int_type, + wide_int_type, + bignum_type, + list_type, + string_type, + utf32_string_type, + pixel_type, + } + .into_pyobject(vm)) + } + } + + #[pyattr] + const READABLE: i32 = tk_sys::TCL_READABLE as i32; + #[pyattr] + const WRITABLE: i32 = tk_sys::TCL_WRITABLE as i32; + #[pyattr] + const EXCEPTION: i32 = tk_sys::TCL_EXCEPTION as i32; + + #[pyattr] + const TIMER_EVENTS: i32 = tk_sys::TCL_TIMER_EVENTS as i32; + #[pyattr] + const IDLE_EVENTS: i32 = tk_sys::TCL_IDLE_EVENTS as i32; + #[pyattr] + const DONT_WAIT: i32 = tk_sys::TCL_DONT_WAIT as i32; } From 3c6bc2cf9f83a77cdeeb850799bc19f160bdf145 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Sun, 6 Apr 2025 01:22:26 -0700 Subject: [PATCH 020/126] Add _suggestions module (#5675) --- stdlib/src/lib.rs | 2 ++ stdlib/src/suggestions.rs | 20 ++++++++++++++++++++ vm/src/suggestion.rs | 2 +- 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 stdlib/src/suggestions.rs diff --git a/stdlib/src/lib.rs 
b/stdlib/src/lib.rs index 9ee8e3e81d..e9d10dfde4 100644 --- a/stdlib/src/lib.rs +++ b/stdlib/src/lib.rs @@ -33,6 +33,7 @@ mod pyexpat; mod pystruct; mod random; mod statistics; +mod suggestions; // TODO: maybe make this an extension module, if we ever get those // mod re; #[cfg(feature = "bz2")] @@ -133,6 +134,7 @@ pub fn get_module_inits() -> impl Iterator, StdlibInit "unicodedata" => unicodedata::make_module, "zlib" => zlib::make_module, "_statistics" => statistics::make_module, + "suggestions" => suggestions::make_module, // crate::vm::sysmodule::sysconfigdata_name() => sysconfigdata::make_module, } #[cfg(any(unix, target_os = "wasi"))] diff --git a/stdlib/src/suggestions.rs b/stdlib/src/suggestions.rs new file mode 100644 index 0000000000..e49e9dd4a4 --- /dev/null +++ b/stdlib/src/suggestions.rs @@ -0,0 +1,20 @@ +pub(crate) use _suggestions::make_module; + +#[pymodule] +mod _suggestions { + use rustpython_vm::VirtualMachine; + + use crate::vm::PyObjectRef; + + #[pyfunction] + fn _generate_suggestions( + candidates: Vec, + name: PyObjectRef, + vm: &VirtualMachine, + ) -> PyObjectRef { + match crate::vm::suggestion::calculate_suggestions(candidates.iter(), &name) { + Some(suggestion) => suggestion.into(), + None => vm.ctx.none(), + } + } +} diff --git a/vm/src/suggestion.rs b/vm/src/suggestion.rs index 01f53d70ca..3d075ee3bb 100644 --- a/vm/src/suggestion.rs +++ b/vm/src/suggestion.rs @@ -12,7 +12,7 @@ use std::iter::ExactSizeIterator; const MAX_CANDIDATE_ITEMS: usize = 750; -fn calculate_suggestions<'a>( +pub fn calculate_suggestions<'a>( dir_iter: impl ExactSizeIterator, name: &PyObjectRef, ) -> Option { From 861055f558779e307fae56a09274e7e482c5f770 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Sun, 6 Apr 2025 01:23:56 -0700 Subject: [PATCH 021/126] Add nt constants (#5676) --- vm/src/stdlib/nt.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/vm/src/stdlib/nt.rs b/vm/src/stdlib/nt.rs index 428d3421fd..083824bcd0 100644 --- a/vm/src/stdlib/nt.rs +++ b/vm/src/stdlib/nt.rs @@ -37,7 +37,13 @@ pub(crate) mod module { use libc::{O_BINARY, O_TEMPORARY}; #[pyattr] - const _LOAD_LIBRARY_SEARCH_DEFAULT_DIRS: i32 = 4096; + use windows_sys::Win32::System::LibraryLoader::{ + LOAD_LIBRARY_SEARCH_APPLICATION_DIR as _LOAD_LIBRARY_SEARCH_APPLICATION_DIR, + LOAD_LIBRARY_SEARCH_DEFAULT_DIRS as _LOAD_LIBRARY_SEARCH_DEFAULT_DIRS, + LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR as _LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR, + LOAD_LIBRARY_SEARCH_SYSTEM32 as _LOAD_LIBRARY_SEARCH_SYSTEM32, + LOAD_LIBRARY_SEARCH_USER_DIRS as _LOAD_LIBRARY_SEARCH_USER_DIRS, + }; #[pyfunction] pub(super) fn access(path: OsPath, mode: u8, vm: &VirtualMachine) -> PyResult { From ad5ffb648f2c0cb73152bdb75befa6e4b4e6ab52 Mon Sep 17 00:00:00 2001 From: Ashwin Naren Date: Mon, 7 Apr 2025 21:37:47 -0700 Subject: [PATCH 022/126] Remove packaging from release (#5680) --- .github/workflows/release.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3b0a797e0c..f6a1ad3209 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -80,20 +80,6 @@ jobs: run: cp target/${{ matrix.platform.target }}/release/rustpython.exe target/rustpython-release-${{ runner.os }}-${{ matrix.platform.target }}.exe if: runner.os == 'Windows' - - name: Install cargo-packager - run: cargo binstall cargo-packager - - - name: Generate MSI - if: runner.os == 'Windows' - run: cargo packager -f wix --release -o installer - - - name: Upload MSI - if: 
runner.os == 'Windows'
-        uses: actions/upload-artifact@v4
-        with:
-          name: rustpython-installer-msi-${{ runner.os }}-${{ matrix.platform.target }}
-          path: installer/*.msi
-
       - name: Upload Binary Artifacts
         uses: actions/upload-artifact@v4
         with:
           name:

From 8dc17180029337676395154fc33b513678ea4a8f Mon Sep 17 00:00:00 2001
From: Ashwin Naren
Date: Wed, 9 Apr 2025 22:00:54 -0700
Subject: [PATCH 023/126] Match statements rewrite (#5628)

---
 Cargo.lock | 1 +
 compiler/codegen/Cargo.toml | 3 +-
 compiler/codegen/src/compile.rs | 1022 +++++++++++++++--
 compiler/codegen/src/error.rs | 27 +-
 ...python_codegen__compile__tests__match.snap | 53 +
 compiler/core/src/bytecode.rs | 41 +-
 extra_tests/snippets/syntax_match.py | 50 +
 vm/src/frame.rs | 109 ++
 8 files changed, 1192 insertions(+), 114 deletions(-)
 create mode 100644 compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__match.snap
 create mode 100644 extra_tests/snippets/syntax_match.py

diff --git a/Cargo.lock b/Cargo.lock
index f3a8f59af3..60f0c44ce1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2187,6 +2187,7 @@ dependencies = [
 "num-complex",
 "num-traits",
 "ruff_python_ast",
+ "ruff_python_parser",
 "ruff_source_file",
 "ruff_text_size",
 "rustpython-compiler-core",
diff --git a/compiler/codegen/Cargo.toml b/compiler/codegen/Cargo.toml
index 53469b9f6e..479b0b29f6 100644
--- a/compiler/codegen/Cargo.toml
+++ b/compiler/codegen/Cargo.toml
@@ -33,8 +33,7 @@ memchr = { workspace = true }
 unicode_names2 = { workspace = true }

 [dev-dependencies]
-# rustpython-parser = { workspace = true }
-
+ruff_python_parser = { workspace = true }
 insta = { workspace = true }

 [lints]
diff --git a/compiler/codegen/src/compile.rs b/compiler/codegen/src/compile.rs
index 83e2f5cf44..c8474a537f 100644
--- a/compiler/codegen/src/compile.rs
+++ b/compiler/codegen/src/compile.rs
@@ -9,8 +9,8 @@ use crate::{
     IndexSet, ToPythonName,
-    error::{CodegenError, CodegenErrorType},
-    ir,
+    error::{CodegenError, CodegenErrorType, PatternUnreachableReason},
+    ir::{self, BlockIdx},
     symboltable::{self, SymbolFlags, SymbolScope, SymbolTable},
     unparse::unparse_expr,
 };
@@ -22,10 +22,11 @@ use ruff_python_ast::{
     Alias, Arguments, BoolOp, CmpOp, Comprehension, ConversionFlag, DebugText, Decorator, DictItem,
     ExceptHandler, ExceptHandlerExceptHandler, Expr, ExprAttribute, ExprBoolOp, ExprFString,
     ExprList, ExprName, ExprStarred, ExprSubscript, ExprTuple, ExprUnaryOp, FString,
-    FStringElement, FStringElements, FStringFlags, FStringPart, Int, Keyword, MatchCase,
-    ModExpression, ModModule, Operator, Parameters, Pattern, PatternMatchAs, PatternMatchValue,
-    Stmt, StmtExpr, TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple,
-    TypeParams, UnaryOp, WithItem,
+    FStringElement, FStringElements, FStringFlags, FStringPart, Identifier, Int, Keyword,
+    MatchCase, ModExpression, ModModule, Operator, Parameters, Pattern, PatternMatchAs,
+    PatternMatchClass, PatternMatchOr, PatternMatchSequence, PatternMatchSingleton,
+    PatternMatchStar, PatternMatchValue, Singleton, Stmt, StmtExpr, TypeParam, TypeParamParamSpec,
+    TypeParamTypeVar, TypeParamTypeVarTuple, TypeParams, UnaryOp, WithItem,
 };
 use ruff_source_file::OneIndexed;
 use ruff_text_size::{Ranged, TextRange};
 use rustpython_wtf8::Wtf8Buf;
 // use rustpython_ast::located::{self as located_ast, Located};
 use rustpython_compiler_core::{
     Mode,
-    bytecode::{self, Arg as OpArgMarker, CodeObject, ConstantData, Instruction, OpArg, OpArgType},
+    bytecode::{
+        self, Arg as OpArgMarker, BinaryOperator, CodeObject,
ComparisonOperator, ConstantData, + Instruction, OpArg, OpArgType, UnpackExArgs, + }, }; use rustpython_compiler_source::SourceCode; // use rustpython_parser_core::source_code::{LineNumber, SourceLocation}; @@ -206,10 +210,43 @@ macro_rules! emit { }; } -struct PatternContext { - current_block: usize, - blocks: Vec, - allow_irrefutable: bool, +/// The pattern context holds information about captured names and jump targets. +#[derive(Clone)] +pub struct PatternContext { + /// A list of names captured by the pattern. + pub stores: Vec, + /// If false, then any name captures against our subject will raise. + pub allow_irrefutable: bool, + /// A list of jump target labels used on pattern failure. + pub fail_pop: Vec, + /// The number of items on top of the stack that should remain. + pub on_top: usize, +} + +impl Default for PatternContext { + fn default() -> Self { + Self::new() + } +} + +impl PatternContext { + pub fn new() -> Self { + PatternContext { + stores: Vec::new(), + allow_irrefutable: false, + fail_pop: Vec::new(), + on_top: 0, + } + } + + pub fn fail_pop_size(&self) -> usize { + self.fail_pop.len() + } +} + +enum JumpOp { + Jump, + PopJumpIfFalse, } impl<'src> Compiler<'src> { @@ -1800,73 +1837,824 @@ impl Compiler<'_> { Ok(()) } - fn compile_pattern_value( + fn forbidden_name(&mut self, name: &str, ctx: NameUsage) -> CompileResult { + if ctx == NameUsage::Store && name == "__debug__" { + return Err(self.error(CodegenErrorType::Assign("__debug__"))); + // return Ok(true); + } + if ctx == NameUsage::Delete && name == "__debug__" { + return Err(self.error(CodegenErrorType::Delete("__debug__"))); + // return Ok(true); + } + Ok(false) + } + + fn compile_error_forbidden_name(&mut self, name: &str) -> CodegenError { + // TODO: make into error (fine for now since it realistically errors out earlier) + panic!("Failing due to forbidden name {:?}", name); + } + + /// Ensures that `pc.fail_pop` has at least `n + 1` entries. + /// If not, new labels are generated and pushed until the required size is reached. + fn ensure_fail_pop(&mut self, pc: &mut PatternContext, n: usize) -> CompileResult<()> { + let required_size = n + 1; + if required_size <= pc.fail_pop.len() { + return Ok(()); + } + while pc.fail_pop.len() < required_size { + let new_block = self.new_block(); + pc.fail_pop.push(new_block); + } + Ok(()) + } + + fn jump_to_fail_pop(&mut self, pc: &mut PatternContext, op: JumpOp) -> CompileResult<()> { + // Compute the total number of items to pop: + // items on top plus the captured objects. + let pops = pc.on_top + pc.stores.len(); + // Ensure that the fail_pop vector has at least `pops + 1` elements. + self.ensure_fail_pop(pc, pops)?; + // Emit a jump using the jump target stored at index `pops`. + match op { + JumpOp::Jump => { + emit!( + self, + Instruction::Jump { + target: pc.fail_pop[pops] + } + ); + } + JumpOp::PopJumpIfFalse => { + emit!( + self, + Instruction::JumpIfFalse { + target: pc.fail_pop[pops] + } + ); + } + } + Ok(()) + } + + /// Emits the necessary POP instructions for all failure targets in the pattern context, + /// then resets the fail_pop vector. + fn emit_and_reset_fail_pop(&mut self, pc: &mut PatternContext) -> CompileResult<()> { + // If the fail_pop vector is empty, nothing needs to be done. + if pc.fail_pop.is_empty() { + debug_assert!(pc.fail_pop.is_empty()); + return Ok(()); + } + // Iterate over the fail_pop vector in reverse order, skipping the first label. 
+ for &label in pc.fail_pop.iter().skip(1).rev() { + self.switch_to_block(label); + // Emit the POP instruction. + emit!(self, Instruction::Pop); + } + // Finally, use the first label. + self.switch_to_block(pc.fail_pop[0]); + pc.fail_pop.clear(); + // Free the memory used by the vector. + pc.fail_pop.shrink_to_fit(); + Ok(()) + } + + /// Duplicate the effect of Python 3.10's ROT_* instructions using SWAPs. + fn pattern_helper_rotate(&mut self, mut count: usize) -> CompileResult<()> { + while count > 1 { + // Emit a SWAP instruction with the current count. + emit!( + self, + Instruction::Swap { + index: u32::try_from(count).unwrap() + } + ); + count -= 1; + } + Ok(()) + } + + /// Helper to store a captured name for a star pattern. + /// + /// If `n` is `None`, it emits a POP_TOP instruction. Otherwise, it first + /// checks that the name is allowed and not already stored. Then it rotates + /// the object on the stack beneath any preserved items and appends the name + /// to the list of captured names. + fn pattern_helper_store_name( + &mut self, + n: Option<&Identifier>, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // If no name is provided, simply pop the top of the stack. + if n.is_none() { + emit!(self, Instruction::Pop); + return Ok(()); + } + let name = n.unwrap(); + + // Check if the name is forbidden for storing. + if self.forbidden_name(name.as_str(), NameUsage::Store)? { + return Err(self.compile_error_forbidden_name(name.as_str())); + } + + // Ensure we don't store the same name twice. + if pc.stores.contains(&name.to_string()) { + return Err(self.error(CodegenErrorType::DuplicateStore(name.as_str().to_string()))); + } + + // Calculate how many items to rotate: + // the count is the number of items to preserve on top plus the current stored names, + // plus one for the new value. + let rotations = pc.on_top + pc.stores.len() + 1; + self.pattern_helper_rotate(rotations)?; + + // Append the name to the captured stores. + pc.stores.push(name.to_string()); + Ok(()) + } + + fn pattern_unpack_helper(&mut self, elts: &[Pattern]) -> CompileResult<()> { + let n = elts.len(); + let mut seen_star = false; + for (i, elt) in elts.iter().enumerate() { + if elt.is_match_star() { + if !seen_star { + if i >= (1 << 8) || (n - i - 1) >= ((i32::MAX as usize) >> 8) { + todo!(); + // return self.compiler_error(loc, "too many expressions in star-unpacking sequence pattern"); + } + let args = UnpackExArgs { + before: u8::try_from(i).unwrap(), + after: u8::try_from(n - i - 1).unwrap(), + }; + emit!(self, Instruction::UnpackEx { args }); + seen_star = true; + } else { + // TODO: Fix error msg + return Err(self.error(CodegenErrorType::MultipleStarArgs)); + // return self.compiler_error(loc, "multiple starred expressions in sequence pattern"); + } + } + } + if !seen_star { + emit!( + self, + Instruction::UnpackSequence { + size: u32::try_from(n).unwrap() + } + ); + } + Ok(()) + } + + fn pattern_helper_sequence_unpack( &mut self, - value: &PatternMatchValue, - _pattern_context: &mut PatternContext, + patterns: &[Pattern], + _star: Option, + pc: &mut PatternContext, ) -> CompileResult<()> { - use crate::compile::bytecode::ComparisonOperator::*; + // Unpack the sequence into individual subjects. + self.pattern_unpack_helper(patterns)?; + let size = patterns.len(); + // Increase the on_top counter for the newly unpacked subjects. + pc.on_top += size; + // For each unpacked subject, compile its subpattern. + for pattern in patterns { + // Decrement on_top for each subject as it is consumed. 
+ pc.on_top -= 1; + self.compile_pattern_subpattern(pattern, pc)?; + } + Ok(()) + } - self.compile_expression(&value.value)?; - emit!(self, Instruction::CompareOperation { op: Equal }); + fn pattern_helper_sequence_subscr( + &mut self, + patterns: &[Pattern], + star: usize, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // Keep the subject around for extracting elements. + pc.on_top += 1; + for (i, pattern) in patterns.iter().enumerate() { + // if pattern.is_wildcard() { + // continue; + // } + if i == star { + // This must be a starred wildcard. + // assert!(pattern.is_star_wildcard()); + continue; + } + // Duplicate the subject. + emit!(self, Instruction::CopyItem { index: 1_u32 }); + if i < star { + // For indices before the star, use a nonnegative index equal to i. + self.emit_load_const(ConstantData::Integer { value: i.into() }); + } else { + // For indices after the star, compute a nonnegative index: + // index = len(subject) - (size - i) + emit!(self, Instruction::GetLen); + self.emit_load_const(ConstantData::Integer { + value: (patterns.len() - 1).into(), + }); + // Subtract to compute the correct index. + emit!( + self, + Instruction::BinaryOperation { + op: BinaryOperator::Subtract + } + ); + } + // Use BINARY_OP/NB_SUBSCR to extract the element. + emit!(self, Instruction::BinarySubscript); + // Compile the subpattern in irrefutable mode. + self.compile_pattern_subpattern(pattern, pc)?; + } + // Pop the subject off the stack. + pc.on_top -= 1; + emit!(self, Instruction::Pop); + Ok(()) + } + + fn compile_pattern_subpattern( + &mut self, + p: &Pattern, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // Save the current allow_irrefutable state. + let old_allow_irrefutable = pc.allow_irrefutable; + // Temporarily allow irrefutable patterns. + pc.allow_irrefutable = true; + // Compile the pattern. + self.compile_pattern(p, pc)?; + // Restore the original state. + pc.allow_irrefutable = old_allow_irrefutable; Ok(()) } fn compile_pattern_as( &mut self, - as_pattern: &PatternMatchAs, - pattern_context: &mut PatternContext, + p: &PatternMatchAs, + pc: &mut PatternContext, ) -> CompileResult<()> { - if as_pattern.pattern.is_none() && !pattern_context.allow_irrefutable { - // TODO: better error message - if let Some(_name) = as_pattern.name.as_ref() { - return Err(self.error_ranged(CodegenErrorType::InvalidMatchCase, as_pattern.range)); + // If there is no sub-pattern, then it's an irrefutable match. + if p.pattern.is_none() { + if !pc.allow_irrefutable { + if let Some(_name) = p.name.as_ref() { + // TODO: This error message does not match cpython exactly + // A name capture makes subsequent patterns unreachable. + return Err(self.error(CodegenErrorType::UnreachablePattern( + PatternUnreachableReason::NameCapture, + ))); + } else { + // A wildcard makes remaining patterns unreachable. + return Err(self.error(CodegenErrorType::UnreachablePattern( + PatternUnreachableReason::Wildcard, + ))); + } } - return Err(self.error_ranged(CodegenErrorType::InvalidMatchCase, as_pattern.range)); + // If irrefutable matches are allowed, store the name (if any). + return self.pattern_helper_store_name(p.name.as_ref(), pc); } - // Need to make a copy for (possibly) storing later: - emit!(self, Instruction::Duplicate); - if let Some(pattern) = &as_pattern.pattern { - self.compile_pattern_inner(pattern, pattern_context)?; + + // Otherwise, there is a sub-pattern. Duplicate the object on top of the stack. 
+ pc.on_top += 1; + emit!(self, Instruction::CopyItem { index: 1_u32 }); + // Compile the sub-pattern. + self.compile_pattern(p.pattern.as_ref().unwrap(), pc)?; + // After success, decrement the on_top counter. + pc.on_top -= 1; + // Store the captured name (if any). + self.pattern_helper_store_name(p.name.as_ref(), pc)?; + Ok(()) + } + + fn compile_pattern_star( + &mut self, + p: &PatternMatchStar, + pc: &mut PatternContext, + ) -> CompileResult<()> { + self.pattern_helper_store_name(p.name.as_ref(), pc)?; + Ok(()) + } + + /// Validates that keyword attributes in a class pattern are allowed + /// and not duplicated. + fn validate_kwd_attrs( + &mut self, + attrs: &[Identifier], + _patterns: &[Pattern], + ) -> CompileResult<()> { + let nattrs = attrs.len(); + for i in 0..nattrs { + let attr = attrs[i].as_str(); + // Check if the attribute name is forbidden in a Store context. + if self.forbidden_name(attr, NameUsage::Store)? { + // Return an error if the name is forbidden. + return Err(self.compile_error_forbidden_name(attr)); + } + // Check for duplicates: compare with every subsequent attribute. + for ident in attrs.iter().take(nattrs).skip(i + 1) { + let other = ident.as_str(); + if attr == other { + todo!(); + // return Err(self.compiler_error( + // &format!("attribute name repeated in class pattern: {}", attr), + // )); + } + } } - if let Some(name) = as_pattern.name.as_ref() { - self.store_name(name.as_str())?; - } else { - emit!(self, Instruction::Pop); + Ok(()) + } + + fn compile_pattern_class( + &mut self, + p: &PatternMatchClass, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // Extract components from the MatchClass pattern. + let match_class = p; + let patterns = &match_class.arguments.patterns; + + // Extract keyword attributes and patterns. + // Capacity is pre-allocated based on the number of keyword arguments. + let mut kwd_attrs = Vec::with_capacity(match_class.arguments.keywords.len()); + let mut kwd_patterns = Vec::with_capacity(match_class.arguments.keywords.len()); + for kwd in &match_class.arguments.keywords { + kwd_attrs.push(kwd.attr.clone()); + kwd_patterns.push(kwd.pattern.clone()); + } + + let nargs = patterns.len(); + let nattrs = kwd_attrs.len(); + let nkwd_patterns = kwd_patterns.len(); + + // Validate that keyword attribute names and patterns match in length. + if nattrs != nkwd_patterns { + let msg = format!( + "kwd_attrs ({}) / kwd_patterns ({}) length mismatch in class pattern", + nattrs, nkwd_patterns + ); + unreachable!("{}", msg); + } + + // Check for too many sub-patterns. + if nargs > u32::MAX as usize || (nargs + nattrs).saturating_sub(1) > i32::MAX as usize { + let msg = format!( + "too many sub-patterns in class pattern {:?}", + match_class.cls + ); + panic!("{}", msg); + // return self.compiler_error(&msg); + } + + // Validate keyword attributes if any. + if nattrs != 0 { + self.validate_kwd_attrs(&kwd_attrs, &kwd_patterns)?; + } + + // Compile the class expression. + self.compile_expression(&match_class.cls)?; + + // Create a new tuple of attribute names. + let mut attr_names = vec![]; + for name in kwd_attrs.iter() { + // Py_NewRef(name) is emulated by cloning the name into a PyObject. + attr_names.push(ConstantData::Str { + value: name.as_str().to_string().into(), + }); + } + + // Emit instructions: + // 1. Load the new tuple of attribute names. + self.emit_load_const(ConstantData::Tuple { + elements: attr_names, + }); + // 2. Emit MATCH_CLASS with nargs. + emit!(self, Instruction::MatchClass(u32::try_from(nargs).unwrap())); + // 3. 
Duplicate the top of the stack. + emit!(self, Instruction::CopyItem { index: 1_u32 }); + // 4. Load None. + self.emit_load_const(ConstantData::None); + // 5. Compare with IS_OP 1. + emit!(self, Instruction::IsOperation(true)); + + // At this point the TOS is a tuple of (nargs + nattrs) attributes (or None). + pc.on_top += 1; + self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + + // Unpack the tuple into (nargs + nattrs) items. + let total = nargs + nattrs; + emit!( + self, + Instruction::UnpackSequence { + size: u32::try_from(total).unwrap() + } + ); + pc.on_top += total; + pc.on_top -= 1; + + // Process each sub-pattern. + for i in 0..total { + // Decrement the on_top counter as each sub-pattern is processed. + pc.on_top -= 1; + let subpattern = if i < nargs { + // Positional sub-pattern. + &patterns[i] + } else { + // Keyword sub-pattern. + &kwd_patterns[i - nargs] + }; + if subpattern.is_wildcard() { + // For wildcard patterns, simply pop the top of the stack. + emit!(self, Instruction::Pop); + continue; + } + // Compile the subpattern without irrefutability checks. + self.compile_pattern_subpattern(subpattern, pc)?; } Ok(()) } - fn compile_pattern_inner( + // fn compile_pattern_mapping(&mut self, p: &PatternMatchMapping, pc: &mut PatternContext) -> CompileResult<()> { + // // Ensure the pattern is a mapping pattern. + // let mapping = p; // Extract MatchMapping-specific data. + // let keys = &mapping.keys; + // let patterns = &mapping.patterns; + // let size = keys.len(); + // let npatterns = patterns.len(); + + // if size != npatterns { + // panic!("keys ({}) / patterns ({}) length mismatch in mapping pattern", size, npatterns); + // // return self.compiler_error( + // // &format!("keys ({}) / patterns ({}) length mismatch in mapping pattern", size, npatterns) + // // ); + // } + + // // A double-star target is present if `rest` is set. + // let star_target = mapping.rest; + + // // Keep the subject on top during the mapping and length checks. + // pc.on_top += 1; + // emit!(self, Instruction::MatchMapping); + // self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + + // // If the pattern is just "{}" (empty mapping) and there's no star target, + // // we're done—pop the subject. + // if size == 0 && star_target.is_none() { + // pc.on_top -= 1; + // emit!(self, Instruction::Pop); + // return Ok(()); + // } + + // // If there are any keys, perform a length check. + // if size != 0 { + // emit!(self, Instruction::GetLen); + // self.emit_load_const(ConstantData::Integer { value: size.into() }); + // emit!(self, Instruction::CompareOperation { op: ComparisonOperator::GreaterOrEqual }); + // self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + // } + + // // Check that the number of subpatterns is not absurd. + // if size.saturating_sub(1) > (i32::MAX as usize) { + // panic!("too many sub-patterns in mapping pattern"); + // // return self.compiler_error("too many sub-patterns in mapping pattern"); + // } + + // // Collect all keys into a set for duplicate checking. + // let mut seen = HashSet::new(); + + // // For each key, validate it and check for duplicates. 
+ // for (i, key) in keys.iter().enumerate() { + // if let Some(key_val) = key.as_literal_expr() { + // let in_seen = seen.contains(&key_val); + // if in_seen { + // panic!("mapping pattern checks duplicate key: {:?}", key_val); + // // return self.compiler_error(format!("mapping pattern checks duplicate key: {:?}", key_val)); + // } + // seen.insert(key_val); + // } else if !key.is_attribute_expr() { + // panic!("mapping pattern keys may only match literals and attribute lookups"); + // // return self.compiler_error("mapping pattern keys may only match literals and attribute lookups"); + // } + + // // Visit the key expression. + // self.compile_expression(key)?; + // } + // // Drop the set (its resources will be freed automatically). + + // // Build a tuple of keys and emit MATCH_KEYS. + // emit!(self, Instruction::BuildTuple { size: size as u32 }); + // emit!(self, Instruction::MatchKeys); + // // Now, on top of the subject there are two new tuples: one of keys and one of values. + // pc.on_top += 2; + + // // Prepare for matching the values. + // emit!(self, Instruction::CopyItem { index: 1_u32 }); + // self.emit_load_const(ConstantData::None); + // // TODO: should be is + // emit!(self, Instruction::IsOperation(true)); + // self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + + // // Unpack the tuple of values. + // emit!(self, Instruction::UnpackSequence { size: size as u32 }); + // pc.on_top += size.saturating_sub(1); + + // // Compile each subpattern in "subpattern" mode. + // for pattern in patterns { + // pc.on_top = pc.on_top.saturating_sub(1); + // self.compile_pattern_subpattern(pattern, pc)?; + // } + + // // Consume the tuple of keys and the subject. + // pc.on_top = pc.on_top.saturating_sub(2); + // if let Some(star_target) = star_target { + // // If we have a starred name, bind a dict of remaining items to it. + // // This sequence of instructions performs: + // // rest = dict(subject) + // // for key in keys: del rest[key] + // emit!(self, Instruction::BuildMap { size: 0 }); // Build an empty dict. + // emit!(self, Instruction::Swap(3)); // Rearrange stack: [empty, keys, subject] + // emit!(self, Instruction::DictUpdate { size: 2 }); // Update dict with subject. + // emit!(self, Instruction::UnpackSequence { size: size as u32 }); // Unpack keys. + // let mut remaining = size; + // while remaining > 0 { + // emit!(self, Instruction::CopyItem { index: 1 + remaining as u32 }); // Duplicate subject copy. + // emit!(self, Instruction::Swap { index: 2_u32 }); // Bring key to top. + // emit!(self, Instruction::DeleteSubscript); // Delete key from dict. + // remaining -= 1; + // } + // // Bind the dict to the starred target. + // self.pattern_helper_store_name(Some(&star_target), pc)?; + // } else { + // // No starred target: just pop the tuple of keys and the subject. + // emit!(self, Instruction::Pop); + // emit!(self, Instruction::Pop); + // } + // Ok(()) + // } + + fn compile_pattern_or( &mut self, - pattern_type: &Pattern, - pattern_context: &mut PatternContext, + p: &PatternMatchOr, + pc: &mut PatternContext, ) -> CompileResult<()> { - match &pattern_type { - Pattern::MatchValue(value) => self.compile_pattern_value(value, pattern_context), - Pattern::MatchAs(as_pattern) => self.compile_pattern_as(as_pattern, pattern_context), - _ => { - eprintln!("not implemented pattern type: {pattern_type:?}"); - Err(self.error(CodegenErrorType::NotImplementedYet)) + // Ensure the pattern is a MatchOr. + let end = self.new_block(); // Create a new jump target label. 
+ let size = p.patterns.len(); + assert!(size > 1, "MatchOr must have more than one alternative"); + + // Save the current pattern context. + let old_pc = pc.clone(); + // Simulate Py_INCREF on pc.stores by cloning it. + pc.stores = pc.stores.clone(); + let mut control: Option> = None; // Will hold the capture list of the first alternative. + + // Process each alternative. + for (i, alt) in p.patterns.iter().enumerate() { + // Create a fresh empty store for this alternative. + pc.stores = Vec::new(); + // An irrefutable subpattern must be last (if allowed). + pc.allow_irrefutable = (i == size - 1) && old_pc.allow_irrefutable; + // Reset failure targets and the on_top counter. + pc.fail_pop.clear(); + pc.on_top = 0; + // Emit a COPY(1) instruction before compiling the alternative. + emit!(self, Instruction::CopyItem { index: 1_u32 }); + self.compile_pattern(alt, pc)?; + + let nstores = pc.stores.len(); + if i == 0 { + // Save the captured names from the first alternative. + control = Some(pc.stores.clone()); + } else { + let control_vec = control.as_ref().unwrap(); + if nstores != control_vec.len() { + todo!(); + // return self.compiler_error("alternative patterns bind different names"); + } else if nstores > 0 { + // Check that the names occur in the same order. + for icontrol in (0..nstores).rev() { + let name = &control_vec[icontrol]; + // Find the index of `name` in the current stores. + let istores = pc.stores.iter().position(|n| n == name).unwrap(); + // .ok_or_else(|| self.compiler_error("alternative patterns bind different names"))?; + if icontrol != istores { + // The orders differ; we must reorder. + assert!(istores < icontrol, "expected istores < icontrol"); + let rotations = istores + 1; + // Rotate pc.stores: take a slice of the first `rotations` items... + let rotated = pc.stores[0..rotations].to_vec(); + // Remove those elements. + for _ in 0..rotations { + pc.stores.remove(0); + } + // Insert the rotated slice at the appropriate index. + let insert_pos = icontrol - istores; + for (j, elem) in rotated.into_iter().enumerate() { + pc.stores.insert(insert_pos + j, elem); + } + // Also perform the same rotation on the evaluation stack. + for _ in 0..(istores + 1) { + self.pattern_helper_rotate(icontrol + 1)?; + } + } + } + } + } + // Emit a jump to the common end label and reset any failure jump targets. + emit!(self, Instruction::Jump { target: end }); + self.emit_and_reset_fail_pop(pc)?; + } + + // Restore the original pattern context. + *pc = old_pc.clone(); + // Simulate Py_INCREF on pc.stores. + pc.stores = pc.stores.clone(); + // In C, old_pc.fail_pop is set to NULL to avoid freeing it later. + // In Rust, old_pc is a local clone, so we need not worry about that. + + // No alternative matched: pop the subject and fail. + emit!(self, Instruction::Pop); + self.jump_to_fail_pop(pc, JumpOp::Jump)?; + + // Use the label "end". + self.switch_to_block(end); + + // Adjust the final captures. + let nstores = control.as_ref().unwrap().len(); + let nrots = nstores + 1 + pc.on_top + pc.stores.len(); + for i in 0..nstores { + // Rotate the capture to its proper place. + self.pattern_helper_rotate(nrots)?; + let name = &control.as_ref().unwrap()[i]; + // Check for duplicate binding. + if pc.stores.iter().any(|n| n == name) { + return Err(self.error(CodegenErrorType::DuplicateStore(name.to_string()))); } + pc.stores.push(name.clone()); } + + // Old context and control will be dropped automatically. + // Finally, pop the copy of the subject. 
+ emit!(self, Instruction::Pop); + Ok(()) } - fn compile_pattern( + fn compile_pattern_sequence( &mut self, - pattern_type: &Pattern, - pattern_context: &mut PatternContext, + p: &PatternMatchSequence, + pc: &mut PatternContext, ) -> CompileResult<()> { - self.compile_pattern_inner(pattern_type, pattern_context)?; + // Ensure the pattern is a MatchSequence. + let patterns = &p.patterns; // a slice of Pattern + let size = patterns.len(); + let mut star: Option = None; + let mut only_wildcard = true; + let mut star_wildcard = false; + + // Find a starred pattern, if it exists. There may be at most one. + for (i, pattern) in patterns.iter().enumerate() { + if pattern.is_match_star() { + if star.is_some() { + // TODO: Fix error msg + return Err(self.error(CodegenErrorType::MultipleStarArgs)); + } + // star wildcard check + star_wildcard = pattern + .as_match_star() + .map(|m| m.name.is_none()) + .unwrap_or(false); + only_wildcard &= star_wildcard; + star = Some(i); + continue; + } + // wildcard check + only_wildcard &= pattern + .as_match_as() + .map(|m| m.name.is_none()) + .unwrap_or(false); + } + + // Keep the subject on top during the sequence and length checks. + pc.on_top += 1; + emit!(self, Instruction::MatchSequence); + self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + + if star.is_none() { + // No star: len(subject) == size + emit!(self, Instruction::GetLen); + self.emit_load_const(ConstantData::Integer { value: size.into() }); + emit!( + self, + Instruction::CompareOperation { + op: ComparisonOperator::Equal + } + ); + self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + } else if size > 1 { + // Star exists: len(subject) >= size - 1 + emit!(self, Instruction::GetLen); + self.emit_load_const(ConstantData::Integer { + value: (size - 1).into(), + }); + emit!( + self, + Instruction::CompareOperation { + op: ComparisonOperator::GreaterOrEqual + } + ); + self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + } + + // Whatever comes next should consume the subject. + pc.on_top -= 1; + if only_wildcard { + // Patterns like: [] / [_] / [_, _] / [*_] / [_, *_] / [_, _, *_] / etc. + emit!(self, Instruction::Pop); + } else if star_wildcard { + self.pattern_helper_sequence_subscr(patterns, star.unwrap(), pc)?; + } else { + self.pattern_helper_sequence_unpack(patterns, star, pc)?; + } + Ok(()) + } + + fn compile_pattern_value( + &mut self, + p: &PatternMatchValue, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // TODO: ensure literal or attribute lookup + self.compile_expression(&p.value)?; emit!( self, - Instruction::JumpIfFalse { - target: pattern_context.blocks[pattern_context.current_block + 1] + Instruction::CompareOperation { + op: bytecode::ComparisonOperator::Equal } ); + // emit!(self, Instruction::ToBool); + self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; Ok(()) } + fn compile_pattern_singleton( + &mut self, + p: &PatternMatchSingleton, + pc: &mut PatternContext, + ) -> CompileResult<()> { + // Load the singleton constant value. + self.emit_load_const(match p.value { + Singleton::None => ConstantData::None, + Singleton::False => ConstantData::Boolean { value: false }, + Singleton::True => ConstantData::Boolean { value: true }, + }); + // Compare using the "Is" operator. + emit!( + self, + Instruction::CompareOperation { + op: bytecode::ComparisonOperator::Equal + } + ); + // Jump to the failure label if the comparison is false. 
+ self.jump_to_fail_pop(pc, JumpOp::PopJumpIfFalse)?; + Ok(()) + } + + fn compile_pattern( + &mut self, + pattern_type: &Pattern, + pattern_context: &mut PatternContext, + ) -> CompileResult<()> { + match &pattern_type { + Pattern::MatchValue(pattern_type) => { + self.compile_pattern_value(pattern_type, pattern_context) + } + Pattern::MatchSingleton(pattern_type) => { + self.compile_pattern_singleton(pattern_type, pattern_context) + } + Pattern::MatchSequence(pattern_type) => { + self.compile_pattern_sequence(pattern_type, pattern_context) + } + // Pattern::MatchMapping(pattern_type) => self.compile_pattern_mapping(pattern_type, pattern_context), + Pattern::MatchClass(pattern_type) => { + self.compile_pattern_class(pattern_type, pattern_context) + } + Pattern::MatchStar(pattern_type) => { + self.compile_pattern_star(pattern_type, pattern_context) + } + Pattern::MatchAs(pattern_type) => { + self.compile_pattern_as(pattern_type, pattern_context) + } + Pattern::MatchOr(pattern_type) => { + self.compile_pattern_or(pattern_type, pattern_context) + } + _ => { + // The eprintln gives context as to which pattern type is not implemented. + eprintln!("not implemented pattern type: {pattern_type:?}"); + Err(self.error(CodegenErrorType::NotImplementedYet)) + } + } + } + fn compile_match_inner( &mut self, subject: &Expr, @@ -1874,63 +2662,67 @@ impl Compiler<'_> { pattern_context: &mut PatternContext, ) -> CompileResult<()> { self.compile_expression(subject)?; - pattern_context.blocks = std::iter::repeat_with(|| self.new_block()) - .take(cases.len() + 1) - .collect::>(); - let end_block = *pattern_context.blocks.last().unwrap(); - - let _match_case_type = cases.last().expect("cases is not empty"); - // TODO: get proper check for default case - // let has_default = match_case_type.pattern.is_match_as() && 1 < cases.len(); - let has_default = false; - for i in 0..cases.len() - (has_default as usize) { - self.switch_to_block(pattern_context.blocks[i]); - pattern_context.current_block = i; - pattern_context.allow_irrefutable = cases[i].guard.is_some() || i == cases.len() - 1; - let m = &cases[i]; - // Only copy the subject if we're *not* on the last case: - if i != cases.len() - has_default as usize - 1 { - emit!(self, Instruction::Duplicate); + let end = self.new_block(); + + let num_cases = cases.len(); + assert!(num_cases > 0); + let has_default = cases.iter().last().unwrap().pattern.is_match_star() && num_cases > 1; + + let case_count = num_cases - if has_default { 1 } else { 0 }; + for (i, m) in cases.iter().enumerate().take(case_count) { + // Only copy the subject if not on the last case + if i != case_count - 1 { + emit!(self, Instruction::CopyItem { index: 1_u32 }); } + + pattern_context.stores = Vec::with_capacity(1); + pattern_context.allow_irrefutable = m.guard.is_some() || i == case_count - 1; + pattern_context.fail_pop.clear(); + pattern_context.on_top = 0; + self.compile_pattern(&m.pattern, pattern_context)?; + assert_eq!(pattern_context.on_top, 0); + + for name in &pattern_context.stores { + self.compile_name(name, NameUsage::Store)?; + } + + if let Some(ref _guard) = m.guard { + self.ensure_fail_pop(pattern_context, 0)?; + // TODO: Fix compile jump if call + return Err(self.error(CodegenErrorType::NotImplementedYet)); + // Jump if the guard fails. We assume that patter_context.fail_pop[0] is the jump target. 
+ // self.compile_jump_if(&m.pattern, &guard, pattern_context.fail_pop[0])?; + } + + if i != case_count - 1 { + emit!(self, Instruction::Pop); + } + self.compile_statements(&m.body)?; - emit!(self, Instruction::Jump { target: end_block }); + emit!(self, Instruction::Jump { target: end }); + self.emit_and_reset_fail_pop(pattern_context)?; } - // TODO: below code is not called and does not work + if has_default { - // A trailing "case _" is common, and lets us save a bit of redundant - // pushing and popping in the loop above: - let m = &cases.last().unwrap(); - self.switch_to_block(*pattern_context.blocks.last().unwrap()); - if cases.len() == 1 { - // No matches. Done with the subject: + let m = &cases[num_cases - 1]; + if num_cases == 1 { emit!(self, Instruction::Pop); } else { - // Show line coverage for default case (it doesn't create bytecode) - // emit!(self, Instruction::Nop); + emit!(self, Instruction::Nop); + } + if let Some(ref _guard) = m.guard { + // TODO: Fix compile jump if call + return Err(self.error(CodegenErrorType::NotImplementedYet)); } self.compile_statements(&m.body)?; } - - self.switch_to_block(end_block); - - let code = self.current_code_info(); - pattern_context - .blocks - .iter() - .zip(pattern_context.blocks.iter().skip(1)) - .for_each(|(a, b)| { - code.blocks[a.0 as usize].next = *b; - }); + self.switch_to_block(end); Ok(()) } fn compile_match(&mut self, subject: &Expr, cases: &[MatchCase]) -> CompileResult<()> { - let mut pattern_context = PatternContext { - current_block: usize::MAX, - blocks: Vec::new(), - allow_irrefutable: false, - }; + let mut pattern_context = PatternContext::new(); self.compile_match_inner(subject, cases, &mut pattern_context)?; Ok(()) } @@ -3637,7 +4429,7 @@ impl ToU32 for usize { } #[cfg(test)] -mod tests { +mod ruff_tests { use super::*; use ruff_python_ast::name::Name; use ruff_python_ast::*; @@ -3740,26 +4532,26 @@ mod tests { } } -/* #[cfg(test)] mod tests { use super::*; - use rustpython_parser::Parse; - use rustpython_parser::ast::Suite; - use rustpython_parser_core::source_code::LinearLocator; fn compile_exec(source: &str) -> CodeObject { - let mut locator: LinearLocator<'_> = LinearLocator::new(source); - use rustpython_parser::ast::fold::Fold; - let mut compiler: Compiler = Compiler::new( - CompileOpts::default(), - "source_path".to_owned(), - "".to_owned(), - ); - let ast = Suite::parse(source, "").unwrap(); - let ast = locator.fold(ast).unwrap(); - let symbol_scope = SymbolTable::scan_program(&ast).unwrap(); - compiler.compile_program(&ast, symbol_scope).unwrap(); + let opts = CompileOpts::default(); + let source_code = SourceCode::new("source_path", source); + let parsed = + ruff_python_parser::parse(source_code.text, ruff_python_parser::Mode::Module.into()) + .unwrap(); + let ast = parsed.into_syntax(); + let ast = match ast { + ruff_python_ast::Mod::Module(stmts) => stmts, + _ => unreachable!(), + }; + let symbol_table = SymbolTable::scan_program(&ast, source_code.clone()) + .map_err(|e| e.into_codegen_error(source_code.path.to_owned())) + .unwrap(); + let mut compiler = Compiler::new(opts, source_code, "".to_owned()); + compiler.compile_program(&ast, symbol_table).unwrap(); compiler.pop_code_object() } @@ -3816,8 +4608,24 @@ for stop_exc in (StopIteration('spam'), StopAsyncIteration('ham')): self.assertIs(ex, stop_exc) else: self.fail(f'{stop_exc} was suppressed') +" + )); + } + + #[test] + fn test_match() { + assert_dis_snapshot!(compile_exec( + "\ +class Test: + pass + +t = Test() +match t: + case Test(): + assert True 
+ case _: + assert False " )); } } -*/ diff --git a/compiler/codegen/src/error.rs b/compiler/codegen/src/error.rs index 8f38680de0..b1b4f9379f 100644 --- a/compiler/codegen/src/error.rs +++ b/compiler/codegen/src/error.rs @@ -1,7 +1,22 @@ use ruff_source_file::SourceLocation; -use std::fmt; +use std::fmt::{self, Display}; use thiserror::Error; +#[derive(Debug)] +pub enum PatternUnreachableReason { + NameCapture, + Wildcard, +} + +impl Display for PatternUnreachableReason { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::NameCapture => write!(f, "name capture"), + Self::Wildcard => write!(f, "wildcard"), + } + } +} + // pub type CodegenError = rustpython_parser_core::source_code::LocatedError; #[derive(Error, Debug)] @@ -47,8 +62,9 @@ pub enum CodegenErrorType { TooManyStarUnpack, EmptyWithItems, EmptyWithBody, + ForbiddenName, DuplicateStore(String), - InvalidMatchCase, + UnreachablePattern(PatternUnreachableReason), NotImplementedYet, // RustPython marker for unimplemented features } @@ -94,11 +110,14 @@ impl fmt::Display for CodegenErrorType { EmptyWithBody => { write!(f, "empty body on With") } + ForbiddenName => { + write!(f, "forbidden attribute name") + } DuplicateStore(s) => { write!(f, "duplicate store {s}") } - InvalidMatchCase => { - write!(f, "invalid match case") + UnreachablePattern(reason) => { + write!(f, "{reason} makes remaining patterns unreachable") } NotImplementedYet => { write!(f, "RustPython does not implement this feature yet") diff --git a/compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__match.snap b/compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__match.snap new file mode 100644 index 0000000000..f09f0f5eaf --- /dev/null +++ b/compiler/codegen/src/snapshots/rustpython_codegen__compile__tests__match.snap @@ -0,0 +1,53 @@ +--- +source: compiler/codegen/src/compile.rs +expression: "compile_exec(\"\\\nclass Test:\n pass\n\nt = Test()\nmatch t:\n case Test():\n assert True\n case _:\n assert False\n\")" +--- + 2 0 LoadBuildClass + 1 LoadConst (): 1 0 LoadGlobal (0, __name__) + 1 StoreLocal (1, __module__) + 2 LoadConst ("Test") + 3 StoreLocal (2, __qualname__) + 4 LoadConst (None) + 5 StoreLocal (3, __doc__) + + 2 6 ReturnConst (None) + + 2 LoadConst ("Test") + 3 MakeFunction (MakeFunctionFlags(0x0)) + 4 LoadConst ("Test") + 5 CallFunctionPositional(2) + 6 StoreLocal (0, Test) + + 4 7 LoadNameAny (0, Test) + 8 CallFunctionPositional(0) + 9 StoreLocal (1, t) + + 5 10 LoadNameAny (1, t) + 11 CopyItem (1) + + 6 12 LoadNameAny (0, Test) + 13 LoadConst (()) + 14 MatchClass (0) + 15 CopyItem (1) + 16 LoadConst (None) + 17 IsOperation (true) + 18 JumpIfFalse (27) + 19 UnpackSequence (0) + 20 Pop + + 7 21 LoadConst (True) + 22 JumpIfTrue (26) + 23 LoadGlobal (2, AssertionError) + 24 CallFunctionPositional(0) + 25 Raise (Raise) + >> 26 Jump (35) + >> 27 Pop + 28 Pop + + 9 29 LoadConst (False) + 30 JumpIfTrue (34) + 31 LoadGlobal (2, AssertionError) + 32 CallFunctionPositional(0) + 33 Raise (Raise) + >> 34 Jump (35) + >> 35 ReturnConst (None) diff --git a/compiler/core/src/bytecode.rs b/compiler/core/src/bytecode.rs index 94d080ace4..81dd591ad1 100644 --- a/compiler/core/src/bytecode.rs +++ b/compiler/core/src/bytecode.rs @@ -381,6 +381,7 @@ pub type NameIdx = u32; #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[repr(u8)] pub enum Instruction { + Nop, /// Importing by name ImportName { idx: Arg, @@ -429,21 +430,33 @@ pub enum Instruction { BinaryOperationInplace { op: Arg, }, + BinarySubscript, LoadAttr { 
         idx: Arg,
     },
     TestOperation {
         op: Arg,
     },
+    /// If the argument is true, perform IS NOT. Otherwise perform the IS operation.
+    // TODO: duplication of TestOperator::{Is,IsNot}. Fix later.
+    IsOperation(Arg),
     CompareOperation {
         op: Arg,
     },
+    CopyItem {
+        index: Arg,
+    },
     Pop,
+    Swap {
+        index: Arg,
+    },
+    // ToBool,
     Rotate2,
     Rotate3,
     Duplicate,
     Duplicate2,
     GetIter,
+    GetLen,
     Continue {
         target: Arg