From 68511b46654dc488adf6926444530dc24a6b9b02 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 23 Mar 2025 18:09:00 +0900 Subject: [PATCH 1/8] refactor(pypi): implement PEP508 compliant marker evaluation This implements the PEP508 compliant marker evaluation in starlark and removes the need for the Python interpreter when evaluating requirements files passed to `pip.parse`. This makes the evaluation faster and allows us to fix a few known issues (#2690). In the future the intent is to move the `METADATA` parsing to pure starlark so that the `RequiresDist` could be parsed in starlark at the macro evaluation or analysis phases. This should make it possible to more easily solve the design problem that more and more things need to be passed to `whl_library` as args to have a robust dependency parsing: * #2319 needs the full Python version to have correct cross-platform compatible METADATA parsing and passing it to `Python` and back makes it difficult/annoying to implement. * Parsing the `METADATA` file requires the precise list of target platform or the list of available packages in the `requirements.txt`. This means that without it we cannot trim the dependency tree in the `whl_library`. Doing this at macro loading phase allows us to depend on `.bzl` files in the `hub_repository` and more effectively pass information. Fixes #2423 --- python/private/pypi/BUILD.bazel | 35 +- python/private/pypi/evaluate_markers.bzl | 67 +-- python/private/pypi/extension.bzl | 35 +- python/private/pypi/parse_requirements.bzl | 12 +- python/private/pypi/pep508.bzl | 23 + python/private/pypi/pep508_env.bzl | 109 ++++ python/private/pypi/pep508_evaluate.bzl | 483 ++++++++++++++++++ python/private/pypi/pep508_req.bzl | 42 ++ python/private/pypi/pip_repository.bzl | 17 +- .../pypi/requirements_parser/BUILD.bazel | 0 .../resolve_target_platforms.py | 63 --- python/private/semver.bzl | 55 +- tests/pypi/pep508/BUILD.bazel | 5 + tests/pypi/pep508/evaluate_tests.bzl | 241 +++++++++ tests/semver/semver_test.bzl | 18 + 15 files changed, 1027 insertions(+), 178 deletions(-) create mode 100644 python/private/pypi/pep508.bzl create mode 100644 python/private/pypi/pep508_env.bzl create mode 100644 python/private/pypi/pep508_evaluate.bzl create mode 100644 python/private/pypi/pep508_req.bzl delete mode 100644 python/private/pypi/requirements_parser/BUILD.bazel delete mode 100755 python/private/pypi/requirements_parser/resolve_target_platforms.py create mode 100644 tests/pypi/pep508/BUILD.bazel create mode 100644 tests/pypi/pep508/evaluate_tests.bzl diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index 6f80272af6..141125e9c4 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -75,7 +75,9 @@ bzl_library( name = "evaluate_markers_bzl", srcs = ["evaluate_markers.bzl"], deps = [ - ":pypi_repo_utils_bzl", + ":pep508_env_bzl", + ":pep508_evaluate_bzl", + ":pep508_req_bzl", ], ) @@ -208,6 +210,37 @@ bzl_library( ], ) +bzl_library( + name = "pep508_bzl", + srcs = ["pep508.bzl"], + deps = [ + ":pep508_env_bzl", + ":pep508_evaluate_bzl", + ], +) + +bzl_library( + name = "pep508_env_bzl", + srcs = ["pep508_env.bzl"], +) + +bzl_library( + name = "pep508_evaluate_bzl", + srcs = ["pep508_evaluate.bzl"], + deps = [ + "//python/private:enum_bzl", + "//python/private:semver_bzl", + ], +) + +bzl_library( + name = "pep508_req_bzl", + srcs = ["pep508_req.bzl"], + deps = [ + "//python/private:normalize_name_bzl", + ], +) + bzl_library( name = "pip_bzl", srcs = ["pip.bzl"], diff --git a/python/private/pypi/evaluate_markers.bzl b/python/private/pypi/evaluate_markers.bzl index 028657f716..1d4c30753f 100644 --- a/python/private/pypi/evaluate_markers.bzl +++ b/python/private/pypi/evaluate_markers.bzl @@ -14,65 +14,24 @@ """A simple function that evaluates markers using a python interpreter.""" -load(":deps.bzl", "record_files") -load(":pypi_repo_utils.bzl", "pypi_repo_utils") +load(":pep508_env.bzl", "env", _platform_from_str = "platform_from_str") +load(":pep508_evaluate.bzl", "evaluate") +load(":pep508_req.bzl", _req = "requirement") -# Used as a default value in a rule to ensure we fetch the dependencies. -SRCS = [ - # When the version, or any of the files in `packaging` package changes, - # this file will change as well. - record_files["pypi__packaging"], - Label("//python/private/pypi/requirements_parser:resolve_target_platforms.py"), - Label("//python/private/pypi/whl_installer:platform.py"), -] - -def evaluate_markers(mrctx, *, requirements, python_interpreter, python_interpreter_target, srcs, logger = None): +def evaluate_markers(requirements): """Return the list of supported platforms per requirements line. Args: - mrctx: repository_ctx or module_ctx. - requirements: list[str] of the requirement file lines to evaluate. - python_interpreter: str, path to the python_interpreter to use to - evaluate the env markers in the given requirements files. It will - be only called if the requirements files have env markers. This - should be something that is in your PATH or an absolute path. - python_interpreter_target: Label, same as python_interpreter, but in a - label format. - srcs: list[Label], the value of SRCS passed from the `rctx` or `mctx` to this function. - logger: repo_utils.logger or None, a simple struct to log diagnostic - messages. Defaults to None. + requirements: dict[str, list[str]] of the requirement file lines to evaluate. Returns: dict of string lists with target platforms """ - if not requirements: - return {} - - in_file = mrctx.path("requirements_with_markers.in.json") - out_file = mrctx.path("requirements_with_markers.out.json") - mrctx.file(in_file, json.encode(requirements)) - - pypi_repo_utils.execute_checked( - mrctx, - op = "ResolveRequirementEnvMarkers({})".format(in_file), - python = pypi_repo_utils.resolve_python_interpreter( - mrctx, - python_interpreter = python_interpreter, - python_interpreter_target = python_interpreter_target, - ), - arguments = [ - "-m", - "python.private.pypi.requirements_parser.resolve_target_platforms", - in_file, - out_file, - ], - srcs = srcs, - environment = { - "PYTHONPATH": [ - Label("@pypi__packaging//:BUILD.bazel"), - Label("//:BUILD.bazel"), - ], - }, - logger = logger, - ) - return json.decode(mrctx.read(out_file)) + ret = {} + for req_string, platforms in requirements.items(): + req = _req(req_string) + for platform in platforms: + if evaluate(req.marker, env = env(_platform_from_str(platform, None))): + ret.setdefault(req_string, []).append(platform) + + return ret diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index be00bf8ab3..8353d2359c 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -22,7 +22,7 @@ load("//python/private:repo_utils.bzl", "repo_utils") load("//python/private:semver.bzl", "semver") load("//python/private:version_label.bzl", "version_label") load(":attrs.bzl", "use_isolated") -load(":evaluate_markers.bzl", "evaluate_markers", EVALUATE_MARKERS_SRCS = "SRCS") +load(":evaluate_markers.bzl", "evaluate_markers") load(":hub_repository.bzl", "hub_repository", "whl_config_settings_to_json") load(":parse_requirements.bzl", "parse_requirements") load(":parse_whl_name.bzl", "parse_whl_name") @@ -166,28 +166,10 @@ def _create_whl_repos( ), extra_pip_args = pip_attr.extra_pip_args, get_index_urls = get_index_urls, - # NOTE @aignas 2024-08-02: , we will execute any interpreter that we find either - # in the PATH or if specified as a label. We will configure the env - # markers when evaluating the requirement lines based on the output - # from the `requirements_files_by_platform` which should have something - # similar to: - # { - # "//:requirements.txt": ["cp311_linux_x86_64", ...] - # } - # - # We know the target python versions that we need to evaluate the - # markers for and thus we don't need to use multiple python interpreter - # instances to perform this manipulation. This function should be executed - # only once by the underlying code to minimize the overhead needed to - # spin up a Python interpreter. - evaluate_markers = lambda module_ctx, requirements: evaluate_markers( - module_ctx, - requirements = requirements, - python_interpreter = pip_attr.python_interpreter, - python_interpreter_target = python_interpreter_target, - srcs = pip_attr._evaluate_markers_srcs, - logger = logger, - ), + # NOTE @aignas 2025-02-24: we will use the "cp3xx_os_arch" platform labels + # for converting to the PEP508 environment and will evaluate them in starlark + # without involving the interpreter at all. + evaluate_markers = evaluate_markers, logger = logger, ) @@ -764,13 +746,6 @@ a corresponding `python.toolchain()` configured. doc = """\ A dict of labels to wheel names that is typically generated by the whl_modifications. The labels are JSON config files describing the modifications. -""", - ), - "_evaluate_markers_srcs": attr.label_list( - default = EVALUATE_MARKERS_SRCS, - doc = """\ -The list of labels to use as SRCS for the marker evaluation code. This ensures that the -code will be re-evaluated when any of files in the default changes. """, ), }, **ATTRS) diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl index dbff44ecb3..7aadc15eac 100644 --- a/python/private/pypi/parse_requirements.bzl +++ b/python/private/pypi/parse_requirements.bzl @@ -67,10 +67,10 @@ def parse_requirements( of the distribution URLs from a PyPI index. Accepts ctx and distribution names to query. evaluate_markers: A function to use to evaluate the requirements. - Accepts the ctx and a dict where keys are requirement lines to - evaluate against the platforms stored as values in the input dict. - Returns the same dict, but with values being platforms that are - compatible with the requirements line. + Accepts a dict where keys are requirement lines to evaluate against + the platforms stored as values in the input dict. Returns the same + dict, but with values being platforms that are compatible with the + requirements line. logger: repo_utils.logger or None, a simple struct to log diagnostic messages. Returns: @@ -93,7 +93,7 @@ def parse_requirements( The second element is extra_pip_args should be passed to `whl_library`. """ - evaluate_markers = evaluate_markers or (lambda *_: {}) + evaluate_markers = evaluate_markers or (lambda _: {}) options = {} requirements = {} for file, plats in requirements_by_platform.items(): @@ -168,7 +168,7 @@ def parse_requirements( # to do, we could use Python to parse the requirement lines and infer the # URL of the files to download things from. This should be important for # VCS package references. - env_marker_target_platforms = evaluate_markers(ctx, reqs_with_env_markers) + env_marker_target_platforms = evaluate_markers(reqs_with_env_markers) if logger: logger.debug(lambda: "Evaluated env markers from:\n{}\n\nTo:\n{}".format( reqs_with_env_markers, diff --git a/python/private/pypi/pep508.bzl b/python/private/pypi/pep508.bzl new file mode 100644 index 0000000000..e74352def2 --- /dev/null +++ b/python/private/pypi/pep508.bzl @@ -0,0 +1,23 @@ +# Copyright 2025 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module is for implementing PEP508 in starlark as FeatureFlagInfo +""" + +load(":pep508_env.bzl", _env = "env") +load(":pep508_evaluate.bzl", _evaluate = "evaluate", _to_string = "to_string") + +to_string = _to_string +evaluate = _evaluate +env = _env diff --git a/python/private/pypi/pep508_env.bzl b/python/private/pypi/pep508_env.bzl new file mode 100644 index 0000000000..f91a4a2582 --- /dev/null +++ b/python/private/pypi/pep508_env.bzl @@ -0,0 +1,109 @@ +# Copyright 2025 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module is for implementing PEP508 environment definition. +""" + +_platform_machine_values = { + "aarch64": "arm64", + "ppc": "ppc64le", + "s390x": "s390x", + "x86_32": "i386", + "x86_64": "x86_64", +} +_platform_system_values = { + "linux": "Linux", + "osx": "Darwin", + "windows": "Windows", +} +_sys_platform_values = { + "linux": "posix", + "osx": "darwin", + "windows": "win32", +} +_os_name_values = { + "linux": "posix", + "osx": "posix", + "windows": "nt", +} + +def env(target_platform, *, extra = None): + """Return an env target platform + + Args: + target_platform: {type}`str` the target platform identifier, e.g. + `cp33_linux_aarch64` + extra: {type}`str` the extra value to be added into the env. + + Returns: + A dict that can be used as `env` in the marker evaluation. + """ + + # TODO @aignas 2025-02-13: consider moving this into config settings. + + env = {"extra": extra} if extra != None else {} + env = env | { + "implementation_name": "cpython", + "platform_python_implementation": "CPython", + "platform_release": "", + "platform_version": "", + } + if target_platform.abi: + minor_version, _, micro_version = target_platform.abi[3:].partition(".") + micro_version = micro_version or "0" + env = env | { + "implementation_version": "3.{}.{}".format(minor_version, micro_version), + "python_full_version": "3.{}.{}".format(minor_version, micro_version), + "python_version": "3.{}".format(minor_version), + } + if target_platform.os and target_platform.arch: + os = target_platform.os + arch = target_platform.arch + env = env | { + "os_name": _os_name_values.get(os, ""), + "platform_machine": "aarch64" if (os, arch) == ("linux", "aarch64") else _platform_machine_values.get(arch, ""), + "platform_system": _platform_system_values.get(os, ""), + "sys_platform": _sys_platform_values.get(os, ""), + } + + # This is split by topic + return env + +def _platform(*, abi = None, os = None, arch = None): + return struct( + abi = abi, + os = os, + arch = arch, + ) + +def platform_from_str(p, python_version): + """Return a platform from a string. + + Args: + p: {type}`str` the actual string. + python_version: {type}`str` the python version to add to platform if needed. + + Returns: + A struct that is returned by the `_platform` function. + """ + if p.startswith("cp"): + abi, _, p = p.partition("_") + elif python_version: + major, _, tail = python_version.partition(".") + abi = "cp{}{}".format(major, tail) + else: + abi = None + + os, _, arch = p.partition("_") + return _platform(abi = abi, os = os or None, arch = arch or None) diff --git a/python/private/pypi/pep508_evaluate.bzl b/python/private/pypi/pep508_evaluate.bzl new file mode 100644 index 0000000000..a5082df95e --- /dev/null +++ b/python/private/pypi/pep508_evaluate.bzl @@ -0,0 +1,483 @@ +# Copyright 2025 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module is for implementing PEP508 in starlark as FeatureFlagInfo +""" + +load("//python/private:enum.bzl", "enum") +load("//python/private:semver.bzl", "semver") + +# The expression parsing and resolution for the PEP508 is below +# + +# Taken from +# https://peps.python.org/pep-0508/#grammar +# +# version_cmp = wsp* '<' | '<=' | '!=' | '==' | '>=' | '>' | '~=' | '===' +_VERSION_CMP = sorted( + [ + i.strip(" '") + for i in "'<' | '<=' | '!=' | '==' | '>=' | '>' | '~=' | '==='".split(" | ") + ], + key = lambda x: (-len(x), x), +) + +_STATE = enum( + STRING = "string", + VAR = "var", + OP = "op", + NONE = "none", +) +_BRACKETS = "()" +_OPCHARS = "<>!=~" +_QUOTES = "'\"" +_WSP = " \t" +_NON_VERSION_VAR_NAMES = [ + "implementation_name", + "os_name", + "platform_machine", + "platform_python_implementation", + "platform_release", + "platform_system", + "sys_platform", + "extra", +] +_AND = "and" +_OR = "or" +_NOT = "not" + +def tokenize(marker): + """Tokenize the input string. + + The output will have double-quoted values (i.e. the quoting will be normalized) and all of the whitespace will be trimmed. + + Args: + marker: {type}`str` The input to tokenize. + + Returns: + The {type}`str` that is the list of recognized tokens that should be parsed. + """ + if not marker: + return [] + + tokens = [] + token = "" + state = _STATE.NONE + char = "" + + # Due to the `continue` in the loop, we will be processing chars at a slower pace + for _ in range(2 * len(marker)): + if token and (state == _STATE.NONE or not marker): + if tokens and token == "in" and tokens[-1] == _NOT: + tokens[-1] += " " + token + else: + tokens.append(token) + token = "" + + if not marker: + return tokens + + char = marker[0] + if char in _BRACKETS: + state = _STATE.NONE + token = char + elif state == _STATE.STRING and char in _QUOTES: + state = _STATE.NONE + token = '"{}"'.format(token) + elif ( + (state == _STATE.VAR and not char.isalnum() and char != "_") or + (state == _STATE.OP and char not in _OPCHARS) + ): + state = _STATE.NONE + continue # Skip consuming the char below + elif state == _STATE.NONE: + # Transition from _STATE.NONE to something or stay in NONE + if char in _QUOTES: + state = _STATE.STRING + elif char.isalnum(): + state = _STATE.VAR + token += char + elif char in _OPCHARS: + state = _STATE.OP + token += char + elif char in _WSP: + state = _STATE.NONE + else: + fail("BUG: Cannot parse '{}' in {} ({})".format(char, state, marker)) + else: + token += char + + # Consume the char + marker = marker[1:] + + return fail("BUG: failed to process the marker in allocated cycles: {}".format(marker)) + +def evaluate(marker, *, env, strict = True, **kwargs): + """Evaluate the marker against a given env. + + Args: + marker: {type}`str`: The string marker to evaluate. + env: {type}`dict`: The environment to evaluate the marker against. + strict: {type}`bool`: A setting to not fail on missing values in the env. + **kwargs: Extra kwargs to be passed to the expression evaluator. + + Returns: + The {type}`bool` If the marker is compatible with the given env. + """ + tokens = tokenize(marker) + + ast = _new_expr(**kwargs) + for _ in range(len(tokens) * 2): + if not tokens: + break + + tokens = ast.parse(env = env, tokens = tokens, strict = strict) + + if not tokens: + return ast.value() + + fail("Could not evaluate: {}".format(marker)) + +_STRING_REPLACEMENTS = { + "!=": "neq", + "(": "_", + ")": "_", + "<": "lt", + "<=": "lteq", + "==": "eq", + "===": "eeq", + ">": "gt", + ">=": "gteq", + "not in": "not_in", + "~==": "cmp", +} + +def to_string(marker): + return "_".join([ + _STRING_REPLACEMENTS.get(t, t) + for t in tokenize(marker) + ]).replace("\"", "") + +def _and_fn(x, y): + """Our custom `and` evaluation function. + + Allow partial evaluation if one of the values is a string, return the + string value because that means that `marker_expr` was set to + `strict = False` and we are only evaluating what we can. + """ + if not (x and y): + return False + + x_is_str = type(x) == type("") + y_is_str = type(y) == type("") + if x_is_str and y_is_str: + return "{} and {}".format(x, y) + elif x_is_str: + return x + else: + return y + +def _or_fn(x, y): + """Our custom `or` evaluation function. + + Allow partial evaluation if one of the values is a string, return the + string value because that means that `marker_expr` was set to + `strict = False` and we are only evaluating what we can. + """ + x_is_str = type(x) == type("") + y_is_str = type(y) == type("") + + if x_is_str and y_is_str: + return "{} or {}".format(x, y) if x and y else "" + elif x_is_str: + return "" if y else x + elif y_is_str: + return "" if x else y + else: + return x or y + +def _not_fn(x): + """Our custom `not` evaluation function. + + Allow partial evaluation if the value is a string. + """ + if type(x) == type(""): + return "not {}".format(x) + else: + return not x + +def _new_expr( + and_fn = _and_fn, + or_fn = _or_fn, + not_fn = _not_fn): + # buildifier: disable=uninitialized + self = struct( + tree = [], + parse = lambda **kwargs: _parse(self, **kwargs), + value = lambda: _value(self), + # This is a way for us to have a handle to the currently constructed + # expression tree branch. + current = lambda: self._current[0] if self._current else None, + _current = [], + _and = and_fn, + _or = or_fn, + _not = not_fn, + ) + return self + +def _parse(self, *, env, tokens, strict = False): + """The parse function takes the consumed tokens and returns the remaining.""" + token, remaining = tokens[0], tokens[1:] + + if token == "(": + expr = _open_parenthesis(self) + elif token == ")": + expr = _close_parenthesis(self) + elif token == _AND: + expr = _and_expr(self) + elif token == _OR: + expr = _or_expr(self) + elif token == _NOT: + expr = _not_expr(self) + else: + expr = marker_expr(env = env, strict = strict, *tokens[:3]) + remaining = tokens[3:] + + _append(self, expr) + return remaining + +def _value(self): + """Evaluate the expression tree""" + if not self.tree: + # Basic case where no marker should evaluate to True + return True + + for _ in range(len(self.tree)): + if len(self.tree) == 1: + return self.tree[0] + + # Resolve all of the `or` expressions as it is safe to do now since all + # `and` and `not` expressions have been taken care of by now. + if getattr(self.tree[-2], "op", None) == _OR: + current = self.tree.pop() + self.tree[-1] = self.tree[-1].value(current) + else: + break + + fail("BUG: invalid state: {}".format(self.tree)) + +def marker_expr(left, op, right, *, env, strict = True): + """Evaluate a marker expression + + Args: + left: {type}`str` the env identifier or a value quoted in `"`. + op: {type}`str` the operation to carry out. + right: {type}`str` the env identifier or a value quoted in `"`. + strict: {type}`bool` if false, only evaluates the values that are present + in the environment, otherwise returns the original expression. + env: {type}`dict[str, str]` the `env` to substitute `env` identifiers in + the ` ` expression. + + Returns: + {type}`bool` if the expression evaluation result or {type}`str` if the expression + could not be evaluated. + """ + var_name = None + if right not in env and left not in env and not strict: + return "{} {} {}".format(left, op, right) + if left[0] == '"': + var_name = right + right = env[right] + left = left.strip("\"") + else: + var_name = left + left = env[left] + right = right.strip("\"") + + if var_name in _NON_VERSION_VAR_NAMES: + return _env_expr(left, op, right) + elif var_name.endswith("_version"): + return _version_expr(left, op, right) + else: + # Do not fail here, just evaluate the expression to False. + return False + +def _env_expr(left, op, right): + """Evaluate a string comparison expression""" + if op == "==": + return left == right + elif op == "!=": + return left != right + elif op == "in": + return left in right + elif op == "not in": + return left not in right + else: + return fail("TODO: op unsupported: '{}'".format(op)) + +def _version_expr(left, op, right): + """Evaluate a version comparison expression""" + left = semver(left) + right = semver(right) + _left = left.key() + _right = right.key() + if op == "<": + return _left < _right + elif op == ">": + return _left > _right + elif op == "<=": + return _left <= _right + elif op == ">=": + return _left >= _right + elif op == "!=": + return _left != _right + elif op == "==": + # Matching of major, minor, patch only + return _left[:3] == _right[:3] + elif op == "~=": + right_plus = right.upper() + _right_plus = right_plus.key() + return _left >= _right and _left < _right_plus + elif op == "===": + # Strict matching + return _left == _right + elif op in _VERSION_CMP: + fail("TODO: op unsupported: '{}'".format(op)) + else: + return False # Let's just ignore the invalid ops + +# Code to allowing to combine expressions with logical operators + +def _append(self, value): + if value == None: + return + + current = self.current() or self + op = getattr(value, "op", None) + + if op == _NOT: + current.tree.append(value) + elif op in [_AND, _OR]: + value.append(current.tree[-1]) + current.tree[-1] = value + elif not current.tree: + current.tree.append(value) + elif hasattr(current.tree[-1], "append"): + current.tree[-1].append(value) + else: + current.tree._append(value) + +def _open_parenthesis(self): + """Add an extra node into the tree to perform evaluate inside parenthesis.""" + self._current.append(_new_expr( + and_fn = self._and, + or_fn = self._or, + not_fn = self._not, + )) + +def _close_parenthesis(self): + """Backtrack and evaluate the expression within parenthesis.""" + value = self._current.pop().value() + if type(value) == type(""): + return "({})".format(value) + else: + return value + +def _not_expr(self): + """Add an extra node into the tree to perform an 'not' operation.""" + + def _append(value): + """Append a value to the not expression node. + + This codifies `not` precedence over `and` and performs backtracking to + evaluate any `not` statements and forward the value to the first `and` + statement if needed. + """ + + current = self.current() or self + current.tree[-1] = self._not(value) + + for _ in range(len(current.tree)): + if not len(current.tree) > 1: + break + + op = getattr(current.tree[-2], "op", None) + if op == None: + pass + elif op == _NOT: + value = current.tree.pop() + current.tree[-1] = self._not(value) + continue + elif op == _AND: + value = current.tree.pop() + current.tree[-1].append(value) + elif op != _OR: + fail("BUG: '{} not' compound is unsupported".format(current.tree[-1])) + + break + + return struct( + op = _NOT, + append = _append, + ) + +def _and_expr(self): + """Add an extra node into the tree to perform an 'and' operation""" + maybe_value = [None] + + def _append(value): + """Append a value to the and expression node. + + Here we backtrack, but we only evaluate the current `and` statement - + all of the `not` statements will be by now evaluated and `or` + statements need to be evaluated later. + """ + if maybe_value[0] == None: + maybe_value[0] = value + return + + current = self.current() or self + current.tree[-1] = self._and(maybe_value[0], value) + + return struct( + op = _AND, + append = _append, + # private fields that help debugging + _maybe_value = maybe_value, + ) + +def _or_expr(self): + """Add an extra node into the tree to perform an 'or' operation""" + maybe_value = [None] + + def _append(value): + """Append a value to the or expression node. + + Here we just append the extra values to the tree and the `or` + statements will be evaluated in the _value() function. + """ + if maybe_value[0] == None: + maybe_value[0] = value + return + + current = self.current() or self + current.tree.append(value) + + return struct( + op = _OR, + value = lambda x: self._or(maybe_value[0], x), + append = _append, + # private fields that help debugging + _maybe_value = maybe_value, + ) diff --git a/python/private/pypi/pep508_req.bzl b/python/private/pypi/pep508_req.bzl new file mode 100644 index 0000000000..618ffaf17a --- /dev/null +++ b/python/private/pypi/pep508_req.bzl @@ -0,0 +1,42 @@ +# Copyright 2025 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This module is for parsing PEP508 requires-dist and requirements lines. +""" + +load("//python/private:normalize_name.bzl", "normalize_name") + +_STRIP = ["(", " ", ">", "=", "<", "~", "!"] + +def requirement(spec): + """Parse a PEP508 requirement line + + Args: + spec: {type}`str` requirement line that will be parsed. + + Returns: + A struct with the information. + """ + requires, _, maybe_hashes = spec.partition(";") + marker, _, _ = maybe_hashes.partition("--hash") + requires, _, extras_unparsed = requires.partition("[") + for char in _STRIP: + requires, _, _ = requires.partition(char) + extras = extras_unparsed.strip("]").split(",") + + return struct( + name = normalize_name(requires.strip(" ")), + marker = marker.strip(" "), + extras = extras, + ) diff --git a/python/private/pypi/pip_repository.bzl b/python/private/pypi/pip_repository.bzl index 7976cfaae9..01a541cf2f 100644 --- a/python/private/pypi/pip_repository.bzl +++ b/python/private/pypi/pip_repository.bzl @@ -18,7 +18,7 @@ load("@bazel_skylib//lib:sets.bzl", "sets") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:repo_utils.bzl", "REPO_DEBUG_ENV_VAR") load("//python/private:text_util.bzl", "render") -load(":evaluate_markers.bzl", "evaluate_markers", EVALUATE_MARKERS_SRCS = "SRCS") +load(":evaluate_markers.bzl", "evaluate_markers") load(":parse_requirements.bzl", "host_platform", "parse_requirements", "select_requirement") load(":pip_repository_attrs.bzl", "ATTRS") load(":render_pkg_aliases.bzl", "render_pkg_aliases") @@ -82,13 +82,7 @@ def _pip_repository_impl(rctx): extra_pip_args = rctx.attr.extra_pip_args, ), extra_pip_args = rctx.attr.extra_pip_args, - evaluate_markers = lambda rctx, requirements: evaluate_markers( - rctx, - requirements = requirements, - python_interpreter = rctx.attr.python_interpreter, - python_interpreter_target = rctx.attr.python_interpreter_target, - srcs = rctx.attr._evaluate_markers_srcs, - ), + evaluate_markers = evaluate_markers, ) selected_requirements = {} options = None @@ -234,13 +228,6 @@ file](https://github.com/bazel-contrib/rules_python/blob/main/examples/pip_repos _template = attr.label( default = ":requirements.bzl.tmpl.workspace", ), - _evaluate_markers_srcs = attr.label_list( - default = EVALUATE_MARKERS_SRCS, - doc = """\ -The list of labels to use as SRCS for the marker evaluation code. This ensures that the -code will be re-evaluated when any of files in the default changes. -""", - ), **ATTRS ), doc = """Accepts a locked/compiled requirements file and installs the dependencies listed within. diff --git a/python/private/pypi/requirements_parser/BUILD.bazel b/python/private/pypi/requirements_parser/BUILD.bazel deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/python/private/pypi/requirements_parser/resolve_target_platforms.py b/python/private/pypi/requirements_parser/resolve_target_platforms.py deleted file mode 100755 index c899a943cc..0000000000 --- a/python/private/pypi/requirements_parser/resolve_target_platforms.py +++ /dev/null @@ -1,63 +0,0 @@ -"""A CLI to evaluate env markers for requirements files. - -A simple script to evaluate the `requirements.txt` files. Currently it is only -handling environment markers in the requirements files, but in the future it -may handle more things. We require a `python` interpreter that can run on the -host platform and then we depend on the [packaging] PyPI wheel. - -In order to be able to resolve requirements files for any platform, we are -re-using the same code that is used in the `whl_library` installer. See -[here](../whl_installer/wheel.py). - -Requirements for the code are: -- Depends only on `packaging` and core Python. -- Produces the same result irrespective of the Python interpreter platform or version. - -[packaging]: https://packaging.pypa.io/en/stable/ -""" - -import argparse -import json -import pathlib - -from packaging.requirements import Requirement - -from python.private.pypi.whl_installer.platform import Platform - -INPUT_HELP = """\ -Input path to read the requirements as a json file, the keys in the dictionary -are the requirements lines and the values are strings of target platforms. -""" -OUTPUT_HELP = """\ -Output to write the requirements as a json filepath, the keys in the dictionary -are the requirements lines and the values are strings of target platforms, which -got changed based on the evaluated markers. -""" - - -def main(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("input_path", type=pathlib.Path, help=INPUT_HELP.strip()) - parser.add_argument("output_path", type=pathlib.Path, help=OUTPUT_HELP.strip()) - args = parser.parse_args() - - with args.input_path.open() as f: - reqs = json.load(f) - - response = {} - for requirement_line, target_platforms in reqs.items(): - entry, prefix, hashes = requirement_line.partition("--hash") - hashes = prefix + hashes - - req = Requirement(entry) - for p in target_platforms: - (platform,) = Platform.from_string(p) - if not req.marker or req.marker.evaluate(platform.env_markers("")): - response.setdefault(requirement_line, []).append(p) - - with args.output_path.open("w") as f: - json.dump(response, f) - - -if __name__ == "__main__": - main() diff --git a/python/private/semver.bzl b/python/private/semver.bzl index 73d6b130ae..cc9ae6ecb6 100644 --- a/python/private/semver.bzl +++ b/python/private/semver.bzl @@ -43,6 +43,49 @@ def _to_dict(self): "pre_release": self.pre_release, } +def _upper(self): + major = self.major + minor = self.minor + patch = self.patch + build = "" + pre_release = "" + version = self.str() + + if patch != None: + minor = minor + 1 + patch = 0 + elif minor != None: + major = major + 1 + minor = 0 + elif minor == None: + major = major + 1 + + return _new( + major = major, + minor = minor, + patch = patch, + build = build, + pre_release = pre_release, + version = "~" + version, + ) + +def _new(*, major, minor, patch, pre_release, build, version = None): + # buildifier: disable=uninitialized + self = struct( + major = int(major), + minor = None if minor == None else int(minor), + # NOTE: this is called `micro` in the Python interpreter versioning scheme + patch = None if patch == None else int(patch), + pre_release = pre_release, + build = build, + # buildifier: disable=uninitialized + key = lambda: _key(self), + str = lambda: version, + to_dict = lambda: _to_dict(self), + upper = lambda: _upper(self), + ) + return self + def semver(version): """Parse the semver version and return the values as a struct. @@ -59,17 +102,11 @@ def semver(version): patch, _, build = tail.partition("+") patch, _, pre_release = patch.partition("-") - # buildifier: disable=uninitialized - self = struct( + return _new( major = int(major), minor = int(minor) if minor.isdigit() else None, - # NOTE: this is called `micro` in the Python interpreter versioning scheme patch = int(patch) if patch.isdigit() else None, - pre_release = pre_release, build = build, - # buildifier: disable=uninitialized - key = lambda: _key(self), - str = lambda: version, - to_dict = lambda: _to_dict(self), + pre_release = pre_release, + version = version, ) - return self diff --git a/tests/pypi/pep508/BUILD.bazel b/tests/pypi/pep508/BUILD.bazel new file mode 100644 index 0000000000..b795db0591 --- /dev/null +++ b/tests/pypi/pep508/BUILD.bazel @@ -0,0 +1,5 @@ +load(":evaluate_tests.bzl", "evaluate_test_suite") + +evaluate_test_suite( + name = "evaluate_tests", +) diff --git a/tests/pypi/pep508/evaluate_tests.bzl b/tests/pypi/pep508/evaluate_tests.bzl new file mode 100644 index 0000000000..18337c40db --- /dev/null +++ b/tests/pypi/pep508/evaluate_tests.bzl @@ -0,0 +1,241 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for construction of Python version matching config settings.""" + +load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("//python/private/pypi:pep508_evaluate.bzl", "evaluate", "tokenize") # buildifier: disable=bzl-visibility + +_tests = [] + +def _tokenize_tests(env): + for input, want in { + "": [], + "'osx' == os_name": ['"osx"', "==", "os_name"], + "'x' not in os_name": ['"x"', "not in", "os_name"], + "()": ["(", ")"], + "(os_name == 'osx' and not os_name == 'posix') or os_name == \"win\"": [ + "(", + "os_name", + "==", + '"osx"', + "and", + "not", + "os_name", + "==", + '"posix"', + ")", + "or", + "os_name", + "==", + '"win"', + ], + "os_name\t==\t'osx'": ["os_name", "==", '"osx"'], + "os_name == 'osx'": ["os_name", "==", '"osx"'], + "python_version <= \"1.0\"": ["python_version", "<=", '"1.0"'], + "python_version>='1.0.0'": ["python_version", ">=", '"1.0.0"'], + "python_version~='1.0.0'": ["python_version", "~=", '"1.0.0"'], + }.items(): + got = tokenize(input) + env.expect.that_collection(got).contains_exactly(want).in_order() + +_tests.append(_tokenize_tests) + +def _evaluate_non_version_env_tests(env): + for var_name in [ + "implementation_name", + "os_name", + "platform_machine", + "platform_python_implementation", + "platform_release", + "platform_system", + "sys_platform", + "extra", + ]: + # Given + marker_env = {var_name: "osx"} + + # When + for input, want in { + "{} == 'osx'".format(var_name): True, + "{} != 'osx'".format(var_name): False, + "'osx' == {}".format(var_name): True, + "'osx' != {}".format(var_name): False, + "'x' in {}".format(var_name): True, + "'w' not in {}".format(var_name): True, + }.items(): # buildifier: @unsorted-dict-items + got = evaluate( + input, + env = marker_env, + ) + env.expect.that_bool(got).equals(want) + + # Check that the non-strict eval gives us back the input when no + # env is supplied. + got = evaluate( + input, + env = {}, + strict = False, + ) + env.expect.that_bool(got).equals(input.replace("'", '"')) + +_tests.append(_evaluate_non_version_env_tests) + +def _evaluate_version_env_tests(env): + for var_name in [ + "python_version", + "implementation_version", + "platform_version", + "python_full_version", + ]: + # Given + marker_env = {var_name: "3.7.9"} + + # When + for input, want in { + "{} < '3.8'".format(var_name): True, + "{} > '3.7'".format(var_name): True, + "{} >= '3.7.9'".format(var_name): True, + "{} >= '3.7.10'".format(var_name): False, + "{} >= '3.7.8'".format(var_name): True, + "{} <= '3.7.9'".format(var_name): True, + "{} <= '3.7.10'".format(var_name): True, + "{} <= '3.7.8'".format(var_name): False, + "{} == '3.7.9'".format(var_name): True, + "{} != '3.7.9'".format(var_name): False, + "{} ~= '3.7.1'".format(var_name): True, + "{} ~= '3.7.10'".format(var_name): False, + "{} ~= '3.8.0'".format(var_name): False, + "{} === '3.7.9+rc2'".format(var_name): False, + "{} === '3.7.9'".format(var_name): True, + "{} == '3.7.9+rc2'".format(var_name): True, + }.items(): # buildifier: @unsorted-dict-items + got = evaluate( + input, + env = marker_env, + ) + env.expect.that_collection((input, got)).contains_exactly((input, want)) + + # Check that the non-strict eval gives us back the input when no + # env is supplied. + got = evaluate( + input, + env = {}, + strict = False, + ) + env.expect.that_bool(got).equals(input.replace("'", '"')) + +_tests.append(_evaluate_version_env_tests) + +def _logical_expression_tests(env): + for input, want in { + # Basic + "": True, + "(())": True, + "()": True, + + # expr + "os_name == 'fo'": False, + "(os_name == 'fo')": False, + "not (os_name == 'fo')": True, + + # and + "os_name == 'fo' and os_name == 'foo'": False, + + # and not + "os_name == 'fo' and not os_name == 'foo'": False, + + # or + "os_name == 'oo' or os_name == 'foo'": True, + + # or not + "os_name == 'foo' or not os_name == 'foo'": True, + + # multiple or + "os_name == 'oo' or os_name == 'fo' or os_name == 'foo'": True, + "os_name == 'oo' or os_name == 'foo' or os_name == 'fo'": True, + + # multiple and + "os_name == 'foo' and os_name == 'foo' and os_name == 'fo'": False, + + # x or not y and z != (x or not y), but is instead evaluated as x or (not y and z) + "os_name == 'foo' or not os_name == 'fo' and os_name == 'fo'": True, + + # x or y and z != (x or y) and z, but is instead evaluated as x or (y and z) + "os_name == 'foo' or os_name == 'fo' and os_name == 'fo'": True, + "not (os_name == 'foo' or os_name == 'fo' and os_name == 'fo')": False, + + # x or y and z and w != (x or y and z) and w, but is instead evaluated as x or (y and z and w) + "os_name == 'foo' or os_name == 'fo' and os_name == 'fo' and os_name == 'fo'": True, + + # not not True + "not not os_name == 'foo'": True, + "not not not os_name == 'foo'": False, + }.items(): # buildifier: @unsorted-dict-items + got = evaluate( + input, + env = { + "os_name": "foo", + }, + ) + env.expect.that_collection((input, got)).contains_exactly((input, want)) + + if not input.strip("()"): + # These cases will just return True, because they will be evaluated + # and the brackets will be processed. + continue + + # Check that the non-strict eval gives us back the input when no env + # is supplied. + got = evaluate( + input, + env = {}, + strict = False, + ) + env.expect.that_bool(got).equals(input.replace("'", '"')) + +_tests.append(_logical_expression_tests) + +def _evaluate_partial_only_extra(env): + # Given + extra = "foo" + + # When + for input, want in { + "os_name == 'osx' and extra == 'bar'": False, + "os_name == 'osx' and extra == 'foo'": "os_name == \"osx\"", + "platform_system == 'aarch64' and os_name == 'osx' and extra == 'foo'": "platform_system == \"aarch64\" and os_name == \"osx\"", + "platform_system == 'aarch64' and extra == 'foo' and os_name == 'osx'": "platform_system == \"aarch64\" and os_name == \"osx\"", + "os_name == 'osx' or extra == 'bar'": "os_name == \"osx\"", + "os_name == 'osx' or extra == 'foo'": "", + "extra == 'bar' or os_name == 'osx'": "os_name == \"osx\"", + "extra == 'foo' or os_name == 'osx'": "", + "os_name == 'win' or extra == 'bar' or os_name == 'osx'": "os_name == \"win\" or os_name == \"osx\"", + "os_name == 'win' or extra == 'foo' or os_name == 'osx'": "", + }.items(): # buildifier: @unsorted-dict-items + got = evaluate( + input, + env = { + "extra": extra, + }, + strict = False, + ) + env.expect.that_bool(got).equals(want) + +_tests.append(_evaluate_partial_only_extra) + +def evaluate_test_suite(name): # buildifier: disable=function-docstring + test_suite( + name = name, + basic_tests = _tests, + ) diff --git a/tests/semver/semver_test.bzl b/tests/semver/semver_test.bzl index 9d13402c92..aef3deca82 100644 --- a/tests/semver/semver_test.bzl +++ b/tests/semver/semver_test.bzl @@ -104,6 +104,24 @@ def _test_semver_sort(env): _tests.append(_test_semver_sort) +def _test_upper(env): + for input, want in { + # Depending on how many version numbers are specified we will increase + # the upper bound differently. See https://packaging.python.org/en/latest/specifications/version-specifiers/#compatible-release for docs + "0.0.1": "0.1.0", + "0.1": "1.0", + "0.1.0": "0.2.0", + "1": "2", + "1.0.0-pre": "1.1.0", # pre-release info is dropped + "1.2.0": "1.3.0", + "2.0.0+build0": "2.1.0", # build info is dropped + }.items(): + actual = semver(input).upper().key() + want = semver(want).key() + env.expect.that_collection(actual).contains_exactly(want).in_order() + +_tests.append(_test_upper) + def semver_test_suite(name): """Create the test suite. From 435b9d25ff26ccbd63fcecb26acc4a2cc7b5cb33 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 23 Mar 2025 18:35:39 +0900 Subject: [PATCH 2/8] fix tests --- tests/pypi/parse_requirements/parse_requirements_tests.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pypi/parse_requirements/parse_requirements_tests.bzl b/tests/pypi/parse_requirements/parse_requirements_tests.bzl index 8edc2689bf..7bbd696afa 100644 --- a/tests/pypi/parse_requirements/parse_requirements_tests.bzl +++ b/tests/pypi/parse_requirements/parse_requirements_tests.bzl @@ -454,7 +454,7 @@ def _test_select_requirement_none_platform(env): _tests.append(_test_select_requirement_none_platform) def _test_env_marker_resolution(env): - def _mock_eval_markers(_, input): + def _mock_eval_markers(input): ret = { "foo[extra]==0.0.1 ;marker --hash=sha256:deadbeef": ["cp311_windows_x86_64"], } From f7edb9e0e756ba8ab122c295677e4f9cf6d9fb48 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 23 Mar 2025 18:39:42 +0900 Subject: [PATCH 3/8] fix more tests --- tests/pypi/extension/extension_tests.bzl | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/pypi/extension/extension_tests.bzl b/tests/pypi/extension/extension_tests.bzl index 8c01a02271..9ddc254a15 100644 --- a/tests/pypi/extension/extension_tests.bzl +++ b/tests/pypi/extension/extension_tests.bzl @@ -76,7 +76,6 @@ def _parse( *, hub_name, python_version, - _evaluate_markers_srcs = [], add_libdir_to_library_search_path = False, auth_patterns = {}, download_only = False, @@ -104,7 +103,6 @@ def _parse( whl_modifications = {}, **kwargs): return struct( - _evaluate_markers_srcs = _evaluate_markers_srcs, auth_patterns = auth_patterns, add_libdir_to_library_search_path = add_libdir_to_library_search_path, download_only = download_only, @@ -275,14 +273,6 @@ torch==2.4.1 ; platform_machine != 'x86_64' \ available_interpreters = { "python_3_15_host": "unit_test_interpreter_target", }, - evaluate_markers = lambda _, requirements, **__: { - key: [ - platform - for platform in platforms - if ("x86_64" in platform and "platform_machine ==" in key) or ("x86_64" not in platform and "platform_machine !=" in key) - ] - for key, platforms in requirements.items() - }, ) pypi.is_reproducible().equals(True) From d4b19b430e2a13a22547656517f0410d7d453199 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 30 Mar 2025 08:07:06 +0900 Subject: [PATCH 4/8] Fix ppc handling --- python/private/pypi/pep508_env.bzl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/private/pypi/pep508_env.bzl b/python/private/pypi/pep508_env.bzl index f91a4a2582..75ac77e68f 100644 --- a/python/private/pypi/pep508_env.bzl +++ b/python/private/pypi/pep508_env.bzl @@ -17,7 +17,8 @@ _platform_machine_values = { "aarch64": "arm64", - "ppc": "ppc64le", + "ppc": "ppc", + "ppc64le": "ppc64le", "s390x": "s390x", "x86_32": "i386", "x86_64": "x86_64", From 0585c0d944a3d4fdd36894781e50cfd60aa527fa Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 30 Mar 2025 18:48:51 +0900 Subject: [PATCH 5/8] wip --- tests/pypi/extension/extension_tests.bzl | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/pypi/extension/extension_tests.bzl b/tests/pypi/extension/extension_tests.bzl index e303b8e6c7..858c026df8 100644 --- a/tests/pypi/extension/extension_tests.bzl +++ b/tests/pypi/extension/extension_tests.bzl @@ -399,15 +399,6 @@ torch==2.4.1+cpu ; platform_machine == 'x86_64' \ available_interpreters = { "python_3_12_host": "unit_test_interpreter_target", }, - evaluate_markers = lambda _, requirements, **__: { - # todo once 2692 is merged, this is going to be easier to test. - key: [ - platform - for platform in platforms - if ("x86_64" in platform and "platform_machine ==" in key) or ("x86_64" not in platform and "platform_machine !=" in key) - ] - for key, platforms in requirements.items() - }, simpleapi_download = mocksimpleapi_download, ) From a17aa5597773ffa38eb16fd5b18bb0af21ec345c Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 30 Mar 2025 20:33:50 +0900 Subject: [PATCH 6/8] normalize the platform_machine during evaluation --- python/private/pypi/pep508_env.bzl | 6 ++-- python/private/pypi/pep508_evaluate.bzl | 41 ++++++++++++++++++++----- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/python/private/pypi/pep508_env.bzl b/python/private/pypi/pep508_env.bzl index 75ac77e68f..576e13c450 100644 --- a/python/private/pypi/pep508_env.bzl +++ b/python/private/pypi/pep508_env.bzl @@ -15,10 +15,12 @@ """This module is for implementing PEP508 environment definition. """ +# See https://stackoverflow.com/questions/45125516/possible-values-for-uname-m _platform_machine_values = { - "aarch64": "arm64", + "aarch64": "aarch64", "ppc": "ppc", "ppc64le": "ppc64le", + "riscv64": "riscv64", "s390x": "s390x", "x86_32": "i386", "x86_64": "x86_64", @@ -73,7 +75,7 @@ def env(target_platform, *, extra = None): arch = target_platform.arch env = env | { "os_name": _os_name_values.get(os, ""), - "platform_machine": "aarch64" if (os, arch) == ("linux", "aarch64") else _platform_machine_values.get(arch, ""), + "platform_machine": _platform_machine_values.get(arch, ""), "platform_system": _platform_system_values.get(os, ""), "sys_platform": _sys_platform_values.get(os, ""), } diff --git a/python/private/pypi/pep508_evaluate.bzl b/python/private/pypi/pep508_evaluate.bzl index a5082df95e..1d0b83440e 100644 --- a/python/private/pypi/pep508_evaluate.bzl +++ b/python/private/pypi/pep508_evaluate.bzl @@ -56,6 +56,16 @@ _NON_VERSION_VAR_NAMES = [ _AND = "and" _OR = "or" _NOT = "not" +_VALUE_ALIASES = { + "platform_machine": { + # These pairs mean the same hardware, but different values may be used + # on different host platforms. + "amd64": "x86_64", + "arm64": "aarch64", + "i386": "x86_32", + "i686": "x86_32", + }, +} def tokenize(marker): """Tokenize the input string. @@ -123,13 +133,17 @@ def tokenize(marker): return fail("BUG: failed to process the marker in allocated cycles: {}".format(marker)) -def evaluate(marker, *, env, strict = True, **kwargs): +def evaluate(marker, *, env, strict = True, value_aliases = _VALUE_ALIASES, **kwargs): """Evaluate the marker against a given env. Args: - marker: {type}`str`: The string marker to evaluate. - env: {type}`dict`: The environment to evaluate the marker against. - strict: {type}`bool`: A setting to not fail on missing values in the env. + marker: {type}`str` The string marker to evaluate. + env: {type}`dict` The environment to evaluate the marker against. + strict: {type}`bool` A setting to not fail on missing values in the env. + value_aliases: {type}`dict` The value normalization to do for certain + fields to ensure that `aarch64` evaluation in the `platform_machine` + works the same way irrespective if the marker uses `arm64` or + `aarch64` value in the expression. **kwargs: Extra kwargs to be passed to the expression evaluator. Returns: @@ -142,7 +156,7 @@ def evaluate(marker, *, env, strict = True, **kwargs): if not tokens: break - tokens = ast.parse(env = env, tokens = tokens, strict = strict) + tokens = ast.parse(env = env, tokens = tokens, strict = strict, value_aliases = value_aliases) if not tokens: return ast.value() @@ -236,7 +250,7 @@ def _new_expr( ) return self -def _parse(self, *, env, tokens, strict = False): +def _parse(self, *, env, tokens, strict = False, value_aliases = {}): """The parse function takes the consumed tokens and returns the remaining.""" token, remaining = tokens[0], tokens[1:] @@ -251,7 +265,7 @@ def _parse(self, *, env, tokens, strict = False): elif token == _NOT: expr = _not_expr(self) else: - expr = marker_expr(env = env, strict = strict, *tokens[:3]) + expr = marker_expr(env = env, strict = strict, value_aliases = value_aliases, *tokens[:3]) remaining = tokens[3:] _append(self, expr) @@ -277,7 +291,7 @@ def _value(self): fail("BUG: invalid state: {}".format(self.tree)) -def marker_expr(left, op, right, *, env, strict = True): +def marker_expr(left, op, right, *, env, strict = True, value_aliases = {}): """Evaluate a marker expression Args: @@ -288,6 +302,7 @@ def marker_expr(left, op, right, *, env, strict = True): in the environment, otherwise returns the original expression. env: {type}`dict[str, str]` the `env` to substitute `env` identifiers in the ` ` expression. + value_aliases: the value normalization used for certain fields. Returns: {type}`bool` if the expression evaluation result or {type}`str` if the expression @@ -300,11 +315,21 @@ def marker_expr(left, op, right, *, env, strict = True): var_name = right right = env[right] left = left.strip("\"") + + # On Windows, Linux, OSX different values may mean the same hardware, + # e.g. Python on Windows returns arm64, but on Linux returns aarch64. + # e.g. Python on Windows returns amd64, but on Linux returns x86_64. + # + # The following normalizes the values + left = value_aliases.get(var_name, {}).get(left, left) else: var_name = left left = env[left] right = right.strip("\"") + # See the note above on normalization + right = value_aliases.get(var_name, {}).get(right, right) + if var_name in _NON_VERSION_VAR_NAMES: return _env_expr(left, op, right) elif var_name.endswith("_version"): From aa4e05571f0dc9022dd9a5553208aa530e2df0be Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 30 Mar 2025 22:53:19 +0900 Subject: [PATCH 7/8] wip --- python/private/pypi/pep508_env.bzl | 15 ++++---------- tests/pypi/pep508/evaluate_tests.bzl | 30 ++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/python/private/pypi/pep508_env.bzl b/python/private/pypi/pep508_env.bzl index 576e13c450..042f5da78f 100644 --- a/python/private/pypi/pep508_env.bzl +++ b/python/private/pypi/pep508_env.bzl @@ -16,15 +16,6 @@ """ # See https://stackoverflow.com/questions/45125516/possible-values-for-uname-m -_platform_machine_values = { - "aarch64": "aarch64", - "ppc": "ppc", - "ppc64le": "ppc64le", - "riscv64": "riscv64", - "s390x": "s390x", - "x86_32": "i386", - "x86_64": "x86_64", -} _platform_system_values = { "linux": "Linux", "osx": "Darwin", @@ -62,6 +53,9 @@ def env(target_platform, *, extra = None): "platform_release": "", "platform_version": "", } + if type(target_platform) == type(""): + target_platform = platform_from_str(target_platform, python_version = "") + if target_platform.abi: minor_version, _, micro_version = target_platform.abi[3:].partition(".") micro_version = micro_version or "0" @@ -72,10 +66,9 @@ def env(target_platform, *, extra = None): } if target_platform.os and target_platform.arch: os = target_platform.os - arch = target_platform.arch env = env | { "os_name": _os_name_values.get(os, ""), - "platform_machine": _platform_machine_values.get(arch, ""), + "platform_machine": target_platform.arch, "platform_system": _platform_system_values.get(os, ""), "sys_platform": _sys_platform_values.get(os, ""), } diff --git a/tests/pypi/pep508/evaluate_tests.bzl b/tests/pypi/pep508/evaluate_tests.bzl index 18337c40db..80b70f4dad 100644 --- a/tests/pypi/pep508/evaluate_tests.bzl +++ b/tests/pypi/pep508/evaluate_tests.bzl @@ -14,6 +14,7 @@ """Tests for construction of Python version matching config settings.""" load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("//python/private/pypi:pep508_env.bzl", pep508_env = "env") # buildifier: disable=bzl-visibility load("//python/private/pypi:pep508_evaluate.bzl", "evaluate", "tokenize") # buildifier: disable=bzl-visibility _tests = [] @@ -234,6 +235,35 @@ def _evaluate_partial_only_extra(env): _tests.append(_evaluate_partial_only_extra) +def _evaluate_with_aliases(env): + # When + for target_platform, tests in { + # buildifier: @unsorted-dict-items + "osx_aarch64": { + "platform_system == 'Darwin' and platform_machine == 'arm64'": True, + "platform_system == 'Darwin' and platform_machine == 'aarch64'": True, + "platform_system == 'Darwin' and platform_machine == 'amd64'": False, + }, + "osx_x86_64": { + "platform_system == 'Darwin' and platform_machine == 'amd64'": True, + "platform_system == 'Darwin' and platform_machine == 'x86_64'": True, + }, + "osx_x86_32": { + "platform_system == 'Darwin' and platform_machine == 'i386'": True, + "platform_system == 'Darwin' and platform_machine == 'i686'": True, + "platform_system == 'Darwin' and platform_machine == 'x86_32'": True, + "platform_system == 'Darwin' and platform_machine == 'x86_64'": False, + }, + }.items(): # buildifier: @unsorted-dict-items + for input, want in tests.items(): + got = evaluate( + input, + env = pep508_env(target_platform), + ) + env.expect.that_bool(got).equals(want) + +_tests.append(_evaluate_with_aliases) + def evaluate_test_suite(name): # buildifier: disable=function-docstring test_suite( name = name, From f2ec55e33cc6bc1292d6decf16c9c69ce26cc7e5 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 30 Mar 2025 23:02:27 +0900 Subject: [PATCH 8/8] refactor --- python/private/pypi/pep508_env.bzl | 14 ++++++- python/private/pypi/pep508_evaluate.bzl | 50 +++++++++++-------------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/python/private/pypi/pep508_env.bzl b/python/private/pypi/pep508_env.bzl index 042f5da78f..17d41871d1 100644 --- a/python/private/pypi/pep508_env.bzl +++ b/python/private/pypi/pep508_env.bzl @@ -16,6 +16,14 @@ """ # See https://stackoverflow.com/questions/45125516/possible-values-for-uname-m +_platform_machine_aliases = { + # These pairs mean the same hardware, but different values may be used + # on different host platforms. + "amd64": "x86_64", + "arm64": "aarch64", + "i386": "x86_32", + "i686": "x86_32", +} _platform_system_values = { "linux": "Linux", "osx": "Darwin", @@ -74,7 +82,11 @@ def env(target_platform, *, extra = None): } # This is split by topic - return env + return env | { + "_aliases": { + "platform_machine": _platform_machine_aliases, + }, + } def _platform(*, abi = None, os = None, arch = None): return struct( diff --git a/python/private/pypi/pep508_evaluate.bzl b/python/private/pypi/pep508_evaluate.bzl index 1d0b83440e..f45eb75cdb 100644 --- a/python/private/pypi/pep508_evaluate.bzl +++ b/python/private/pypi/pep508_evaluate.bzl @@ -56,16 +56,7 @@ _NON_VERSION_VAR_NAMES = [ _AND = "and" _OR = "or" _NOT = "not" -_VALUE_ALIASES = { - "platform_machine": { - # These pairs mean the same hardware, but different values may be used - # on different host platforms. - "amd64": "x86_64", - "arm64": "aarch64", - "i386": "x86_32", - "i686": "x86_32", - }, -} +_ENV_ALIASES = "_aliases" def tokenize(marker): """Tokenize the input string. @@ -133,17 +124,13 @@ def tokenize(marker): return fail("BUG: failed to process the marker in allocated cycles: {}".format(marker)) -def evaluate(marker, *, env, strict = True, value_aliases = _VALUE_ALIASES, **kwargs): +def evaluate(marker, *, env, strict = True, **kwargs): """Evaluate the marker against a given env. Args: marker: {type}`str` The string marker to evaluate. env: {type}`dict` The environment to evaluate the marker against. strict: {type}`bool` A setting to not fail on missing values in the env. - value_aliases: {type}`dict` The value normalization to do for certain - fields to ensure that `aarch64` evaluation in the `platform_machine` - works the same way irrespective if the marker uses `arm64` or - `aarch64` value in the expression. **kwargs: Extra kwargs to be passed to the expression evaluator. Returns: @@ -156,7 +143,7 @@ def evaluate(marker, *, env, strict = True, value_aliases = _VALUE_ALIASES, **kw if not tokens: break - tokens = ast.parse(env = env, tokens = tokens, strict = strict, value_aliases = value_aliases) + tokens = ast.parse(env = env, tokens = tokens, strict = strict) if not tokens: return ast.value() @@ -250,7 +237,7 @@ def _new_expr( ) return self -def _parse(self, *, env, tokens, strict = False, value_aliases = {}): +def _parse(self, *, env, tokens, strict = False): """The parse function takes the consumed tokens and returns the remaining.""" token, remaining = tokens[0], tokens[1:] @@ -265,7 +252,7 @@ def _parse(self, *, env, tokens, strict = False, value_aliases = {}): elif token == _NOT: expr = _not_expr(self) else: - expr = marker_expr(env = env, strict = strict, value_aliases = value_aliases, *tokens[:3]) + expr = marker_expr(env = env, strict = strict, *tokens[:3]) remaining = tokens[3:] _append(self, expr) @@ -291,7 +278,7 @@ def _value(self): fail("BUG: invalid state: {}".format(self.tree)) -def marker_expr(left, op, right, *, env, strict = True, value_aliases = {}): +def marker_expr(left, op, right, *, env, strict = True): """Evaluate a marker expression Args: @@ -301,8 +288,11 @@ def marker_expr(left, op, right, *, env, strict = True, value_aliases = {}): strict: {type}`bool` if false, only evaluates the values that are present in the environment, otherwise returns the original expression. env: {type}`dict[str, str]` the `env` to substitute `env` identifiers in - the ` ` expression. - value_aliases: the value normalization used for certain fields. + the ` ` expression. Note, if `env` has a key + "_aliases", then we will do normalization so that we can ensure + that e.g. `aarch64` evaluation in the `platform_machine` works the + same way irrespective if the marker uses `arm64` or `aarch64` value + in the expression. Returns: {type}`bool` if the expression evaluation result or {type}`str` if the expression @@ -316,19 +306,21 @@ def marker_expr(left, op, right, *, env, strict = True, value_aliases = {}): right = env[right] left = left.strip("\"") - # On Windows, Linux, OSX different values may mean the same hardware, - # e.g. Python on Windows returns arm64, but on Linux returns aarch64. - # e.g. Python on Windows returns amd64, but on Linux returns x86_64. - # - # The following normalizes the values - left = value_aliases.get(var_name, {}).get(left, left) + if _ENV_ALIASES in env: + # On Windows, Linux, OSX different values may mean the same hardware, + # e.g. Python on Windows returns arm64, but on Linux returns aarch64. + # e.g. Python on Windows returns amd64, but on Linux returns x86_64. + # + # The following normalizes the values + left = env.get(_ENV_ALIASES, {}).get(var_name, {}).get(left, left) else: var_name = left left = env[left] right = right.strip("\"") - # See the note above on normalization - right = value_aliases.get(var_name, {}).get(right, right) + if _ENV_ALIASES in env: + # See the note above on normalization + right = env.get(_ENV_ALIASES, {}).get(var_name, {}).get(right, right) if var_name in _NON_VERSION_VAR_NAMES: return _env_expr(left, op, right)