From 607e6694521b8739c3f6b14a2d08242211e28a3b Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 11 May 2025 11:21:28 +0900 Subject: [PATCH 1/3] fix(pypi): make the URL/filename extraction from requirement more robust Summary: - Make the requirement line the same as the one that is used in whls. It only contains extras and the version if it is present. - Add debug log statements if we fail to get the version from a direct URL reference. - Move some tests from `parse_requirements_tests` to `index_sources_tests` to improve test maintenance. - Replace the URL encoded `+` to a regular `+` in the filename. - Correctly handle the case when the `=sha256:` is used in the URL. I cannot think of anything else that we can do for this as of now, so will mark the associated issue as resolved. Fixes #2363 --- .editorconfig | 4 + CHANGELOG.md | 4 + python/private/pypi/index_sources.bzl | 42 ++++++- python/private/pypi/parse_requirements.bzl | 41 ++++--- python/private/pypi/whl_library.bzl | 12 +- tests/pypi/extension/extension_tests.bzl | 6 +- .../index_sources/index_sources_tests.bzl | 39 ++++++- .../parse_requirements_tests.bzl | 109 +----------------- 8 files changed, 115 insertions(+), 142 deletions(-) diff --git a/.editorconfig b/.editorconfig index 26bb52ffac..2737b0f184 100644 --- a/.editorconfig +++ b/.editorconfig @@ -15,3 +15,7 @@ max_line_length = 100 [*.{py,bzl}] indent_style = space indent_size = 4 + +# different overrides for git commit messages +[.git/COMMIT_EDITMSG] +max_line_length = 72 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f67c8a5ec..a3d30484fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -86,6 +86,10 @@ END_UNRELEASED_TEMPLATE multiple times. * (tools/wheelmaker.py) Extras are now preserved in Requires-Dist metadata when using requires_file to specify the requirements. +* (pypi) Use bazel downloader for direct URL references and correctly detect the filenames from + various URL formats - URL encoded version strings get correctly resolved, sha256 value can be + also retrieved from the URL as opposed to only the `--hash` parameter. Fixes + [#2363](https://github.com/bazel-contrib/rules_python/issues/2363). {#v0-0-0-added} ### Added diff --git a/python/private/pypi/index_sources.bzl b/python/private/pypi/index_sources.bzl index e3762d2a48..803670c3e4 100644 --- a/python/private/pypi/index_sources.bzl +++ b/python/private/pypi/index_sources.bzl @@ -16,6 +16,23 @@ A file that houses private functions used in the `bzlmod` extension with the same name. """ +# Just list them here and me super conservative +_KNOWN_EXTS = [ + # Note, the following source in pip has more extensions + # https://github.com/pypa/pip/blob/3c5a189141a965f21a473e46c3107e689eb9f79f/src/pip/_vendor/distlib/locators.py#L90 + # + # ".tar.bz2", + # ".tar", + # ".tgz", + # ".tbz", + # + # But we support only the following, used in 'packaging' + # https://github.com/pypa/pip/blob/3c5a189141a965f21a473e46c3107e689eb9f79f/src/pip/_vendor/packaging/utils.py#L137 + ".whl", + ".tar.gz", + ".zip", +] + def index_sources(line): """Get PyPI sources from a requirements.txt line. @@ -58,11 +75,31 @@ def index_sources(line): ).strip() url = "" + filename = "" if "@" in head: - requirement = requirement_line - _, _, url_and_rest = requirement.partition("@") + maybe_requirement, _, url_and_rest = requirement.partition("@") url = url_and_rest.strip().partition(" ")[0].strip() + url, _, sha256 = url.partition("#sha256=") + if sha256: + shas.append(sha256) + _, _, filename = url.rpartition("/") + + # Replace URL encoded characters and luckily there is only one case + filename = filename.replace("%2B", "+") + is_known_ext = False + for ext in _KNOWN_EXTS: + if filename.endswith(ext): + is_known_ext = True + break + + if is_known_ext: + requirement = maybe_requirement.strip() + else: + # could not detect filename from the URL + filename = "" + requirement = requirement_line + return struct( requirement = requirement, requirement_line = requirement_line, @@ -70,4 +107,5 @@ def index_sources(line): shas = sorted(shas), marker = marker, url = url, + filename = filename, ) diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl index 1583c89199..8e3995f775 100644 --- a/python/private/pypi/parse_requirements.bzl +++ b/python/private/pypi/parse_requirements.bzl @@ -206,7 +206,16 @@ def parse_requirements( ret_requirements.append( struct( distribution = r.distribution, - srcs = r.srcs, + srcs = struct( + # Recreate the struct so that the tests stay less brittle when + # r.srcs changes + requirement = r.srcs.requirement, + requirement_line = r.srcs.requirement_line, + marker = r.srcs.marker, + shas = r.srcs.shas, + url = r.srcs.url, + version = r.srcs.version, + ), target_platforms = sorted(target_platforms), extra_pip_args = r.extra_pip_args, whls = whls, @@ -281,32 +290,26 @@ def _add_dists(*, requirement, index_urls, logger = None): logger: A logger for printing diagnostic info. """ - # Handle direct URLs in requirements if requirement.srcs.url: - url = requirement.srcs.url - _, _, filename = url.rpartition("/") - filename, _, _ = filename.partition("#sha256=") - if "." not in filename: - # detected filename has no extension, it might be an sdist ref - # TODO @aignas 2025-04-03: should be handled if the following is fixed: - # https://github.com/bazel-contrib/rules_python/issues/2363 + if not requirement.srcs.filename: + if logger: + logger.debug(lambda: "Could not detect the filename from the URL, falling back to pip: {}".format( + requirement.srcs.url, + )) return [], None - if "@" in filename: - # this is most likely foo.git@git_sha, skip special handling of these - return [], None - - direct_url_dist = struct( - url = url, - filename = filename, + # Handle direct URLs in requirements + dist = struct( + url = requirement.srcs.url, + filename = requirement.srcs.filename, sha256 = requirement.srcs.shas[0] if requirement.srcs.shas else "", yanked = False, ) - if filename.endswith(".whl"): - return [direct_url_dist], None + if dist.filename.endswith(".whl"): + return [dist], None else: - return [], direct_url_dist + return [], dist if not index_urls: return [], None diff --git a/python/private/pypi/whl_library.bzl b/python/private/pypi/whl_library.bzl index 160bb5b799..4427c0e3ef 100644 --- a/python/private/pypi/whl_library.bzl +++ b/python/private/pypi/whl_library.bzl @@ -258,19 +258,9 @@ def _whl_library_impl(rctx): # Simulate the behaviour where the whl is present in the current directory. rctx.symlink(whl_path, whl_path.basename) whl_path = rctx.path(whl_path.basename) - elif rctx.attr.urls: + elif rctx.attr.urls and rctx.attr.filename: filename = rctx.attr.filename urls = rctx.attr.urls - if not filename: - _, _, filename = urls[0].rpartition("/") - - if not (filename.endswith(".whl") or filename.endswith("tar.gz") or filename.endswith(".zip")): - if rctx.attr.filename: - msg = "got '{}'".format(filename) - else: - msg = "detected '{}' from url:\n{}".format(filename, urls[0]) - fail("Only '.whl', '.tar.gz' or '.zip' files are supported, {}".format(msg)) - result = rctx.download( url = urls, output = filename, diff --git a/tests/pypi/extension/extension_tests.bzl b/tests/pypi/extension/extension_tests.bzl index 1cd6869c84..b4a7746271 100644 --- a/tests/pypi/extension/extension_tests.bzl +++ b/tests/pypi/extension/extension_tests.bzl @@ -806,7 +806,7 @@ git_dep @ git+https://git.server/repo/project@deadbeefdeadbeef "extra_pip_args": ["--extra-args-for-sdist-building"], "filename": "any-name.tar.gz", "python_interpreter_target": "unit_test_interpreter_target", - "requirement": "direct_sdist_without_sha @ some-archive/any-name.tar.gz", + "requirement": "direct_sdist_without_sha", "sha256": "", "urls": ["some-archive/any-name.tar.gz"], }, @@ -824,7 +824,7 @@ git_dep @ git+https://git.server/repo/project@deadbeefdeadbeef ], "filename": "direct_without_sha-0.0.1-py3-none-any.whl", "python_interpreter_target": "unit_test_interpreter_target", - "requirement": "direct_without_sha==0.0.1 @ example-direct.org/direct_without_sha-0.0.1-py3-none-any.whl", + "requirement": "direct_without_sha==0.0.1", "sha256": "", "urls": ["example-direct.org/direct_without_sha-0.0.1-py3-none-any.whl"], }, @@ -891,7 +891,7 @@ git_dep @ git+https://git.server/repo/project@deadbeefdeadbeef ], "filename": "some_pkg-0.0.1-py3-none-any.whl", "python_interpreter_target": "unit_test_interpreter_target", - "requirement": "some_pkg==0.0.1 @ example-direct.org/some_pkg-0.0.1-py3-none-any.whl --hash=sha256:deadbaaf", + "requirement": "some_pkg==0.0.1", "sha256": "deadbaaf", "urls": ["example-direct.org/some_pkg-0.0.1-py3-none-any.whl"], }, diff --git a/tests/pypi/index_sources/index_sources_tests.bzl b/tests/pypi/index_sources/index_sources_tests.bzl index 9d12bc6399..d4062b47fe 100644 --- a/tests/pypi/index_sources/index_sources_tests.bzl +++ b/tests/pypi/index_sources/index_sources_tests.bzl @@ -23,42 +23,72 @@ def _test_no_simple_api_sources(env): inputs = { "foo @ git+https://github.com/org/foo.git@deadbeef": struct( requirement = "foo @ git+https://github.com/org/foo.git@deadbeef", + requirement_line = "foo @ git+https://github.com/org/foo.git@deadbeef", marker = "", url = "git+https://github.com/org/foo.git@deadbeef", shas = [], version = "", + filename = "", ), "foo==0.0.1": struct( requirement = "foo==0.0.1", + requirement_line = "foo==0.0.1", marker = "", url = "", version = "0.0.1", + filename = "", ), "foo==0.0.1 @ https://someurl.org": struct( requirement = "foo==0.0.1 @ https://someurl.org", + requirement_line = "foo==0.0.1 @ https://someurl.org", marker = "", url = "https://someurl.org", version = "0.0.1", + filename = "", ), "foo==0.0.1 @ https://someurl.org/package.whl": struct( - requirement = "foo==0.0.1 @ https://someurl.org/package.whl", + requirement = "foo==0.0.1", + requirement_line = "foo==0.0.1 @ https://someurl.org/package.whl", marker = "", url = "https://someurl.org/package.whl", version = "0.0.1", + filename = "package.whl", ), "foo==0.0.1 @ https://someurl.org/package.whl --hash=sha256:deadbeef": struct( - requirement = "foo==0.0.1 @ https://someurl.org/package.whl --hash=sha256:deadbeef", + requirement = "foo==0.0.1", + requirement_line = "foo==0.0.1 @ https://someurl.org/package.whl --hash=sha256:deadbeef", marker = "", url = "https://someurl.org/package.whl", shas = ["deadbeef"], version = "0.0.1", + filename = "package.whl", ), "foo==0.0.1 @ https://someurl.org/package.whl; python_version < \"2.7\"\\ --hash=sha256:deadbeef": struct( - requirement = "foo==0.0.1 @ https://someurl.org/package.whl --hash=sha256:deadbeef", + requirement = "foo==0.0.1", + requirement_line = "foo==0.0.1 @ https://someurl.org/package.whl --hash=sha256:deadbeef", marker = "python_version < \"2.7\"", url = "https://someurl.org/package.whl", shas = ["deadbeef"], version = "0.0.1", + filename = "package.whl", + ), + "foo[extra] @ https://example.org/foo-1.0.tar.gz --hash=sha256:deadbe0f": struct( + requirement = "foo[extra]", + requirement_line = "foo[extra] @ https://example.org/foo-1.0.tar.gz --hash=sha256:deadbe0f", + marker = "", + url = "https://example.org/foo-1.0.tar.gz", + shas = ["deadbe0f"], + version = "", + filename = "foo-1.0.tar.gz", + ), + "torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl#sha256=deadbeef": struct( + requirement = "torch", + requirement_line = "torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl#sha256=deadbeef", + marker = "", + url = "https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl", + shas = ["deadbeef"], + version = "", + filename = "torch-2.6.0+cpu-cp311-cp311-linux_x86_64.whl", ), } for input, want in inputs.items(): @@ -66,9 +96,10 @@ def _test_no_simple_api_sources(env): env.expect.that_collection(got.shas).contains_exactly(want.shas if hasattr(want, "shas") else []) env.expect.that_str(got.version).equals(want.version) env.expect.that_str(got.requirement).equals(want.requirement) - env.expect.that_str(got.requirement_line).equals(got.requirement) + env.expect.that_str(got.requirement_line).equals(got.requirement_line) env.expect.that_str(got.marker).equals(want.marker) env.expect.that_str(got.url).equals(want.url) + env.expect.that_str(got.filename).equals(want.filename) _tests.append(_test_no_simple_api_sources) diff --git a/tests/pypi/parse_requirements/parse_requirements_tests.bzl b/tests/pypi/parse_requirements/parse_requirements_tests.bzl index c5b24870ea..dbda20f078 100644 --- a/tests/pypi/parse_requirements/parse_requirements_tests.bzl +++ b/tests/pypi/parse_requirements/parse_requirements_tests.bzl @@ -27,10 +27,6 @@ foo==0.0.1 \ """, "requirements_direct": """\ foo[extra] @ https://some-url/package.whl -bar @ https://example.org/bar-1.0.whl --hash=sha256:deadbeef -baz @ https://test.com/baz-2.0.whl; python_version < "3.8" --hash=sha256:deadb00f -qux @ https://example.org/qux-1.0.tar.gz --hash=sha256:deadbe0f -torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl#sha256=5b6ae523bfb67088a17ca7734d131548a2e60346c622621e4248ed09dd0790cc """, "requirements_extra_args": """\ --index-url=example.org @@ -136,7 +132,8 @@ def _test_simple(env): _tests.append(_test_simple) -def _test_direct_urls(env): +def _test_direct_urls_integration(env): + """Check that we are using the filename from index_sources.""" got = parse_requirements( ctx = _mock_ctx(), requirements_by_platform = { @@ -144,52 +141,6 @@ def _test_direct_urls(env): }, ) env.expect.that_dict(got).contains_exactly({ - "bar": [ - struct( - distribution = "bar", - extra_pip_args = [], - sdist = None, - is_exposed = True, - srcs = struct( - marker = "", - requirement = "bar @ https://example.org/bar-1.0.whl --hash=sha256:deadbeef", - requirement_line = "bar @ https://example.org/bar-1.0.whl --hash=sha256:deadbeef", - shas = ["deadbeef"], - version = "", - url = "https://example.org/bar-1.0.whl", - ), - target_platforms = ["linux_x86_64"], - whls = [struct( - url = "https://example.org/bar-1.0.whl", - filename = "bar-1.0.whl", - sha256 = "deadbeef", - yanked = False, - )], - ), - ], - "baz": [ - struct( - distribution = "baz", - extra_pip_args = [], - sdist = None, - is_exposed = True, - srcs = struct( - marker = "python_version < \"3.8\"", - requirement = "baz @ https://test.com/baz-2.0.whl --hash=sha256:deadb00f", - requirement_line = "baz @ https://test.com/baz-2.0.whl --hash=sha256:deadb00f", - shas = ["deadb00f"], - version = "", - url = "https://test.com/baz-2.0.whl", - ), - target_platforms = ["linux_x86_64"], - whls = [struct( - url = "https://test.com/baz-2.0.whl", - filename = "baz-2.0.whl", - sha256 = "deadb00f", - yanked = False, - )], - ), - ], "foo": [ struct( distribution = "foo", @@ -198,7 +149,7 @@ def _test_direct_urls(env): is_exposed = True, srcs = struct( marker = "", - requirement = "foo[extra] @ https://some-url/package.whl", + requirement = "foo[extra]", requirement_line = "foo[extra] @ https://some-url/package.whl", shas = [], version = "", @@ -213,57 +164,9 @@ def _test_direct_urls(env): )], ), ], - "qux": [ - struct( - distribution = "qux", - extra_pip_args = [], - sdist = struct( - url = "https://example.org/qux-1.0.tar.gz", - filename = "qux-1.0.tar.gz", - sha256 = "deadbe0f", - yanked = False, - ), - is_exposed = True, - srcs = struct( - marker = "", - requirement = "qux @ https://example.org/qux-1.0.tar.gz --hash=sha256:deadbe0f", - requirement_line = "qux @ https://example.org/qux-1.0.tar.gz --hash=sha256:deadbe0f", - shas = ["deadbe0f"], - version = "", - url = "https://example.org/qux-1.0.tar.gz", - ), - target_platforms = ["linux_x86_64"], - whls = [], - ), - ], - "torch": [ - struct( - distribution = "torch", - extra_pip_args = [], - is_exposed = True, - sdist = None, - srcs = struct( - marker = "", - requirement = "torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl#sha256=5b6ae523bfb67088a17ca7734d131548a2e60346c622621e4248ed09dd0790cc", - requirement_line = "torch @ https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl#sha256=5b6ae523bfb67088a17ca7734d131548a2e60346c622621e4248ed09dd0790cc", - shas = [], - url = "https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl#sha256=5b6ae523bfb67088a17ca7734d131548a2e60346c622621e4248ed09dd0790cc", - version = "", - ), - target_platforms = ["linux_x86_64"], - whls = [ - struct( - filename = "torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl", - sha256 = "", - url = "https://download.pytorch.org/whl/cpu/torch-2.6.0%2Bcpu-cp311-cp311-linux_x86_64.whl#sha256=5b6ae523bfb67088a17ca7734d131548a2e60346c622621e4248ed09dd0790cc", - yanked = False, - ), - ], - ), - ], }) -_tests.append(_test_direct_urls) +_tests.append(_test_direct_urls_integration) def _test_extra_pip_args(env): got = parse_requirements( @@ -612,7 +515,7 @@ def _test_optional_hash(env): is_exposed = True, srcs = struct( marker = "", - requirement = "foo==0.0.4 @ https://example.org/foo-0.0.4.whl", + requirement = "foo==0.0.4", requirement_line = "foo==0.0.4 @ https://example.org/foo-0.0.4.whl", shas = [], version = "0.0.4", @@ -633,7 +536,7 @@ def _test_optional_hash(env): is_exposed = True, srcs = struct( marker = "", - requirement = "foo==0.0.5 @ https://example.org/foo-0.0.5.whl --hash=sha256:deadbeef", + requirement = "foo==0.0.5", requirement_line = "foo==0.0.5 @ https://example.org/foo-0.0.5.whl --hash=sha256:deadbeef", shas = ["deadbeef"], version = "0.0.5", From b3203bad6e42f102fde35f4ef55aac4681b0e47e Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Mon, 12 May 2025 20:06:21 +0900 Subject: [PATCH 2/3] refactor: move the requirement vs requirement_line selection logic from extension.bzl --- python/private/pypi/extension.bzl | 4 +- python/private/pypi/parse_requirements.bzl | 11 +- .../parse_requirements_tests.bzl | 175 ++---------------- 3 files changed, 22 insertions(+), 168 deletions(-) diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index 647407f16f..9368e6539f 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -301,7 +301,7 @@ def _whl_repos(*, requirement, whl_library_args, download_only, netrc, auth_patt # This is no-op because pip is not used to download the wheel. args.pop("download_only", None) - args["requirement"] = requirement.srcs.requirement + args["requirement"] = requirement.line args["urls"] = [distribution.url] args["sha256"] = distribution.sha256 args["filename"] = distribution.filename @@ -338,7 +338,7 @@ def _whl_repos(*, requirement, whl_library_args, download_only, netrc, auth_patt # Fallback to a pip-installed wheel args = dict(whl_library_args) # make a copy - args["requirement"] = requirement.srcs.requirement_line + args["requirement"] = requirement.line if requirement.extra_pip_args: args["extra_pip_args"] = requirement.extra_pip_args diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl index 8e3995f775..30d3af4875 100644 --- a/python/private/pypi/parse_requirements.bzl +++ b/python/private/pypi/parse_requirements.bzl @@ -206,16 +206,7 @@ def parse_requirements( ret_requirements.append( struct( distribution = r.distribution, - srcs = struct( - # Recreate the struct so that the tests stay less brittle when - # r.srcs changes - requirement = r.srcs.requirement, - requirement_line = r.srcs.requirement_line, - marker = r.srcs.marker, - shas = r.srcs.shas, - url = r.srcs.url, - version = r.srcs.version, - ), + line = r.srcs.requirement if whls or sdist else r.srcs.requirement_line, target_platforms = sorted(target_platforms), extra_pip_args = r.extra_pip_args, whls = whls, diff --git a/tests/pypi/parse_requirements/parse_requirements_tests.bzl b/tests/pypi/parse_requirements/parse_requirements_tests.bzl index dbda20f078..497e08361f 100644 --- a/tests/pypi/parse_requirements/parse_requirements_tests.bzl +++ b/tests/pypi/parse_requirements/parse_requirements_tests.bzl @@ -107,14 +107,7 @@ def _test_simple(env): extra_pip_args = [], sdist = None, is_exposed = True, - srcs = struct( - marker = "", - requirement = "foo[extra]==0.0.1", - requirement_line = "foo[extra]==0.0.1 --hash=sha256:deadbeef", - shas = ["deadbeef"], - version = "0.0.1", - url = "", - ), + line = "foo[extra]==0.0.1 --hash=sha256:deadbeef", target_platforms = [ "linux_x86_64", "windows_x86_64", @@ -123,12 +116,6 @@ def _test_simple(env): ), ], }) - env.expect.that_str( - select_requirement( - got["foo"], - platform = "linux_x86_64", - ).srcs.version, - ).equals("0.0.1") _tests.append(_test_simple) @@ -147,14 +134,7 @@ def _test_direct_urls_integration(env): extra_pip_args = [], sdist = None, is_exposed = True, - srcs = struct( - marker = "", - requirement = "foo[extra]", - requirement_line = "foo[extra] @ https://some-url/package.whl", - shas = [], - version = "", - url = "https://some-url/package.whl", - ), + line = "foo[extra]", target_platforms = ["linux_x86_64"], whls = [struct( url = "https://some-url/package.whl", @@ -183,14 +163,7 @@ def _test_extra_pip_args(env): extra_pip_args = ["--index-url=example.org", "--trusted-host=example.org"], sdist = None, is_exposed = True, - srcs = struct( - marker = "", - requirement = "foo[extra]==0.0.1", - requirement_line = "foo[extra]==0.0.1 --hash=sha256:deadbeef", - shas = ["deadbeef"], - version = "0.0.1", - url = "", - ), + line = "foo[extra]==0.0.1 --hash=sha256:deadbeef", target_platforms = [ "linux_x86_64", ], @@ -198,12 +171,6 @@ def _test_extra_pip_args(env): ), ], }) - env.expect.that_str( - select_requirement( - got["foo"], - platform = "linux_x86_64", - ).srcs.version, - ).equals("0.0.1") _tests.append(_test_extra_pip_args) @@ -221,14 +188,7 @@ def _test_dupe_requirements(env): extra_pip_args = [], sdist = None, is_exposed = True, - srcs = struct( - marker = "", - requirement = "foo[extra,extra_2]==0.0.1", - requirement_line = "foo[extra,extra_2]==0.0.1 --hash=sha256:deadbeef", - shas = ["deadbeef"], - version = "0.0.1", - url = "", - ), + line = "foo[extra,extra_2]==0.0.1 --hash=sha256:deadbeef", target_platforms = ["linux_x86_64"], whls = [], ), @@ -251,14 +211,7 @@ def _test_multi_os(env): struct( distribution = "bar", extra_pip_args = [], - srcs = struct( - marker = "", - requirement = "bar==0.0.1", - requirement_line = "bar==0.0.1 --hash=sha256:deadb00f", - shas = ["deadb00f"], - version = "0.0.1", - url = "", - ), + line = "bar==0.0.1 --hash=sha256:deadb00f", target_platforms = ["windows_x86_64"], whls = [], sdist = None, @@ -269,14 +222,7 @@ def _test_multi_os(env): struct( distribution = "foo", extra_pip_args = [], - srcs = struct( - marker = "", - requirement = "foo==0.0.3", - requirement_line = "foo==0.0.3 --hash=sha256:deadbaaf", - shas = ["deadbaaf"], - version = "0.0.3", - url = "", - ), + line = "foo==0.0.3 --hash=sha256:deadbaaf", target_platforms = ["linux_x86_64"], whls = [], sdist = None, @@ -285,14 +231,7 @@ def _test_multi_os(env): struct( distribution = "foo", extra_pip_args = [], - srcs = struct( - marker = "", - requirement = "foo[extra]==0.0.2", - requirement_line = "foo[extra]==0.0.2 --hash=sha256:deadbeef", - shas = ["deadbeef"], - version = "0.0.2", - url = "", - ), + line = "foo[extra]==0.0.2 --hash=sha256:deadbeef", target_platforms = ["windows_x86_64"], whls = [], sdist = None, @@ -304,8 +243,8 @@ def _test_multi_os(env): select_requirement( got["foo"], platform = "windows_x86_64", - ).srcs.version, - ).equals("0.0.2") + ).line, + ).equals("foo[extra]==0.0.2 --hash=sha256:deadbeef") _tests.append(_test_multi_os) @@ -325,14 +264,7 @@ def _test_multi_os_legacy(env): extra_pip_args = ["--platform=manylinux_2_17_x86_64", "--python-version=39", "--implementation=cp", "--abi=cp39"], is_exposed = False, sdist = None, - srcs = struct( - marker = "", - requirement = "bar==0.0.1", - requirement_line = "bar==0.0.1 --hash=sha256:deadb00f", - shas = ["deadb00f"], - version = "0.0.1", - url = "", - ), + line = "bar==0.0.1 --hash=sha256:deadb00f", target_platforms = ["cp39_linux_x86_64"], whls = [], ), @@ -343,14 +275,7 @@ def _test_multi_os_legacy(env): extra_pip_args = ["--platform=manylinux_2_17_x86_64", "--python-version=39", "--implementation=cp", "--abi=cp39"], is_exposed = True, sdist = None, - srcs = struct( - marker = "", - requirement = "foo==0.0.1", - requirement_line = "foo==0.0.1 --hash=sha256:deadbeef", - shas = ["deadbeef"], - version = "0.0.1", - url = "", - ), + line = "foo==0.0.1 --hash=sha256:deadbeef", target_platforms = ["cp39_linux_x86_64"], whls = [], ), @@ -359,14 +284,7 @@ def _test_multi_os_legacy(env): extra_pip_args = ["--platform=macosx_10_9_arm64", "--python-version=39", "--implementation=cp", "--abi=cp39"], is_exposed = True, sdist = None, - srcs = struct( - marker = "", - requirement_line = "foo==0.0.3 --hash=sha256:deadbaaf", - requirement = "foo==0.0.3", - shas = ["deadbaaf"], - version = "0.0.3", - url = "", - ), + line = "foo==0.0.3 --hash=sha256:deadbaaf", target_platforms = ["cp39_osx_aarch64"], whls = [], ), @@ -413,14 +331,7 @@ def _test_env_marker_resolution(env): extra_pip_args = [], is_exposed = True, sdist = None, - srcs = struct( - marker = "", - requirement = "bar==0.0.1", - requirement_line = "bar==0.0.1 --hash=sha256:deadbeef", - shas = ["deadbeef"], - version = "0.0.1", - url = "", - ), + line = "bar==0.0.1 --hash=sha256:deadbeef", target_platforms = ["cp311_linux_super_exotic", "cp311_windows_x86_64"], whls = [], ), @@ -431,25 +342,12 @@ def _test_env_marker_resolution(env): extra_pip_args = [], is_exposed = False, sdist = None, - srcs = struct( - marker = "marker", - requirement = "foo[extra]==0.0.1", - requirement_line = "foo[extra]==0.0.1 --hash=sha256:deadbeef", - shas = ["deadbeef"], - version = "0.0.1", - url = "", - ), + line = "foo[extra]==0.0.1 --hash=sha256:deadbeef", target_platforms = ["cp311_windows_x86_64"], whls = [], ), ], }) - env.expect.that_str( - select_requirement( - got["foo"], - platform = "windows_x86_64", - ).srcs.version, - ).equals("0.0.1") _tests.append(_test_env_marker_resolution) @@ -467,14 +365,7 @@ def _test_different_package_version(env): extra_pip_args = [], is_exposed = True, sdist = None, - srcs = struct( - marker = "", - requirement = "foo==0.0.1", - requirement_line = "foo==0.0.1 --hash=sha256:deadb00f", - shas = ["deadb00f"], - version = "0.0.1", - url = "", - ), + line = "foo==0.0.1 --hash=sha256:deadb00f", target_platforms = ["linux_x86_64"], whls = [], ), @@ -483,14 +374,7 @@ def _test_different_package_version(env): extra_pip_args = [], is_exposed = True, sdist = None, - srcs = struct( - marker = "", - requirement = "foo==0.0.1+local", - requirement_line = "foo==0.0.1+local --hash=sha256:deadbeef", - shas = ["deadbeef"], - version = "0.0.1+local", - url = "", - ), + line = "foo==0.0.1+local --hash=sha256:deadbeef", target_platforms = ["linux_x86_64"], whls = [], ), @@ -513,14 +397,7 @@ def _test_optional_hash(env): extra_pip_args = [], sdist = None, is_exposed = True, - srcs = struct( - marker = "", - requirement = "foo==0.0.4", - requirement_line = "foo==0.0.4 @ https://example.org/foo-0.0.4.whl", - shas = [], - version = "0.0.4", - url = "https://example.org/foo-0.0.4.whl", - ), + line = "foo==0.0.4", target_platforms = ["linux_x86_64"], whls = [struct( url = "https://example.org/foo-0.0.4.whl", @@ -534,14 +411,7 @@ def _test_optional_hash(env): extra_pip_args = [], sdist = None, is_exposed = True, - srcs = struct( - marker = "", - requirement = "foo==0.0.5", - requirement_line = "foo==0.0.5 @ https://example.org/foo-0.0.5.whl --hash=sha256:deadbeef", - shas = ["deadbeef"], - version = "0.0.5", - url = "https://example.org/foo-0.0.5.whl", - ), + line = "foo==0.0.5", target_platforms = ["linux_x86_64"], whls = [struct( url = "https://example.org/foo-0.0.5.whl", @@ -569,14 +439,7 @@ def _test_git_sources(env): extra_pip_args = [], is_exposed = True, sdist = None, - srcs = struct( - marker = "", - requirement = "foo @ git+https://github.com/org/foo.git@deadbeef", - requirement_line = "foo @ git+https://github.com/org/foo.git@deadbeef", - shas = [], - url = "git+https://github.com/org/foo.git@deadbeef", - version = "", - ), + line = "foo @ git+https://github.com/org/foo.git@deadbeef", target_platforms = ["linux_x86_64"], whls = [], ), From fb88eb9b0a1c9e444b4d54dcdcb72e653acddb18 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Mon, 12 May 2025 20:14:56 +0900 Subject: [PATCH 3/3] keep compatiblity with WORKSPACE --- python/private/pypi/parse_requirements.bzl | 5 ++++- python/private/pypi/pip_repository.bzl | 9 +++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/python/private/pypi/parse_requirements.bzl b/python/private/pypi/parse_requirements.bzl index 30d3af4875..bdfac46ed6 100644 --- a/python/private/pypi/parse_requirements.bzl +++ b/python/private/pypi/parse_requirements.bzl @@ -40,6 +40,7 @@ def parse_requirements( extra_pip_args = [], get_index_urls = None, evaluate_markers = None, + extract_url_srcs = True, logger = None): """Get the requirements with platforms that the requirements apply to. @@ -58,6 +59,8 @@ def parse_requirements( the platforms stored as values in the input dict. Returns the same dict, but with values being platforms that are compatible with the requirements line. + extract_url_srcs: A boolean to enable extracting URLs from requirement + lines to enable using bazel downloader. logger: repo_utils.logger or None, a simple struct to log diagnostic messages. Returns: @@ -206,7 +209,7 @@ def parse_requirements( ret_requirements.append( struct( distribution = r.distribution, - line = r.srcs.requirement if whls or sdist else r.srcs.requirement_line, + line = r.srcs.requirement if extract_url_srcs and (whls or sdist) else r.srcs.requirement_line, target_platforms = sorted(target_platforms), extra_pip_args = r.extra_pip_args, whls = whls, diff --git a/python/private/pypi/pip_repository.bzl b/python/private/pypi/pip_repository.bzl index 8ca94f7f9b..c8d23f471f 100644 --- a/python/private/pypi/pip_repository.bzl +++ b/python/private/pypi/pip_repository.bzl @@ -89,19 +89,20 @@ def _pip_repository_impl(rctx): python_interpreter_target = rctx.attr.python_interpreter_target, srcs = rctx.attr._evaluate_markers_srcs, ), + extract_url_srcs = False, ) selected_requirements = {} options = None repository_platform = host_platform(rctx) for name, requirements in requirements_by_platform.items(): - r = select_requirement( + requirement = select_requirement( requirements, platform = None if rctx.attr.download_only else repository_platform, ) - if not r: + if not requirement: continue - options = options or r.extra_pip_args - selected_requirements[name] = r.srcs.requirement_line + options = options or requirement.extra_pip_args + selected_requirements[name] = requirement.line bzl_packages = sorted(selected_requirements.keys())