Skip to content

Commit 10dba69

Browse files
[stubsabot] Support diff analysis for GitLab hosted projects (#14542)
1 parent c3a880b commit 10dba69

File tree

1 file changed

+99
-33
lines changed

1 file changed

+99
-33
lines changed

scripts/stubsabot.py

Lines changed: 99 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from dataclasses import dataclass, field
2323
from http import HTTPStatus
2424
from pathlib import Path
25-
from typing import Annotated, Any, ClassVar, NamedTuple, TypeVar
25+
from typing import Annotated, Any, ClassVar, Literal, NamedTuple, TypedDict, TypeVar
2626
from typing_extensions import Self, TypeAlias
2727

2828
if sys.version_info >= (3, 11):
@@ -326,57 +326,80 @@ def get_github_api_headers() -> Mapping[str, str]:
326326
return headers
327327

328328

329+
GitHost: TypeAlias = Literal["github", "gitlab"]
330+
331+
329332
@dataclass
330-
class GitHubInfo:
333+
class GitHostInfo:
334+
host: GitHost
331335
repo_path: str
332-
tags: list[dict[str, Any]] = field(repr=False)
336+
tags: list[str] = field(repr=False)
333337

334338

335-
async def get_github_repo_info(session: aiohttp.ClientSession, stub_info: StubMetadata) -> GitHubInfo | None:
339+
async def get_host_repo_info(session: aiohttp.ClientSession, stub_info: StubMetadata) -> GitHostInfo | None:
    """
    If the project represented by `stub_info` is hosted on a supported public
    host (github.com or gitlab.com), return the host, the repository path
    and the tag names reported by that host's "list tags" REST endpoint.

    Else, return None. None is also returned if the host's API
    does not answer the tags request with 200 OK.
    """
    if not stub_info.upstream_repository:
        return None
    # We have various sanity checks for the upstream_repository field in ts_utils.metadata,
    # so no need to repeat all of them here
    split_url = urllib.parse.urlsplit(stub_info.upstream_repository)
    # Match the full netloc: the API endpoints below are hard-coded to the
    # ".com" instances, so a bare "github"/"gitlab" netloc must not be accepted.
    host: GitHost
    if split_url.netloc == "github.com":
        host = "github"
    elif split_url.netloc == "gitlab.com":
        host = "gitlab"
    else:
        return None

    url_path = split_url.path.strip("/")
    path_parts = Path(url_path).parts
    headers: Mapping[str, str] | None
    if host == "github":
        # GitHub repositories are always addressed as "<owner>/<repo>".
        assert len(path_parts) == 2
        # https://docs.github.com/en/rest/repos/repos#list-repository-tags
        info_url = f"https://api.github.com/repos/{url_path}/tags"
        headers = get_github_api_headers()
    else:
        # GitLab projects may live in nested subgroups
        # ("<group>/<subgroup>/.../<project>"), so only require a namespace and a project.
        assert len(path_parts) >= 2
        # https://docs.gitlab.com/api/tags/
        # The project is identified by its URL-encoded full path ("/" -> "%2F").
        project_id = urllib.parse.quote(url_path, safe="")
        info_url = f"https://gitlab.com/api/v4/projects/{project_id}/repository/tags"
        headers = None

    async with session.get(info_url, headers=headers) as response:
        if response.status != HTTPStatus.OK:
            return None
        payload = await response.json()
    # Both endpoints are expected to return a JSON array of tag objects.
    assert isinstance(payload, list)
    # Conveniently both GitHub and GitLab use the same key name.
    tags = [tag["name"] for tag in payload]
    return GitHostInfo(host=host, repo_path=url_path, tags=tags)
356372

357373

358-
class GitHubDiffInfo(NamedTuple):
374+
class GitHostDiffInfo(NamedTuple):
375+
host: GitHost
359376
repo_path: str
360377
old_tag: str
361378
new_tag: str
362-
diff_url: str
379+
380+
@property
381+
def diff_url(self) -> str:
382+
if self.host == "github":
383+
return f"https://github.com/{self.repo_path}/compare/{self.old_tag}...{self.new_tag}"
384+
else:
385+
assert self.host == "gitlab"
386+
return f"https://gitlab.com/{self.repo_path}/-/compare/{self.old_tag}...{self.new_tag}"
363387

364388

365389
async def get_diff_info(
366390
session: aiohttp.ClientSession, stub_info: StubMetadata, pypi_version: packaging.version.Version
367-
) -> GitHubDiffInfo | None:
391+
) -> GitHostDiffInfo | None:
368392
"""Return a tuple giving info about the diff between two releases, if possible.
369393
370394
Return `None` if the project isn't hosted on GitHub or GitLab,
371395
or if a link pointing to the diff couldn't be found for any other reason.
372396
"""
373-
github_info = await get_github_repo_info(session, stub_info)
374-
if github_info is None:
397+
host_info = await get_host_repo_info(session, stub_info)
398+
if host_info is None:
375399
return None
376400

377401
versions_to_tags: dict[packaging.version.Version, str] = {}
378-
for tag in github_info.tags:
379-
tag_name = tag["name"]
402+
for tag_name in host_info.tags:
380403
# Some packages in typeshed have tag names
381404
# that are invalid to be passed to the Version() constructor,
382405
# e.g. v.1.4.2
@@ -395,11 +418,17 @@ async def get_diff_info(
395418
else:
396419
old_tag = versions_to_tags[old_version]
397420

398-
diff_url = f"https://github.com/{github_info.repo_path}/compare/{old_tag}...{new_tag}"
399-
return GitHubDiffInfo(repo_path=github_info.repo_path, old_tag=old_tag, new_tag=new_tag, diff_url=diff_url)
421+
return GitHostDiffInfo(host=host_info.host, repo_path=host_info.repo_path, old_tag=old_tag, new_tag=new_tag)
422+
400423

424+
FileStatus: TypeAlias = Literal["added", "modified", "removed", "renamed"]
401425

402-
FileInfo: TypeAlias = dict[str, Any]
426+
427+
class FileInfo(TypedDict):
428+
filename: str
429+
status: FileStatus
430+
additions: int
431+
deletions: int
403432

404433

405434
def _plural_s(num: int, /) -> str:
@@ -494,10 +523,10 @@ def __str__(self) -> str:
494523
return "Stubsabot analysis of the diff between the two releases:\n - " + "\n - ".join(data_points)
495524

496525

497-
async def analyze_diff(
498-
github_repo_path: str, distribution: str, old_tag: str, new_tag: str, *, session: aiohttp.ClientSession
526+
async def analyze_github_diff(
527+
repo_path: str, distribution: str, old_tag: str, new_tag: str, *, session: aiohttp.ClientSession
499528
) -> DiffAnalysis | None:
500-
url = f"https://api.github.com/repos/{github_repo_path}/compare/{old_tag}...{new_tag}"
529+
url = f"https://api.github.com/repos/{repo_path}/compare/{old_tag}...{new_tag}"
501530
async with session.get(url, headers=get_github_api_headers()) as response:
502531
response.raise_for_status()
503532
json_resp: dict[str, list[FileInfo]] = await response.json()
@@ -510,6 +539,42 @@ async def analyze_diff(
510539
return DiffAnalysis(py_files=py_files, py_files_stubbed_in_typeshed=py_files_stubbed_in_typeshed)
511540

512541

542+
def _count_diff_lines(diff: str) -> tuple[int, int]:
    """Return `(additions, deletions)` for the unified diff text of a single file."""
    additions = deletions = 0
    in_hunk = False
    for line in diff.splitlines():
        if line.startswith("@@"):
            # Hunk header; everything after it is context, "+" or "-" lines.
            in_hunk = True
        elif not in_hunk:
            # Anything before the first hunk header (e.g. "--- a/x.py" / "+++ b/x.py"
            # file headers, if the response includes them) is not a changed line.
            continue
        elif line.startswith("+"):
            additions += 1
        elif line.startswith("-"):
            deletions += 1
    return additions, deletions


async def analyze_gitlab_diff(
    repo_path: str, distribution: str, old_tag: str, new_tag: str, *, session: aiohttp.ClientSession
) -> DiffAnalysis | None:
    """Analyze the Python files changed between two tags of a project hosted on gitlab.com.

    `repo_path` is the project's full path on GitLab, `distribution` is the name
    of the stubs distribution, and `old_tag`/`new_tag` are the tag names to compare.

    The request raises (via `raise_for_status()`) if GitLab answers with an error status.
    """
    # https://docs.gitlab.com/api/repositories/#compare-branches-tags-or-commits
    project_id = urllib.parse.quote(repo_path, safe="")
    url = f"https://gitlab.com/api/v4/projects/{project_id}/repository/compare"
    # Pass the tags as query parameters instead of formatting them into the URL,
    # so that tag names containing e.g. "+", "&" or "#" are escaped correctly.
    async with session.get(url, params={"from": old_tag, "to": new_tag}) as response:
        response.raise_for_status()
        json_resp: dict[str, Any] = await response.json()
        assert isinstance(json_resp, dict)

    py_files: list[FileInfo] = []
    for file_diff in json_resp["diffs"]:
        filename = file_diff["new_path"]
        if Path(filename).suffix != ".py":
            continue
        # Translate GitLab's boolean flags into a single status string.
        status: FileStatus
        if file_diff["new_file"]:
            status = "added"
        elif file_diff["renamed_file"]:
            status = "renamed"
        elif file_diff["deleted_file"]:
            status = "removed"
        else:
            status = "modified"
        # The response carries no per-file line counts, so derive them from the diff text.
        additions, deletions = _count_diff_lines(file_diff["diff"])
        py_files.append(FileInfo(filename=filename, status=status, additions=additions, deletions=deletions))

    stub_path = distribution_path(distribution)
    files_in_typeshed = set(stub_path.rglob("*.pyi"))
    # "foo.py" is considered stubbed if "<stub_path>/foo.pyi" exists.
    py_files_stubbed_in_typeshed = [file for file in py_files if (stub_path / f"{file['filename']}i") in files_in_typeshed]
    return DiffAnalysis(py_files=py_files, py_files_stubbed_in_typeshed=py_files_stubbed_in_typeshed)
576+
577+
513578
def _add_months(date: datetime.date, months: int) -> datetime.date:
514579
month = date.month - 1 + months
515580
year = date.year + month // 12
@@ -627,8 +692,9 @@ async def determine_action_no_error_handling(
627692
if diff_info is None:
628693
diff_analysis: DiffAnalysis | None = None
629694
else:
695+
analyze_diff = {"github": analyze_github_diff, "gitlab": analyze_gitlab_diff}[diff_info.host]
630696
diff_analysis = await analyze_diff(
631-
github_repo_path=diff_info.repo_path,
697+
repo_path=diff_info.repo_path,
632698
distribution=distribution,
633699
old_tag=diff_info.old_tag,
634700
new_tag=diff_info.new_tag,

0 commit comments

Comments
 (0)