Skip to content

Commit ddcc0a4

Browse files
authored
✨ Feature: support link header pagination (#198)
1 parent f2bd60d commit ddcc0a4

File tree

7 files changed

+336
-11
lines changed

7 files changed

+336
-11
lines changed

codegen/templates/versions/rest.py.jinja

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,29 @@
44

55
import importlib
66
from weakref import WeakKeyDictionary, ref
7-
from typing import TYPE_CHECKING, Any, Literal, overload
7+
from typing import TYPE_CHECKING, Any, Union, Literal, TypeVar, Callable, Optional, Awaitable, overload
8+
from typing_extensions import ParamSpec
9+
10+
from githubkit.rest.paginator import Paginator
811

912
from . import VERSIONS, LATEST_VERSION, VERSION_TYPE
1013

1114
if TYPE_CHECKING:
12-
from githubkit import GitHubCore
15+
from githubkit import GitHubCore, Response
1316
{% for version, module in versions.items() %}
1417
from .{{ module }}.rest import RestNamespace as {{ pascal_case(module) }}RestNamespace
1518
{% endfor %}
1619

20+
21+
CP = ParamSpec("CP")
22+
CT = TypeVar("CT")
23+
RT = TypeVar("RT")
24+
25+
R = Union[
26+
Callable[CP, "Response[RT]"],
27+
Callable[CP, Awaitable["Response[RT]"]],
28+
]
29+
1730
if TYPE_CHECKING:
1831

1932
class _VersionProxy({{ pascal_case(versions[latest_version]) }}RestNamespace):
@@ -46,6 +59,33 @@ class RestVersionSwitcher(_VersionProxy):
4659
"Do not use the namespace after the client has been collected."
4760
)
4861

62+
@overload
def paginate(
    self,
    request: "R[CP, list[RT]]",
    map_func: None = None,
    *args: CP.args,
    **kwargs: CP.kwargs,
) -> "Paginator[RT]": ...

@overload
def paginate(
    self,
    request: "R[CP, CT]",
    map_func: Callable[["Response[CT]"], list[RT]],
    *args: CP.args,
    **kwargs: CP.kwargs,
) -> "Paginator[RT]": ...

def paginate(
    self,
    request: "R[CP, CT]",
    map_func: Optional[Callable[["Response[CT]"], list[RT]]] = None,
    *args: CP.args,
    **kwargs: CP.kwargs,
) -> "Paginator[RT]":
    """Build a paginator over the results of a REST request.

    Args:
        request: The sync or async request callable to paginate.
        map_func: Optional function turning a response into the list of
            items for that page. The first overload covers the case where
            it is omitted and the request already returns a list.
        *args: Positional arguments forwarded to ``request``.
        **kwargs: Keyword arguments forwarded to ``request``.

    Returns:
        A ``Paginator`` bound to this namespace, ``request`` and
        ``map_func``.
    """
    # The overloads above carry the precise typing; the runtime call simply
    # hands everything over to the paginator.
    paginator = Paginator(self, request, map_func, *args, **kwargs)  # type: ignore
    return paginator
88+
4989
{% for version, module in versions.items() %}
5090
@overload
5191
def __call__(self, version: Literal["{{ version }}"]) -> "{{ pascal_case(module) }}RestNamespace":

docs/usage/graphql.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ for result in github.graphql.paginate(
120120
Note that the `result` is a dict containing the list of nodes/edges for each page and the `pageInfo` object. You should iterate over the `nodes` or `edges` list to get the actual data. For example:
121121

122122
```python
123-
for result in g.graphql.paginate(query, {"owner": "owner", "repo": "repo"}):
123+
for result in github.graphql.paginate(query, {"owner": "owner", "repo": "repo"}):
124124
for issue in result["repository"]["issues"]["nodes"]:
125125
print(issue)
126126
```

docs/usage/rest-api.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ Current supported versions are: (you can find it in the section `[[tool.codegen.
298298

299299
When a response from the REST API would include many results, GitHub will paginate the results and return a subset of the results. In this case, some APIs provide `page` and `per_page` parameters to control the pagination. See [GitHub Docs - Using pagination in the REST API](https://docs.github.com/en/rest/using-the-rest-api/using-pagination-in-the-rest-api) for more information.
300300

301-
githubkit provides a built-in pagination feature to handle this. You can use the `github.paginate` method to iterate over all the results:
301+
githubkit provides a built-in pagination feature to handle this. You can use the `github.rest.paginate` method to iterate over all the results:
302302

303303
> Pagination typing is checked with Pylance ([Pyright](https://github.com/microsoft/pyright)).
304304
@@ -307,7 +307,7 @@ githubkit provides a built-in pagination feature to handle this. You can use the
307307
```python hl_lines="3-5"
308308
from githubkit.versions.latest.models import Issue
309309

310-
for issue in github.paginate(
310+
for issue in github.rest.paginate(
311311
github.rest.issues.list_for_repo, owner="owner", repo="repo", state="open"
312312
):
313313
issue: Issue
@@ -319,7 +319,7 @@ githubkit provides a built-in pagination feature to handle this. You can use the
319319
```python hl_lines="3-5"
320320
from githubkit.versions.latest.models import Issue
321321

322-
async for issue in github.paginate(
322+
async for issue in github.rest.paginate(
323323
github.rest.issues.async_list_for_repo, owner="owner", repo="repo", state="open"
324324
):
325325
issue: Issue
@@ -333,7 +333,7 @@ You can also provide a custom map function to handle complex pagination (such as
333333
```python hl_lines="5"
334334
from githubkit.versions.latest.models import Repository
335335

336-
for accessible_repo in github.paginate(
336+
for accessible_repo in github.rest.paginate(
337337
github.rest.apps.list_installation_repos_for_authenticated_user,
338338
map_func=lambda r: r.parsed_data.repositories,
339339
installation_id=1,
@@ -347,7 +347,7 @@ You can also provide a custom map function to handle complex pagination (such as
347347
```python hl_lines="5"
348348
from githubkit.versions.latest.models import Repository
349349

350-
async for accessible_repo in github.paginate(
350+
async for accessible_repo in github.rest.paginate(
351351
github.rest.apps.async_list_installation_repos_for_authenticated_user,
352352
map_func=lambda r: r.parsed_data.repositories,
353353
installation_id=1,

githubkit/paginator.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections.abc import Awaitable
22
from typing import Any, Callable, Generic, Optional, TypeVar, Union, cast, overload
3-
from typing_extensions import ParamSpec, Self
3+
from typing_extensions import ParamSpec, Self, deprecated
44

55
from .response import Response
66
from .utils import is_async
@@ -16,6 +16,10 @@
1616
]
1717

1818

19+
@deprecated(
20+
"Legacy pagination based on page and per_page is deprecated. "
21+
"Use github.rest.paginate instead."
22+
)
1923
class Paginator(Generic[RT]):
2024
"""Paginate through the responses of the rest api request."""
2125

githubkit/rest/paginator.py

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
from collections.abc import Awaitable
2+
import re
3+
from typing import (
4+
TYPE_CHECKING,
5+
Any,
6+
Callable,
7+
Generic,
8+
Optional,
9+
TypeVar,
10+
Union,
11+
cast,
12+
overload,
13+
)
14+
from typing_extensions import ParamSpec, Self
15+
16+
import httpx
17+
18+
from githubkit.response import Response
19+
from githubkit.utils import is_async
20+
21+
if TYPE_CHECKING:
22+
from githubkit.versions import RestVersionSwitcher
23+
24+
# Parameter specification of the wrapped request callable.
CP = ParamSpec("CP")
# Parsed-data type of the response handed to a custom map function.
CT = TypeVar("CT")
# Element type produced by the paginator.
RT = TypeVar("RT")
# Element type bound through ``self`` in the ``Paginator.__init__`` overloads.
RTS = TypeVar("RTS")

# A request callable: either returns a ``Response`` directly or returns an
# awaitable that resolves to one.
R = Union[
    Callable[CP, Response[RT]],
    Callable[CP, Awaitable[Response[RT]]],
]

# https://github.com/octokit/plugin-paginate-rest.js/blob/1f44b5469b31ddec9621000e6e1aee63c71ea8bf/src/iterator.ts#L40
# Captures the URL written between ``<`` and ``>`` that is followed by
# ``rel="next"``.
NEXT_LINK_PATTERN = r'<([^<>]+)>;\s*rel="next"'
36+
37+
38+
# https://docs.github.com/en/rest/using-the-rest-api/using-pagination-in-the-rest-api
39+
# https://github.com/octokit/plugin-paginate-rest.js/blob/1f44b5469b31ddec9621000e6e1aee63c71ea8bf/src/iterator.ts
40+
class Paginator(Generic[RT]):
    """Paginate through the responses of the rest api request.

    The first page is fetched by calling ``request`` with the stored
    arguments. Every following page is fetched from the URL found in the
    ``rel="next"`` entry of the previous response's ``link`` header, using the
    same HTTP method and response model as the first request.

    The paginator is both an iterator (for sync request callables) and an
    async iterator (for async request callables); it yields the items of each
    page one by one.
    """

    @overload
    def __init__(
        self: "Paginator[RTS]",
        rest: "RestVersionSwitcher",
        request: R[CP, list[RTS]],
        map_func: None = None,
        *args: CP.args,
        **kwargs: CP.kwargs,
    ): ...

    @overload
    def __init__(
        self: "Paginator[RTS]",
        rest: "RestVersionSwitcher",
        request: R[CP, CT],
        map_func: Callable[[Response[CT]], list[RTS]],
        *args: CP.args,
        **kwargs: CP.kwargs,
    ): ...

    def __init__(
        self,
        rest: "RestVersionSwitcher",
        request: R[CP, CT],
        map_func: Optional[Callable[[Response[CT]], list[RT]]] = None,
        *args: CP.args,
        **kwargs: CP.kwargs,
    ):
        """Store the request and its arguments; no request is sent yet.

        Args:
            rest: Namespace whose ``_github`` client sends follow-up requests.
            request: Sync or async callable producing the first page.
            map_func: Optional function turning a response into the list of
                items for that page. When omitted, the response's
                ``parsed_data`` must itself be a list.
            *args: Positional arguments forwarded to ``request``.
            **kwargs: Keyword arguments forwarded to ``request``. The
                ``headers`` entry, if any, is re-sent with follow-up requests.
        """
        self.rest = rest

        self.request = request
        self.args = args
        self.kwargs = kwargs

        self.map_func = map_func

        # State captured from the first response and reused for later pages.
        self._initialized: bool = False
        self._request_method: Optional[str] = None
        self._response_model: Optional[Any] = None
        self._next_link: Optional[httpx.URL] = None

        # Items of the current page and the position of the next one to yield.
        self._index: int = 0
        self._cached_data: list[RT] = []

    @property
    def finalized(self) -> bool:
        """Whether the paginator is finalized or not.

        True once at least one page has been fetched and the latest response
        carried no ``rel="next"`` link. Items of that last page may still be
        waiting in the cache.
        """
        return self._initialized and self._next_link is None

    def reset(self) -> None:
        """Reset the paginator to the initial state."""

        self._initialized = False
        # Clear everything learned from the previous run so the state really
        # matches a freshly constructed paginator.
        self._request_method = None
        self._response_model = None
        self._next_link = None
        self._index = 0
        self._cached_data = []

    def __next__(self) -> RT:
        """Return the next item, fetching further pages when needed.

        Raises:
            StopIteration: When the cached page is exhausted and there is no
                next page.
        """
        while self._index >= len(self._cached_data):
            # Check *before* fetching: ``finalized`` becomes true as soon as
            # the last page has been fetched, and that page's items must still
            # be yielded before iteration stops.
            if self.finalized:
                raise StopIteration
            self._get_next_page()

        current = self._cached_data[self._index]
        self._index += 1
        return current

    def __iter__(self: Self) -> Self:
        """Return the paginator itself as a sync iterator.

        Raises:
            TypeError: If the request callable is async.
        """
        if is_async(self.request):
            raise TypeError(f"Request method {self.request} is not an sync function")
        return self

    async def __anext__(self) -> RT:
        """Return the next item, awaiting further pages when needed.

        Raises:
            StopAsyncIteration: When the cached page is exhausted and there is
                no next page.
        """
        while self._index >= len(self._cached_data):
            # Same ordering as ``__next__``: never drop the last page's items.
            if self.finalized:
                raise StopAsyncIteration
            await self._aget_next_page()

        current = self._cached_data[self._index]
        self._index += 1
        return current

    def __aiter__(self: Self) -> Self:
        """Return the paginator itself as an async iterator.

        Raises:
            TypeError: If the request callable is not async.
        """
        if not is_async(self.request):
            raise TypeError(f"Request method {self.request} is not an async function")
        return self

    def _find_next_link(self, response: Response[Any]) -> Optional[httpx.URL]:
        """Find the next link in the response headers."""
        if links := response.headers.get("link"):
            if match := re.search(NEXT_LINK_PATTERN, links):
                return httpx.URL(match.group(1))
        return None

    def _apply_map_func(self, response: Response[Any]) -> list[RT]:
        """Extract the list of items for one page from ``response``.

        Uses ``map_func`` when one was given, otherwise the response's
        ``parsed_data``.

        Raises:
            TypeError: If the extracted value is not a list.
        """
        if self.map_func is not None:
            result = self.map_func(response)
            if not isinstance(result, list):
                raise TypeError(f"Map function must return a list, got {type(result)}")
        else:
            result = cast(Response[list[RT]], response).parsed_data
            if not isinstance(result, list):
                raise TypeError(f"Response is not a list, got {type(result)}")
        return result

    def _fill_cache_data(self, data: list[RT]) -> None:
        """Fill the cache with the data."""
        self._cached_data = data
        self._index = 0

    def _get_next_page(self) -> None:
        """Fetch the next page synchronously and refill the cache.

        Raises:
            RuntimeError: If called after the first page when no next link,
                request method or response model is available.
        """
        if not self._initialized:
            # First request
            response = cast(
                Response[Any],
                self.request(*self.args, **self.kwargs),
            )
            self._initialized = True
            self._request_method = response.raw_request.method
            # Remember the model so follow-up pages are parsed the same way.
            # NOTE(review): relies on Response exposing `_data_model` — confirm.
            self._response_model = response._data_model
        else:
            # Next request
            if self._next_link is None:
                raise RuntimeError("Paginator is finalized, no more pages to fetch.")
            if self._request_method is None:
                raise RuntimeError("Request method is not set, this should not happen.")
            if self._response_model is None:
                raise RuntimeError("Response model is not set, this should not happen.")

            # we request the next page with the same method and response model
            response = cast(
                Response[Any],
                self.rest._github.request(
                    self._request_method,
                    self._next_link,
                    headers=self.kwargs.get("headers"),  # type: ignore
                    response_model=self._response_model,  # type: ignore
                ),
            )

        self._next_link = self._find_next_link(response)
        self._fill_cache_data(self._apply_map_func(response))

    async def _aget_next_page(self) -> None:
        """Fetch the next page asynchronously and refill the cache.

        Raises:
            RuntimeError: If called after the first page when no next link,
                request method or response model is available.
        """
        if not self._initialized:
            # First request
            response = cast(
                Response[Any],
                await self.request(*self.args, **self.kwargs),  # type: ignore
            )
            self._initialized = True
            self._request_method = response.raw_request.method
            # Remember the model so follow-up pages are parsed the same way.
            # NOTE(review): relies on Response exposing `_data_model` — confirm.
            self._response_model = response._data_model
        else:
            # Next request
            if self._next_link is None:
                raise RuntimeError("Paginator is finalized, no more pages to fetch.")
            if self._request_method is None:
                raise RuntimeError("Request method is not set, this should not happen.")
            if self._response_model is None:
                raise RuntimeError("Response model is not set, this should not happen.")

            # we request the next page with the same method and response model
            # NOTE(review): the sync path calls `request` without awaiting it,
            # so the awaited call must be the client's async counterpart;
            # assumed to be `arequest` — confirm against the client class.
            response = cast(
                Response[Any],
                await self.rest._github.arequest(
                    self._request_method,
                    self._next_link,
                    headers=self.kwargs.get("headers"),  # type: ignore
                    response_model=self._response_model,  # type: ignore
                ),
            )

        self._next_link = self._find_next_link(response)
        self._fill_cache_data(self._apply_map_func(response))

0 commit comments

Comments
 (0)