From 12731143c8bba510bebe96a48c126902ca8b5449 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 19 May 2021 09:05:52 -0700 Subject: [PATCH 01/23] Renamed secret --- .github/workflows/deploy-demo.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-demo.yml b/.github/workflows/deploy-demo.yml index d38d8a1..0f27c9b 100644 --- a/.github/workflows/deploy-demo.yml +++ b/.github/workflows/deploy-demo.yml @@ -35,7 +35,7 @@ jobs: pip install bs4 - name: Create auth.json env: - GITHUB_ACCESS_TOKEN: ${{ secrets.GITHUB_ACCESS_TOKEN }} + GITHUB_ACCESS_TOKEN: ${{ secrets.GH_TOKEN }} run: | echo "{\"github_personal_token\": \"$GITHUB_ACCESS_TOKEN\"}" > auth.json - name: Fetch previous copy of database From ba8cf3e9bb5f4f8740bd4b9eed28f1464d7f6b9a Mon Sep 17 00:00:00 2001 From: Daniel Butler Date: Wed, 19 May 2021 12:07:09 -0400 Subject: [PATCH 02/23] fixing typo (#61) --- github_to_sqlite/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_to_sqlite/cli.py b/github_to_sqlite/cli.py index 8609db3..ae2cac4 100644 --- a/github_to_sqlite/cli.py +++ b/github_to_sqlite/cli.py @@ -244,7 +244,7 @@ def stargazers(db_path, repos, auth): help="Fetch HTML rendered README into 'readme_html' column", ) def repos(db_path, usernames, auth, repo, load, readme, readme_html): - "Save repos owened by the specified (or authenticated) username or organization" + "Save repos owned by the specified (or authenticated) username or organization" db = sqlite_utils.Database(db_path) token = load_token(auth) if load: From 70dffca351375e6f542969c72ebc43c6d393d99c Mon Sep 17 00:00:00 2001 From: Felix Rosencrantz Date: Wed, 19 May 2021 09:08:12 -0700 Subject: [PATCH 03/23] Remove unneeded exists=True for -a/--auth flag. (#59) The file does not need to exist when using an environment variable. 
--- github_to_sqlite/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/github_to_sqlite/cli.py b/github_to_sqlite/cli.py index ae2cac4..fbd3321 100644 --- a/github_to_sqlite/cli.py +++ b/github_to_sqlite/cli.py @@ -192,7 +192,7 @@ def starred(db_path, username, auth, load): @click.option( "-a", "--auth", - type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True), + type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), default="auth.json", help="Path to auth.json token file", ) @@ -585,7 +585,7 @@ def get(url, auth, paginate, nl, accept): @click.option( "-a", "--auth", - type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True), + type=click.Path(file_okay=True, dir_okay=False, allow_dash=True), default="auth.json", help="Path to auth.json token file", ) From ed3752022e45b890af63996efec804725e95d0d4 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 17 Aug 2021 16:55:01 -0700 Subject: [PATCH 04/23] Switch to google-github-actions/setup-gcloud@master --- .github/workflows/deploy-demo.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-demo.yml b/.github/workflows/deploy-demo.yml index 0f27c9b..9e3169f 100644 --- a/.github/workflows/deploy-demo.yml +++ b/.github/workflows/deploy-demo.yml @@ -95,7 +95,7 @@ jobs: with: path: github.db - name: Set up Cloud Run - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master + uses: google-github-actions/setup-gcloud@master with: version: '275.0.0' service_account_email: ${{ secrets.GCP_SA_EMAIL }} From 8b418b07730f3ad9aacc8b36ff92df888f2cc26c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 17 Nov 2021 23:36:40 -0800 Subject: [PATCH 05/23] Add sort to dependents example link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d4c1f5b..0801ebb 100644 --- a/README.md +++ b/README.md @@ -208,7 +208,7 @@ The command accepts one or more 
repositories. Add `-v` for verbose output. -Example: [dependents table](https://github-to-sqlite.dogsheep.net/github/dependents) +Example: [dependents table](https://github-to-sqlite.dogsheep.net/github/dependents?_sort_desc=first_seen_utc) ## Fetching emojis From 7750ce88755f44aa10301889642205311a8c6c4d Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 1 Dec 2021 11:34:22 -0800 Subject: [PATCH 06/23] Fixed incorrect help on get command --- github_to_sqlite/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_to_sqlite/cli.py b/github_to_sqlite/cli.py index fbd3321..70aa3b5 100644 --- a/github_to_sqlite/cli.py +++ b/github_to_sqlite/cli.py @@ -539,7 +539,7 @@ def emojis(db_path, auth, fetch): help="Accept header to send, e.g. application/vnd.github.VERSION.html", ) def get(url, auth, paginate, nl, accept): - "Save repos owened by the specified (or authenticated) username or organization" + "Make an authenticated HTTP GET against the specified URL" token = load_token(auth) first = True should_output_closing_brace = not nl From bc9763d1206df2cfb2dab4d6e0fb0c7f02408ba2 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 1 Dec 2021 11:36:52 -0800 Subject: [PATCH 07/23] Release 2.8.3 Refs #59, #61 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d33ead4..1a39f65 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup import os -VERSION = "2.8.2" +VERSION = "2.8.3" def get_long_description(): From 751bc900366ca52e662ea383b858cbf4365093d9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 6 Jan 2022 09:19:02 -0800 Subject: [PATCH 08/23] datasette-atom plugin I'm going to see if I can subscribe to new issues and issue comments across all of these repos. 
--- .github/workflows/deploy-demo.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy-demo.yml b/.github/workflows/deploy-demo.yml index 9e3169f..a4ec773 100644 --- a/.github/workflows/deploy-demo.yml +++ b/.github/workflows/deploy-demo.yml @@ -115,4 +115,5 @@ jobs: --install=datasette-json-html \ --install=datasette-vega \ --install=datasette-render-images \ - --install=datasette-graphql + --install=datasette-graphql \ + --install=datasette-atom From a6e237f75a4b86963d91dcb5c9582e3a1b3349d6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 21 Mar 2022 18:59:45 -0700 Subject: [PATCH 09/23] google-github-actions/setup-gcloud@v0 --- .github/workflows/deploy-demo.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy-demo.yml b/.github/workflows/deploy-demo.yml index a4ec773..a99880f 100644 --- a/.github/workflows/deploy-demo.yml +++ b/.github/workflows/deploy-demo.yml @@ -95,7 +95,7 @@ jobs: with: path: github.db - name: Set up Cloud Run - uses: google-github-actions/setup-gcloud@master + uses: google-github-actions/setup-gcloud@v0 with: version: '275.0.0' service_account_email: ${{ secrets.GCP_SA_EMAIL }} From dbac2e5dd8a562b45d8255a265859cf8020ca22a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=82=AB=E3=82=B7=E3=82=AA=E3=80=80=E9=87=91=E5=9F=8E?= =?UTF-8?q?=E3=80=80=E5=A4=A7=E9=96=A2?= <1224205+empjustine@users.noreply.github.com> Date: Mon, 18 Jul 2022 16:40:11 -0300 Subject: [PATCH 10/23] Fixing 'NoneType' object has no attribute 'items' (#73) Traceback (most recent call last): File "/home/dogsheep/dogsheep/github-to-sqlite/bin/github-to-sqlite", line 8, in sys.exit(cli()) File "/home/dogsheep/dogsheep/github-to-sqlite/lib64/python3.10/site-packages/click/core.py", line 1130, in __call__ return self.main(*args, **kwargs) File "/home/dogsheep/dogsheep/github-to-sqlite/lib64/python3.10/site-packages/click/core.py", line 1055, in main rv = self.invoke(ctx) File 
"/home/dogsheep/dogsheep/github-to-sqlite/lib64/python3.10/site-packages/click/core.py", line 1657, in invoke return _process_result(sub_ctx.command.invoke(sub_ctx)) File "/home/dogsheep/dogsheep/github-to-sqlite/lib64/python3.10/site-packages/click/core.py", line 1404, in invoke return ctx.invoke(self.callback, **ctx.params) File "/home/dogsheep/dogsheep/github-to-sqlite/lib64/python3.10/site-packages/click/core.py", line 760, in invoke return __callback(*args, **kwargs) File "/home/dogsheep/dogsheep/github-to-sqlite/lib64/python3.10/site-packages/github_to_sqlite/cli.py", line 181, in starred utils.save_stars(db, user, stars) File "/home/dogsheep/dogsheep/github-to-sqlite/lib64/python3.10/site-packages/github_to_sqlite/utils.py", line 494, in save_stars repo_id = save_repo(db, repo) File "/home/dogsheep/dogsheep/github-to-sqlite/lib64/python3.10/site-packages/github_to_sqlite/utils.py", line 308, in save_repo to_save["owner"] = save_user(db, to_save["owner"]) File "/home/dogsheep/dogsheep/github-to-sqlite/lib64/python3.10/site-packages/github_to_sqlite/utils.py", line 229, in save_user for key, value in user.items() AttributeError: 'NoneType' object has no attribute 'items' --- github_to_sqlite/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/github_to_sqlite/utils.py b/github_to_sqlite/utils.py index bae4ac6..c7d78d7 100644 --- a/github_to_sqlite/utils.py +++ b/github_to_sqlite/utils.py @@ -223,6 +223,11 @@ def save_pull_requests(db, pull_requests, repo): def save_user(db, user): + # Under some conditions, GitHub caches removed repositories with + # stars and ends up leaving dangling `None` user references. 
+ if user is None: + return None + # Remove all url fields except avatar_url and html_url to_save = { key: value From ace13ec3d98090d99bd71871c286a4a612c96a50 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Mon, 18 Jul 2022 12:47:17 -0700 Subject: [PATCH 11/23] Drop py-gfm from demo, refs #74 --- .github/workflows/deploy-demo.yml | 1 - demo-metadata.json | 20 ++++---------------- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/.github/workflows/deploy-demo.yml b/.github/workflows/deploy-demo.yml index a99880f..5b40f13 100644 --- a/.github/workflows/deploy-demo.yml +++ b/.github/workflows/deploy-demo.yml @@ -108,7 +108,6 @@ jobs: -m demo-metadata.json \ --service github-to-sqlite \ --branch=main \ - --install=py-gfm \ --install=datasette-search-all>=0.3 \ --install=datasette-render-markdown>=1.1.2 \ --install=datasette-pretty-json \ diff --git a/demo-metadata.json b/demo-metadata.json index 293c947..c04aa5a 100644 --- a/demo-metadata.json +++ b/demo-metadata.json @@ -45,10 +45,7 @@ "span": [ "class" ] - }, - "extensions": [ - "mdx_gfm:GithubFlavoredMarkdownExtension" - ] + } } } }, @@ -89,10 +86,7 @@ "span": [ "class" ] - }, - "extensions": [ - "mdx_gfm:GithubFlavoredMarkdownExtension" - ] + } } } }, @@ -138,10 +132,7 @@ "span": [ "class" ] - }, - "extensions": [ - "mdx_gfm:GithubFlavoredMarkdownExtension" - ] + } } } }, @@ -180,10 +171,7 @@ "span": [ "class" ] - }, - "extensions": [ - "mdx_gfm:GithubFlavoredMarkdownExtension" - ] + } } } } From 0e45b72312a0756e5a562effbba08cb8de1e480b Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 28 Sep 2022 14:07:54 -0700 Subject: [PATCH 12/23] datasette-pretty-json>=0.2.2 Refs https://github.com/simonw/datasette-pretty-json/issues/2 --- .github/workflows/deploy-demo.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-demo.yml b/.github/workflows/deploy-demo.yml index 5b40f13..1e77d0a 100644 --- a/.github/workflows/deploy-demo.yml +++ 
b/.github/workflows/deploy-demo.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v1 with: - python-version: 3.8 + python-version: "3.10" - uses: actions/cache@v1 name: Configure pip caching with: @@ -110,7 +110,7 @@ jobs: --branch=main \ --install=datasette-search-all>=0.3 \ --install=datasette-render-markdown>=1.1.2 \ - --install=datasette-pretty-json \ + --install=datasette-pretty-json>=0.2.2 \ --install=datasette-json-html \ --install=datasette-vega \ --install=datasette-render-images \ From 626dd61f6070f95f7652104cb9f4100e9b88862f Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 14 Dec 2022 22:57:31 -0800 Subject: [PATCH 13/23] setup-gcloud fix --- .github/workflows/deploy-demo.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-demo.yml b/.github/workflows/deploy-demo.yml index 1e77d0a..72a553d 100644 --- a/.github/workflows/deploy-demo.yml +++ b/.github/workflows/deploy-demo.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v1 with: - python-version: "3.10" + python-version: "3.9" - uses: actions/cache@v1 name: Configure pip caching with: @@ -97,7 +97,7 @@ jobs: - name: Set up Cloud Run uses: google-github-actions/setup-gcloud@v0 with: - version: '275.0.0' + version: '318.0.0' service_account_email: ${{ secrets.GCP_SA_EMAIL }} service_account_key: ${{ secrets.GCP_SA_KEY }} - name: Deploy to Cloud Run From 073ae9b4c7cbd80355777b50eacee03828e0655e Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Thu, 15 Dec 2022 09:41:50 -0800 Subject: [PATCH 14/23] Drop Python 3.6, add 3.10 and 3.11 --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a177421..c42f0d0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version:
["3.7", "3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} From ebd053ea3b9cb324759dc5d3ee74ee97aab9d89a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Apr 2023 13:39:53 -0700 Subject: [PATCH 15/23] Fix a deprecation warning --- github_to_sqlite/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/github_to_sqlite/utils.py b/github_to_sqlite/utils.py index c7d78d7..9e728fc 100644 --- a/github_to_sqlite/utils.py +++ b/github_to_sqlite/utils.py @@ -731,7 +731,7 @@ def scrape_dependents(repo, verbose=False): yield from repos # next page? try: - next_link = soup.select(".paginate-container")[0].find("a", text="Next") + next_link = soup.select(".paginate-container")[0].find("a", string="Next") except IndexError: break if next_link is not None: From 2e84e26c34bd14f6a786692764c0afafe94e80e5 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Apr 2023 13:40:48 -0700 Subject: [PATCH 16/23] Test on 3.7 to 3.11 --- .github/workflows/publish.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3755c3a..1f31920 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} @@ -37,7 +37,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v2 with: - python-version: '3.9' + python-version: "3.11" - uses: actions/cache@v2 name: Configure pip caching with: From 6eb97a2da73e1d71a53d3039474de34b0408f478 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 5 Apr 2023 14:15:16 -0700 Subject: [PATCH 17/23] Include headers in GitHubError, refs #79 --- github_to_sqlite/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 
deletions(-) diff --git a/github_to_sqlite/utils.py b/github_to_sqlite/utils.py index 9e728fc..f934d76 100644 --- a/github_to_sqlite/utils.py +++ b/github_to_sqlite/utils.py @@ -74,16 +74,17 @@ class GitHubError(Exception): - def __init__(self, message, status_code): + def __init__(self, message, status_code, headers=None): self.message = message self.status_code = status_code + self.headers = headers @classmethod def from_response(cls, response): message = response.json()["message"] if "git repository is empty" in message.lower(): cls = GitHubRepositoryEmpty - return cls(message, response.status_code) + return cls(message, response.status_code, response.headers) class GitHubRepositoryEmpty(GitHubError): From 56a918f6a6285855fb1bd086b60b44144279a95a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Wed, 26 Apr 2023 10:36:17 -0700 Subject: [PATCH 18/23] Don't use branch main --- .github/workflows/deploy-demo.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/deploy-demo.yml b/.github/workflows/deploy-demo.yml index 72a553d..a960ed6 100644 --- a/.github/workflows/deploy-demo.yml +++ b/.github/workflows/deploy-demo.yml @@ -107,7 +107,6 @@ jobs: datasette publish cloudrun github.db \ -m demo-metadata.json \ --service github-to-sqlite \ - --branch=main \ --install=datasette-search-all>=0.3 \ --install=datasette-render-markdown>=1.1.2 \ --install=datasette-pretty-json>=0.2.2 \ From 0ab34dbefe05797495bdc0aa81964c7c7b81538a Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 10 Dec 2023 13:14:42 -0800 Subject: [PATCH 19/23] Upgrade GitHub Actions - drop 3.7, add 3.12 --- .github/workflows/deploy-demo.yml | 17 ++++++----------- .github/workflows/publish.yml | 30 ++++++++++-------------------- .github/workflows/readme-toc.yaml | 2 +- .github/workflows/test.yml | 15 +++++---------- 4 files changed, 22 insertions(+), 42 deletions(-) diff --git a/.github/workflows/deploy-demo.yml b/.github/workflows/deploy-demo.yml index a960ed6..1f1fb1a 100644 --- 
a/.github/workflows/deploy-demo.yml +++ b/.github/workflows/deploy-demo.yml @@ -12,19 +12,14 @@ jobs: scheduled: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 name: Check out repo - name: Set up Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v4 with: - python-version: "3.9" - - uses: actions/cache@v1 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + python-version: "3.12" + cache: pip + cache-dependency-path: setup.py - name: Install Python dependencies run: | python -m pip install --upgrade pip @@ -91,7 +86,7 @@ jobs: sqlite-utils rebuild-fts github.db # Populate _analyze_tables_ table sqlite-utils analyze-tables github.db --save - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: path: github.db - name: Set up Cloud Run diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 1f31920..c28f0fe 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -9,20 +9,15 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - uses: actions/cache@v2 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + cache: pip + cache-dependency-path: setup.py - name: Install dependencies run: | pip install -e '.[test]' @@ -33,18 +28,13 @@ jobs: runs-on: ubuntu-latest needs: [test] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python - uses: 
actions/setup-python@v2 - with: - python-version: "3.11" - - uses: actions/cache@v2 - name: Configure pip caching + uses: actions/setup-python@v4 with: - path: ~/.cache/pip - key: ${{ runner.os }}-publish-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-publish-pip- + python-version: "3.12" + cache: pip + cache-dependency-path: setup.py - name: Install dependencies run: | pip install setuptools wheel twine diff --git a/.github/workflows/readme-toc.yaml b/.github/workflows/readme-toc.yaml index 39c9028..3e81dd8 100644 --- a/.github/workflows/readme-toc.yaml +++ b/.github/workflows/readme-toc.yaml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out repo - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Update TOC run: npx markdown-toc README.md -i - name: Commit and push if README changed diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c42f0d0..c49fa4c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,20 +7,15 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - uses: actions/cache@v2 - name: Configure pip caching - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- + cache: pip + cache-dependency-path: setup.py - name: Install dependencies run: | pip install -e '.[test]' From 56f2aee4d267472f59f0d7f92c12e41e2b2f13c6 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 10 Dec 2023 13:21:09 -0800 Subject: [PATCH 20/23] Don't deploy demo on every push to main --- .github/workflows/deploy-demo.yml | 3 --- 1 file changed, 3 deletions(-) 
diff --git a/.github/workflows/deploy-demo.yml b/.github/workflows/deploy-demo.yml index 1f1fb1a..4a7c49c 100644 --- a/.github/workflows/deploy-demo.yml +++ b/.github/workflows/deploy-demo.yml @@ -2,9 +2,6 @@ name: Build and deploy demo on: workflow_dispatch: - push: - branches: - - main schedule: - cron: '0 0 * * *' From a0a711b05c6f4667779528101ef621cd1a0bf97b Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Sun, 10 Dec 2023 16:22:03 -0500 Subject: [PATCH 21/23] More options for `pull-requests`: --state, --org, and --search (#80) * always ask for 100 items when paginating (helps #79) * fix typos in README.md * ignore test and build artifacts * --org and --state options for pull-requests * --search for pull-requests, but it can only get 1000 --- .gitignore | 3 ++- README.md | 22 ++++++++++++++---- github_to_sqlite/cli.py | 49 +++++++++++++++++++++++++++++++++------ github_to_sqlite/utils.py | 40 +++++++++++++++++++++++--------- 4 files changed, 90 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index 27b93de..d9e1f4d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,5 @@ venv .eggs .pytest_cache *.egg-info - +.coverage +build/ diff --git a/README.md b/README.md index 0801ebb..a45bfc0 100644 --- a/README.md +++ b/README.md @@ -82,13 +82,25 @@ You can use the `--pull-request` option one or more times to load specific pull Note that the `merged_by` column on the `pull_requests` table will only be populated for pull requests that are loaded using the `--pull-request` option - the GitHub API does not return this field for pull requests that are loaded in bulk. +You can load only pull requests in a certain state with the `--state` option: + + $ github-to-sqlite pull-requests --state=open github.db simonw/datasette + +Pull requests across an entire organization (or more than one) can be loaded with `--org`: + + $ github-to-sqlite pull-requests --state=open --org=psf --org=python github.db + +You can use a search query to find pull requests. 
Note that no more than 1000 will be loaded (this is a GitHub API limitation), and some data will be missing (base and head SHAs). When using searches, other filters are ignored; put all criteria into the search itself: + + $ github-to-sqlite pull-requests --search='org:python defaultdict state:closed created:<2023-09-01' github.db + Example: [pull_requests table](https://github-to-sqlite.dogsheep.net/github/pull_requests) ## Fetching issue comments for a repository The `issue-comments` command retrieves all of the comments on all of the issues in a repository. -It is recommended you run `issues` first, so that each imported comment can have a foreign key poining to its issue. +It is recommended you run `issues` first, so that each imported comment can have a foreign key pointing to its issue. $ github-to-sqlite issues github.db simonw/datasette $ github-to-sqlite issue-comments github.db simonw/datasette @@ -101,7 +113,7 @@ Example: [issue_comments table](https://github-to-sqlite.dogsheep.net/github/iss ## Fetching commits for a repository -The `commits` command retrieves details of all of the commits for one or more repositories. It currently fetches the sha, commit message and author and committer details - it does no retrieve the full commit body. +The `commits` command retrieves details of all of the commits for one or more repositories. It currently fetches the SHA, commit message and author and committer details; it does not retrieve the full commit body. $ github-to-sqlite commits github.db simonw/datasette simonw/sqlite-utils @@ -156,7 +168,7 @@ You can pass more than one username to fetch for multiple users or organizations $ github-to-sqlite repos github.db simonw dogsheep -Add the `--readme` option to save the README for the repo in a column called `readme`. Add `--readme-html` to save the HTML rendered version of the README into a collumn called `readme_html`. +Add the `--readme` option to save the README for the repo in a column called `readme`. 
Add `--readme-html` to save the HTML rendered version of the README into a column called `readme_html`. Example: [repos table](https://github-to-sqlite.dogsheep.net/github/repos) @@ -216,7 +228,7 @@ You can fetch a list of every emoji supported by GitHub using the `emojis` comma $ github-to-sqlite emojis github.db -This will create a table callad `emojis` with a primary key `name` and a `url` column. +This will create a table called `emojis` with a primary key `name` and a `url` column. If you add the `--fetch` option the command will also fetch the binary content of the images and place them in an `image` column: @@ -235,7 +247,7 @@ The `github-to-sqlite get` command provides a convenient shortcut for making aut This will make an authenticated call to the URL you provide and pretty-print the resulting JSON to the console. -You can ommit the `https://api.github.com/` prefix, for example: +You can omit the `https://api.github.com/` prefix, for example: $ github-to-sqlite get /gists diff --git a/github_to_sqlite/cli.py b/github_to_sqlite/cli.py index 70aa3b5..e6a2d88 100644 --- a/github_to_sqlite/cli.py +++ b/github_to_sqlite/cli.py @@ -1,5 +1,6 @@ import click import datetime +import itertools import pathlib import textwrap import os @@ -104,19 +105,53 @@ def issues(db_path, repo, issue_ids, auth, load): type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True), help="Load pull-requests JSON from this file instead of the API", ) -def pull_requests(db_path, repo, pull_request_ids, auth, load): +@click.option( + "--org", + "orgs", + help="Fetch all pull requests from this GitHub organization", + multiple=True, +) +@click.option( + "--state", + help="Only fetch pull requests in this state", +) +@click.option( + "--search", + help="Find pull requests with a search query", +) +def pull_requests(db_path, repo, pull_request_ids, auth, load, orgs, state, search): "Save pull_requests for a specified repository, e.g. 
simonw/datasette" db = sqlite_utils.Database(db_path) token = load_token(auth) - repo_full = utils.fetch_repo(repo, token) - utils.save_repo(db, repo_full) if load: + repo_full = utils.fetch_repo(repo, token) + utils.save_repo(db, repo_full) pull_requests = json.load(open(load)) + utils.save_pull_requests(db, pull_requests, repo_full) + elif search: + repos_seen = set() + search += " is:pr" + pull_requests = utils.fetch_searched_pulls_or_issues(search, token) + for pull_request in pull_requests: + pr_repo_url = pull_request["repository_url"] + if pr_repo_url not in repos_seen: + pr_repo = utils.fetch_repo(url=pr_repo_url) + utils.save_repo(db, pr_repo) + repos_seen.add(pr_repo_url) + utils.save_pull_requests(db, [pull_request], pr_repo) else: - pull_requests = utils.fetch_pull_requests(repo, token, pull_request_ids) - - pull_requests = list(pull_requests) - utils.save_pull_requests(db, pull_requests, repo_full) + if orgs: + repos = itertools.chain.from_iterable( + utils.fetch_all_repos(token=token, org=org) + for org in orgs + ) + else: + repos = [utils.fetch_repo(repo, token)] + for repo_full in repos: + utils.save_repo(db, repo_full) + repo = repo_full["full_name"] + pull_requests = utils.fetch_pull_requests(repo, state, token, pull_request_ids) + utils.save_pull_requests(db, pull_requests, repo_full) utils.ensure_db_shape(db) diff --git a/github_to_sqlite/utils.py b/github_to_sqlite/utils.py index f934d76..c837690 100644 --- a/github_to_sqlite/utils.py +++ b/github_to_sqlite/utils.py @@ -2,6 +2,7 @@ import requests import re import time +import urllib.parse import yaml FTS_CONFIG = { @@ -170,8 +171,11 @@ def save_pull_requests(db, pull_requests, repo): # Add repo key pull_request["repo"] = repo["id"] # Pull request _links can be flattened to just their URL - pull_request["url"] = pull_request["_links"]["html"]["href"] - pull_request.pop("_links") + if "_links" in pull_request: + pull_request["url"] = pull_request["_links"]["html"]["href"] + 
pull_request.pop("_links") + else: + pull_request["url"] = pull_request["pull_request"]["html_url"] # Extract user pull_request["user"] = save_user(db, pull_request["user"]) labels = pull_request.pop("labels") @@ -179,8 +183,9 @@ def save_pull_requests(db, pull_requests, repo): if pull_request.get("merged_by"): pull_request["merged_by"] = save_user(db, pull_request["merged_by"]) # Head sha - pull_request["head"] = pull_request["head"]["sha"] - pull_request["base"] = pull_request["base"]["sha"] + if "head" in pull_request: + pull_request["head"] = pull_request["head"]["sha"] + pull_request["base"] = pull_request["base"]["sha"] # Extract milestone if pull_request["milestone"]: pull_request["milestone"] = save_milestone( @@ -292,12 +297,13 @@ def save_issue_comment(db, comment): return last_pk -def fetch_repo(full_name, token=None): +def fetch_repo(full_name=None, token=None, url=None): headers = make_headers(token) # Get topics: headers["Accept"] = "application/vnd.github.mercy-preview+json" - owner, slug = full_name.split("/") - url = "https://api.github.com/repos/{}/{}".format(owner, slug) + if url is None: + owner, slug = full_name.split("/") + url = "https://api.github.com/repos/{}/{}".format(owner, slug) response = requests.get(url, headers=headers) response.raise_for_status() return response.json() @@ -358,7 +364,7 @@ def fetch_issues(repo, token=None, issue_ids=None): yield from issues -def fetch_pull_requests(repo, token=None, pull_request_ids=None): +def fetch_pull_requests(repo, state=None, token=None, pull_request_ids=None): headers = make_headers(token) headers["accept"] = "application/vnd.github.v3+json" if pull_request_ids: @@ -370,11 +376,20 @@ def fetch_pull_requests(repo, token=None, pull_request_ids=None): response.raise_for_status() yield response.json() else: - url = "https://api.github.com/repos/{}/pulls?state=all&filter=all".format(repo) + state = state or "all" + url = f"https://api.github.com/repos/{repo}/pulls?state={state}" for pull_requests 
in paginate(url, headers): yield from pull_requests +def fetch_searched_pulls_or_issues(query, token=None): + headers = make_headers(token) + url = "https://api.github.com/search/issues?" + url += urllib.parse.urlencode({"q": query}) + for pulls_or_issues in paginate(url, headers): + yield from pulls_or_issues["items"] + + def fetch_issue_comments(repo, token=None, issue=None): assert "/" in repo headers = make_headers(token) @@ -445,13 +460,15 @@ def fetch_stargazers(repo, token=None): yield from stargazers -def fetch_all_repos(username=None, token=None): - assert username or token, "Must provide username= or token= or both" +def fetch_all_repos(username=None, token=None, org=None): + assert username or token or org, "Must provide username= or token= or org= or a combination" headers = make_headers(token) # Get topics for each repo: headers["Accept"] = "application/vnd.github.mercy-preview+json" if username: url = "https://api.github.com/users/{}/repos".format(username) + elif org: + url = "https://api.github.com/orgs/{}/repos".format(org) else: url = "https://api.github.com/user/repos" for repos in paginate(url, headers): @@ -469,6 +486,7 @@ def fetch_user(username=None, token=None): def paginate(url, headers=None): + url += ("&" if "?" 
in url else "?") + "per_page=100" while url: response = requests.get(url, headers=headers) # For HTTP 204 no-content this yields an empty list From 91188c517c1917381d54955380bf1a33708fe9d9 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 10 Dec 2023 13:37:22 -0800 Subject: [PATCH 22/23] Release 2.9 Refs #73, #79, #80 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1a39f65..de72b51 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup import os -VERSION = "2.8.3" +VERSION = "2.9" def get_long_description(): From eaef8ffd3f46be6c26062237ed88b4c2202a1c44 Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Sun, 14 Jan 2024 21:56:13 -0800 Subject: [PATCH 23/23] Disable scheduled publish This kept breaking due to rate limits. --- .github/workflows/deploy-demo.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-demo.yml b/.github/workflows/deploy-demo.yml index 4a7c49c..c9440a6 100644 --- a/.github/workflows/deploy-demo.yml +++ b/.github/workflows/deploy-demo.yml @@ -2,8 +2,8 @@ name: Build and deploy demo on: workflow_dispatch: - schedule: - - cron: '0 0 * * *' +# schedule: +# - cron: '0 0 * * *' jobs: scheduled: