From 19e8934265d3ad00ff6d381df95f941b9039a1db Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Thu, 12 Oct 2023 18:37:01 +0200 Subject: [PATCH 1/8] Save state after each build. --- build_docs.py | 71 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/build_docs.py b/build_docs.py index efccaca..a80a4eb 100755 --- a/build_docs.py +++ b/build_docs.py @@ -24,6 +24,8 @@ from argparse import ArgumentParser from contextlib import suppress, contextmanager from dataclasses import dataclass +from datetime import datetime as dt, timezone +from time import perf_counter import filecmp import json import logging @@ -354,20 +356,6 @@ def locate_nearest_version(available_versions, target_version): return tuple_to_version(found) -def translation_branch(repo: Repository, needed_version: str): - """Some cpython versions may be untranslated, being either too old or - too new. - - This function looks for remote branches on the given repo, and - returns the name of the nearest existing branch. - - It could be enhanced to also search for tags. - """ - remote_branches = repo.run("branch", "-r").stdout - branches = re.findall(r"/([0-9]+\.[0-9]+)$", remote_branches, re.M) - return locate_nearest_version(branches, needed_version) - - @contextmanager def edit(file: Path): """Context manager to edit a file "in place", use it as: @@ -652,6 +640,7 @@ def full_build(self): def run(self) -> bool: """Build and publish a Python doc, for a language, and a version.""" + start_time = perf_counter() try: self.cpython_repo.switch(self.version.branch_or_tag) if self.language.tag != "en": @@ -659,6 +648,7 @@ def run(self) -> bool: self.build_venv() self.build() self.copy_build_to_webroot() + self.save_state(build_duration=perf_counter() - start_time) except Exception as err: logging.exception( "Exception while building %s version %s", @@ -676,10 +666,13 @@ def checkout(self) -> Path: return self.build_root / "cpython" def clone_translation(self): - """Clone the translation repository from github. + self.translation_repo.update() + self.translation_repo.switch(self.translation_branch) + + @property + def translation_repo(self): + """See PEP 545 for translations repository naming convention.""" - See PEP 545 for repository naming convention. - """ locale_repo = f"https://github.com/python/python-docs-{self.language.tag}.git" locale_clone_dir = ( self.build_root @@ -688,9 +681,21 @@ def clone_translation(self): / self.language.iso639_tag / "LC_MESSAGES" ) - repo = Repository(locale_repo, locale_clone_dir) - repo.update() - repo.switch(translation_branch(repo, self.version.name)) + return Repository(locale_repo, locale_clone_dir) + + @property + def translation_branch(self): + """Some cpython versions may be untranslated, being either too old or + too new. + + This function looks for remote branches on the given repo, and + returns the name of the nearest existing branch. + + It could be enhanced to also search for tags. + """ + remote_branches = self.translation_repo.run("branch", "-r").stdout + branches = re.findall(r"/([0-9]+\.[0-9]+)$", remote_branches, re.M) + return locate_nearest_version(branches, self.version.name) def build(self): """Build this version/language doc.""" @@ -922,6 +927,32 @@ def copy_build_to_webroot(self): self.language.tag, ) + def save_state(self, build_duration): + """Save current cpython sha1 and current translation sha1. + + Using this we can deduce if a rebuild is needed or not. + """ + state_file = self.build_root / "state.toml" + try: + states = tomlkit.parse(state_file.read_text(encoding="UTF-8")) + except FileNotFoundError: + states = tomlkit.document() + + state = {} + state["cpython_sha"] = self.cpython_repo.run("rev-parse", "HEAD").stdout.strip() + if self.language.tag != "en": + state["translation_sha"] = self.translation_repo.run( + "rev-parse", "HEAD" + ).stdout.strip() + state["last_build"] = dt.now(timezone.utc) + state["last_build_duration"] = build_duration + + states.setdefault("build", {}).setdefault(self.language.tag, {})[ + self.version.name + ] = state + + state_file.write_text(tomlkit.dumps(states), encoding="UTF-8") + def symlink(www_root: Path, language: Language, directory: str, name: str, group: str): """Used by major_symlinks and dev_symlink to maintain symlinks.""" From 4e866838cc2022f517964fc2cfa6581eb50f80b1 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Fri, 13 Oct 2023 23:21:21 +0200 Subject: [PATCH 2/8] I've seen too much fetch failing. --- build_docs.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/build_docs.py b/build_docs.py index a80a4eb..b89428e 100755 --- a/build_docs.py +++ b/build_docs.py @@ -25,7 +25,7 @@ from contextlib import suppress, contextmanager from dataclasses import dataclass from datetime import datetime as dt, timezone -from time import perf_counter +from time import perf_counter, sleep import filecmp import json import logging @@ -280,6 +280,15 @@ class Repository: remote: str directory: Path + def fetch(self): + """Try (and retry) to run git fetch.""" + try: + return self.run("fetch") + except subprocess.CalledProcessError as err: + logging.error("'git fetch' failed (%s), retrying...", err.stderr) + sleep(5) + return self.run("fetch") + def run(self, *args): """Run git command in the clone repository.""" return run(("git", "-C", self.directory) + args) From f3c68358ab53c766b9de3507000689cf41772343 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Fri, 13 Oct 2023 23:23:28 +0200 Subject: [PATCH 3/8] Trying to enhance log readability. --- build_docs.py | 52 ++++++++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/build_docs.py b/build_docs.py index b89428e..2dced6b 100755 --- a/build_docs.py +++ b/build_docs.py @@ -248,8 +248,6 @@ def run(cmd, cwd=None) -> subprocess.CompletedProcess: cmdstring, indent("\n".join(result.stdout.split("\n")[-20:]), " "), ) - else: - logging.debug("Run: %r OK", cmdstring) result.check_returncode() return result @@ -609,11 +607,15 @@ def parse_args(): def setup_logging(log_directory: Path): """Setup logging to stderr if ran by a human, or to a file if ran from a cron.""" if sys.stderr.isatty(): - logging.basicConfig(format="%(levelname)s:%(message)s", stream=sys.stderr) + logging.basicConfig( + format="%(asctime)s %(levelname)s: %(message)s", stream=sys.stderr + ) else: log_directory.mkdir(parents=True, exist_ok=True) handler = logging.handlers.WatchedFileHandler(log_directory / "docsbuild.log") - handler.setFormatter(logging.Formatter("%(levelname)s:%(asctime)s:%(message)s")) + handler.setFormatter( + logging.Formatter("%(asctime)s %(levelname)s: %(message)s") + ) logging.getLogger().addHandler(handler) logging.getLogger().setLevel(logging.DEBUG) @@ -650,6 +652,7 @@ def full_build(self): def run(self) -> bool: """Build and publish a Python doc, for a language, and a version.""" start_time = perf_counter() + logging.info("Running.") try: self.cpython_repo.switch(self.version.branch_or_tag) if self.language.tag != "en": @@ -659,11 +662,7 @@ def run(self) -> bool: self.copy_build_to_webroot() self.save_state(build_duration=perf_counter() - start_time) except Exception as err: - logging.exception( - "Exception while building %s version %s", - self.language.tag, - self.version.name, - ) + logging.exception("Badly handled exception, human, please help.") if sentry_sdk: sentry_sdk.capture_exception(err) return False @@ -708,11 +707,7 @@ def translation_branch(self): def build(self): """Build this version/language doc.""" - logging.info( - "Build start for version: %s, language: %s", - self.version.name, - self.language.tag, - ) + logging.info("Build start.") sphinxopts = list(self.language.sphinxopts) sphinxopts.extend(["-q"]) if self.language.tag != "en": @@ -788,11 +783,7 @@ def build(self): setup_switchers( self.versions, self.languages, self.checkout / "Doc" / "build" / "html" ) - logging.info( - "Build done for version: %s, language: %s", - self.version.name, - self.language.tag, - ) + logging.info("Build done.") def build_venv(self): """Build a venv for the specific Python version. @@ -813,11 +804,7 @@ def build_venv(self): def copy_build_to_webroot(self): """Copy a given build to the appropriate webroot with appropriate rights.""" - logging.info( - "Publishing start for version: %s, language: %s", - self.version.name, - self.language.tag, - ) + logging.info("Publishing start.") self.www_root.mkdir(parents=True, exist_ok=True) if self.language.tag == "en": target = self.www_root / self.version.name @@ -887,7 +874,7 @@ def copy_build_to_webroot(self): ] ) if self.full_build: - logging.debug("Copying dist files") + logging.debug("Copying dist files.") run( [ "chown", @@ -930,11 +917,7 @@ def copy_build_to_webroot(self): purge(*prefixes) for prefix in prefixes: purge(*[prefix + p for p in changed]) - logging.info( - "Publishing done for version: %s, language: %s", - self.version.name, - self.language.tag, - ) + logging.info("Publishing done") def save_state(self, build_duration): """Save current cpython sha1 and current translation sha1. @@ -1103,6 +1086,11 @@ def build_docs(args) -> bool: cpython_repo.update() while todo: version, language = todo.pop() + logging.root.handlers[0].setFormatter( + logging.Formatter( + f"%(asctime)s %(levelname)s {language.tag}/{version.name}: %(message)s" + ) + ) if sentry_sdk: with sentry_sdk.configure_scope() as scope: scope.set_tag("version", version.name) @@ -1111,6 +1099,10 @@ def build_docs(args) -> bool: version, versions, language, languages, cpython_repo, **vars(args) ) all_built_successfully &= builder.run() + logging.root.handlers[0].setFormatter( + logging.Formatter("%(asctime)s %(levelname)s: %(message)s") + ) + build_sitemap(versions, languages, args.www_root, args.group) build_404(args.www_root, args.group) build_robots_txt( From 0ec02ffa76e6ee6740d59fe2e352cdf0589c7fed Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Fri, 13 Oct 2023 23:24:08 +0200 Subject: [PATCH 4/8] Rebuild only if needed. --- build_docs.py | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/build_docs.py b/build_docs.py index 2dced6b..25ef9f0 100755 --- a/build_docs.py +++ b/build_docs.py @@ -657,10 +657,13 @@ def run(self) -> bool: self.cpython_repo.switch(self.version.branch_or_tag) if self.language.tag != "en": self.clone_translation() - self.build_venv() - self.build() - self.copy_build_to_webroot() - self.save_state(build_duration=perf_counter() - start_time) + if self.should_rebuild(): + self.build_venv() + self.build() + self.copy_build_to_webroot() + self.save_state(build_duration=perf_counter() - start_time) + else: + logging.info("Nothing changed.") except Exception as err: logging.exception("Badly handled exception, human, please help.") if sentry_sdk: @@ -919,7 +922,35 @@ def copy_build_to_webroot(self): purge(*[prefix + p for p in changed]) logging.info("Publishing done") - def save_state(self, build_duration): + def should_rebuild(self): + state = self.load_state() + if not state: + return True + cpython_sha = self.cpython_repo.run("rev-parse", "HEAD").stdout.strip() + if self.language.tag != "en": + translation_sha = self.translation_repo.run( + "rev-parse", "HEAD" + ).stdout.strip() + if translation_sha != state["translation_sha"]: + return True + if cpython_sha != state["cpython_sha"]: + diff = self.cpython_repo.run( + "diff", "--name-only", state["cpython_sha"], cpython_sha + ).stdout + if "Doc/" in diff: + return True + return False + + def load_state(self) -> dict: + state_file = self.build_root / "state.toml" + try: + return tomlkit.loads(state_file.read_text(encoding="UTF-8"))[ + f"/{self.language.tag}/{self.version.name}/" + ] + except KeyError: + return {} + + def save_state(self, build_duration: float): """Save current cpython sha1 and current translation sha1. Using this we can deduce if a rebuild is needed or not. @@ -938,11 +969,7 @@ def save_state(self, build_duration): ).stdout.strip() state["last_build"] = dt.now(timezone.utc) state["last_build_duration"] = build_duration - - states.setdefault("build", {}).setdefault(self.language.tag, {})[ - self.version.name - ] = state - + states[f"/{self.language.tag}/{self.version.name}/"] = state state_file.write_text(tomlkit.dumps(states), encoding="UTF-8") From 7caa6a26dda4b0aa7dd481d7a61d33b5cadfad1d Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Thu, 19 Oct 2023 12:43:12 +0200 Subject: [PATCH 5/8] Better logging of re-runs. --- build_docs.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/build_docs.py b/build_docs.py index 25ef9f0..2d53afb 100755 --- a/build_docs.py +++ b/build_docs.py @@ -662,8 +662,6 @@ def run(self) -> bool: self.build() self.copy_build_to_webroot() self.save_state(build_duration=perf_counter() - start_time) - else: - logging.info("Nothing changed.") except Exception as err: logging.exception("Badly handled exception, human, please help.") if sentry_sdk: @@ -925,6 +923,7 @@ def copy_build_to_webroot(self): def should_rebuild(self): state = self.load_state() if not state: + logging.info("Should rebuild: no previous state found.") return True cpython_sha = self.cpython_repo.run("rev-parse", "HEAD").stdout.strip() if self.language.tag != "en": @@ -932,13 +931,24 @@ def should_rebuild(self): "rev-parse", "HEAD" ).stdout.strip() if translation_sha != state["translation_sha"]: + logging.info( + "Should rebuild: new translations (from %s to %s)", + state["translation_sha"], + translation_sha, + ) return True if cpython_sha != state["cpython_sha"]: diff = self.cpython_repo.run( "diff", "--name-only", state["cpython_sha"], cpython_sha ).stdout if "Doc/" in diff: + logging.info( + "Should rebuild: Doc/ has changed (from %s to %s)", + state["cpython_sha"], + cpython_sha, + ) return True + logging.info("Nothing changed, no rebuild needed.") return False def load_state(self) -> dict: From 660934ecf7759088757e44e222a5f4c76e20a86e Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Thu, 19 Oct 2023 12:48:53 +0200 Subject: [PATCH 6/8] Oops, duplicated method. --- build_docs.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/build_docs.py b/build_docs.py index 2d53afb..f0e2366 100755 --- a/build_docs.py +++ b/build_docs.py @@ -300,9 +300,6 @@ def get_ref(self, pattern): # Maybe it's a tag return self.run("show-ref", "-s", "tags/" + pattern).stdout.strip() - def fetch(self): - self.run("fetch") - def switch(self, branch_or_tag): """Reset and cleans the repository to the given branch or tag.""" self.run("reset", "--hard", self.get_ref(branch_or_tag), "--") From 6e0203284342a07e94bdb149bbe476ef26949823 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Thu, 19 Oct 2023 12:51:43 +0200 Subject: [PATCH 7/8] Better import placement (would not run isort yet, too many changes). --- build_docs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build_docs.py b/build_docs.py index f0e2366..6dc6ae4 100755 --- a/build_docs.py +++ b/build_docs.py @@ -24,8 +24,6 @@ from argparse import ArgumentParser from contextlib import suppress, contextmanager from dataclasses import dataclass -from datetime import datetime as dt, timezone -from time import perf_counter, sleep import filecmp import json import logging @@ -39,9 +37,11 @@ import sys from bisect import bisect_left as bisect from collections import OrderedDict +from datetime import datetime as dt, timezone from pathlib import Path from string import Template from textwrap import indent +from time import perf_counter, sleep from typing import Iterable from urllib.parse import urljoin From f2c28dbb503231760ec99dfd5e37289bd38a40fd Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Fri, 20 Oct 2023 14:08:46 +0200 Subject: [PATCH 8/8] Less diff. --- build_docs.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/build_docs.py b/build_docs.py index 6dc6ae4..d6eeb6b 100755 --- a/build_docs.py +++ b/build_docs.py @@ -278,15 +278,6 @@ class Repository: remote: str directory: Path - def fetch(self): - """Try (and retry) to run git fetch.""" - try: - return self.run("fetch") - except subprocess.CalledProcessError as err: - logging.error("'git fetch' failed (%s), retrying...", err.stderr) - sleep(5) - return self.run("fetch") - def run(self, *args): """Run git command in the clone repository.""" return run(("git", "-C", self.directory) + args) @@ -300,6 +291,15 @@ def get_ref(self, pattern): # Maybe it's a tag return self.run("show-ref", "-s", "tags/" + pattern).stdout.strip() + def fetch(self): + """Try (and retry) to run git fetch.""" + try: + return self.run("fetch") + except subprocess.CalledProcessError as err: + logging.error("'git fetch' failed (%s), retrying...", err.stderr) + sleep(5) + return self.run("fetch") + def switch(self, branch_or_tag): """Reset and cleans the repository to the given branch or tag.""" self.run("reset", "--hard", self.get_ref(branch_or_tag), "--")