From 685b8eb6fe9b102e27fcb1519041f3e09fef8579 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sat, 20 Jul 2024 15:03:20 +0100 Subject: [PATCH 1/2] Update and format ``escape4chm`` --- Doc/.ruff.toml | 1 - Doc/tools/extensions/escape4chm.py | 90 +++++++++++++++++------------- 2 files changed, 52 insertions(+), 39 deletions(-) diff --git a/Doc/.ruff.toml b/Doc/.ruff.toml index 24f1c4f2ff6801..111ce03b91df38 100644 --- a/Doc/.ruff.toml +++ b/Doc/.ruff.toml @@ -5,7 +5,6 @@ line-length = 79 extend-exclude = [ "includes/*", # Temporary exclusions: - "tools/extensions/escape4chm.py", "tools/extensions/pyspecific.py", ] diff --git a/Doc/tools/extensions/escape4chm.py b/Doc/tools/extensions/escape4chm.py index 89970975b9032b..6b28dd9286d8ab 100644 --- a/Doc/tools/extensions/escape4chm.py +++ b/Doc/tools/extensions/escape4chm.py @@ -1,58 +1,72 @@ """ -Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual -effect on some MBCS Windows systems. +Escape the `body` part of .chm source file to 7-bit ASCII, +to fix visual effects on some MBCS Windows systems. 
-https://bugs.python.org/issue32174 +https://github.com/python/cpython/issues/76355 """ -import pathlib +from __future__ import annotations + import re from html.entities import codepoint2name +from pathlib import Path +from typing import TYPE_CHECKING + +from sphinx.application import Sphinx +from sphinx.util import logging -from sphinx.util.logging import getLogger +if TYPE_CHECKING: + from typing import Any -# escape the characters which codepoint > 0x7F -def _process(string): - def escape(matchobj): - codepoint = ord(matchobj.group(0)) + from docutils import nodes + from sphinx.application import Sphinx + from sphinx.util.typing import ExtensionMetadata - name = codepoint2name.get(codepoint) - if name is None: - return '&#%d;' % codepoint - else: - return '&%s;' % name +logger = logging.getLogger(__name__) - return re.sub(r'[^\x00-\x7F]', escape, string) -def escape_for_chm(app, pagename, templatename, context, doctree): +def escape_for_chm( + app: Sphinx, + _page_name: str, + _template_name: str, + context: dict[str, Any], + _doctree: nodes.document, +) -> None: + """Escape the characters with a codepoint over ``0x7F``.""" # only works for .chm output - if getattr(app.builder, 'name', '') != 'htmlhelp': + if app.builder.name != "htmlhelp": return # escape the `body` part to 7-bit ASCII - body = context.get('body') + body = context.get("body") if body is not None: - context['body'] = _process(body) + context["body"] = re.sub(r"[^\x00-\x7F]", _escape, body) + + +def _escape(match: re.Match[str]) -> str: + codepoint = ord(match.group(0)) + if codepoint in codepoint2name: + return f"&{codepoint2name[codepoint]};" + return f"&#{codepoint};" -def fixup_keywords(app, exception): + +def fixup_keywords(app: Sphinx, exception: Exception) -> None: # only works for .chm output - if getattr(app.builder, 'name', '') != 'htmlhelp' or exception: + if exception or app.builder.name != "htmlhelp": return - getLogger(__name__).info('fixing HTML escapes in keywords file...') - 
outdir = pathlib.Path(app.builder.outdir) - outname = app.builder.config.htmlhelp_basename - with open(outdir / (outname + '.hhk'), 'rb') as f: - index = f.read() - with open(outdir / (outname + '.hhk'), 'wb') as f: - f.write(index.replace(b'&#x27;', b'&#39;')) - -def setup(app): - # `html-page-context` event emitted when the HTML builder has - # created a context dictionary to render a template with. - app.connect('html-page-context', escape_for_chm) - # `build-finished` event emitted when all the files have been - # output. - app.connect('build-finished', fixup_keywords) - - return {'version': '1.0', 'parallel_read_safe': True} + logger.info("Fixing HTML escapes in keywords file...") + keywords_path = Path(app.outdir) / f"{app.config.htmlhelp_basename}.hhk" + index = keywords_path.read_bytes() + keywords_path.write_bytes(index.replace(b"&#x27;", b"&#39;")) + + +def setup(app: Sphinx) -> ExtensionMetadata: + app.connect("html-page-context", escape_for_chm) + app.connect("build-finished", fixup_keywords) + + return { + "version": "1.0", + "parallel_read_safe": True, + "parallel_write_safe": True, + } From c66f6b74bfc8e86f58f99ff1425b08a3ae394875 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sat, 20 Jul 2024 15:11:17 +0100 Subject: [PATCH 2/2] Remove escape4chm --- Doc/conf.py | 1 - Doc/tools/extensions/escape4chm.py | 72 ------------------------------ 2 files changed, 73 deletions(-) delete mode 100644 Doc/tools/extensions/escape4chm.py diff --git a/Doc/conf.py b/Doc/conf.py index 17e98e1a01ed21..4841b69e380085 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -21,7 +21,6 @@ extensions = [ 'c_annotations', - 'escape4chm', 'glossary_search', 'lexers', 'pyspecific', diff --git a/Doc/tools/extensions/escape4chm.py b/Doc/tools/extensions/escape4chm.py deleted file mode 100644 index 6b28dd9286d8ab..00000000000000 --- a/Doc/tools/extensions/escape4chm.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -Escape the `body` part of .chm source file to 7-bit 
ASCII, -to fix visual effects on some MBCS Windows systems. - -https://github.com/python/cpython/issues/76355 -""" - -from __future__ import annotations - -import re -from html.entities import codepoint2name -from pathlib import Path -from typing import TYPE_CHECKING - -from sphinx.application import Sphinx -from sphinx.util import logging - -if TYPE_CHECKING: - from typing import Any - - from docutils import nodes - from sphinx.application import Sphinx - from sphinx.util.typing import ExtensionMetadata - -logger = logging.getLogger(__name__) - - -def escape_for_chm( - app: Sphinx, - _page_name: str, - _template_name: str, - context: dict[str, Any], - _doctree: nodes.document, -) -> None: - """Escape the characters with a codepoint over ``0x7F``.""" - # only works for .chm output - if app.builder.name != "htmlhelp": - return - - # escape the `body` part to 7-bit ASCII - body = context.get("body") - if body is not None: - context["body"] = re.sub(r"[^\x00-\x7F]", _escape, body) - - -def _escape(match: re.Match[str]) -> str: - codepoint = ord(match.group(0)) - if codepoint in codepoint2name: - return f"&{codepoint2name[codepoint]};" - return f"&#{codepoint};" - - -def fixup_keywords(app: Sphinx, exception: Exception) -> None: - # only works for .chm output - if exception or app.builder.name != "htmlhelp": - return - - logger.info("Fixing HTML escapes in keywords file...") - keywords_path = Path(app.outdir) / f"{app.config.htmlhelp_basename}.hhk" - index = keywords_path.read_bytes() - keywords_path.write_bytes(index.replace(b"&#x27;", b"&#39;")) - - -def setup(app: Sphinx) -> ExtensionMetadata: - app.connect("html-page-context", escape_for_chm) - app.connect("build-finished", fixup_keywords) - - return { - "version": "1.0", - "parallel_read_safe": True, - "parallel_write_safe": True, - }