Skip to content

DOC [PST] better integrate gallery with pydata-sphinx-theme #28415

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 16, 2024
Merged
44 changes: 38 additions & 6 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
"add_toctree_functions",
"allow_nan_estimators",
"doi_role",
"move_gallery_links",
"sphinx_issues",
]

Expand Down Expand Up @@ -164,7 +165,13 @@

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ["_build", "templates", "includes", "themes"]
exclude_patterns = [
"_build",
"templates",
"includes",
"themes",
"**/sg_execution_times.rst",
]

# The reST default role (used for this markup: `text`) to use for all
# documents.
Expand Down Expand Up @@ -269,8 +276,11 @@
"footer_start": ["copyright"],
"footer_center": [],
"footer_end": [],
# When specified as a dictionary, the keys should follow glob-style patterns, as in
# https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-exclude_patterns
# In particular, "**" specifies the default for all pages
# Use :html_theme.sidebar_secondary.remove: for file-wide removal
"secondary_sidebar_items": ["page-toc", "sourcelink"],
"secondary_sidebar_items": {"**": ["page-toc", "sourcelink"]},
"show_version_warning_banner": True,
"announcement": [],
}
Expand Down Expand Up @@ -628,14 +638,16 @@ def reset_sklearn_config(gallery_conf, fname):
sklearn.set_config(**default_global_config)


sg_examples_dir = "../examples"
sg_gallery_dir = "auto_examples"
sphinx_gallery_conf = {
"doc_module": "sklearn",
"backreferences_dir": os.path.join("modules", "generated"),
"show_memory": False,
"reference_url": {"sklearn": None},
"examples_dirs": ["../examples"],
"gallery_dirs": ["auto_examples"],
"subsection_order": SubSectionTitleOrder("../examples"),
"examples_dirs": [sg_examples_dir],
"gallery_dirs": [sg_gallery_dir],
"subsection_order": SubSectionTitleOrder(sg_examples_dir),
"within_subsection_order": SKExampleTitleSortKey,
"binder": {
"org": "scikit-learn",
Expand All @@ -649,14 +661,34 @@ def reset_sklearn_config(gallery_conf, fname):
"inspect_global_variables": False,
"remove_config_comments": True,
"plot_gallery": "True",
"recommender": {"enable": True, "n_examples": 5, "min_df": 12},
"recommender": {"enable": True, "n_examples": 4, "min_df": 12},
"reset_modules": ("matplotlib", "seaborn", reset_sklearn_config),
}
if with_jupyterlite:
sphinx_gallery_conf["jupyterlite"] = {
"notebook_modification_function": notebook_modification_function
}

# Secondary sidebar configuration for pages generated by sphinx-gallery

# For the index page of the gallery and each nested section, we hide the secondary
# sidebar by specifying an empty list (no components), because there is no meaningful
# in-page toc for these pages, and they are generated so "sourcelink" is not useful
# either.

# For each example page we keep the default ["page-toc", "sourcelink"] specified by
# the "**" key. "page-toc" is wanted for these pages. "sourcelink" is also necessary,
# since otherwise the secondary sidebar would degenerate when "page-toc" is empty and
# the script `sphinxext/move_gallery_links.py` would fail (it assumes the existence of
# the secondary sidebar). The script removes "sourcelink" at the end.

# Docnames of the generated index pages: the gallery root first, then one per
# sub-directory of the examples folder. Each is mapped to an empty component list,
# which hides the secondary sidebar on that page.
_sg_index_docnames = [f"{sg_gallery_dir}/index"]
for sub_sg_dir in (Path(".") / sg_examples_dir).iterdir():
    if sub_sg_dir.is_dir():
        _sg_index_docnames.append(f"{sg_gallery_dir}/{sub_sg_dir.name}/index")
html_theme_options["secondary_sidebar_items"].update(
    (docname, []) for docname in _sg_index_docnames
)


# The following dictionary contains the information used to create the
# thumbnails for the front page of the scikit-learn home page.
Expand Down
187 changes: 187 additions & 0 deletions doc/sphinxext/move_gallery_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
"""
This script intends to better integrate sphinx-gallery into pydata-sphinx-theme. In
particular, it moves the download links and badge links in the footer of each generated
example page into the secondary sidebar, then removes the footer and the top note
pointing to the footer.

The download links are for Python source code and Jupyter notebook respectively, and
the badge links are for JupyterLite and Binder.

Currently this is achieved via post-processing the HTML generated by sphinx-gallery.
This hack can be removed if the following upstream issue is resolved:
https://github.com/sphinx-gallery/sphinx-gallery/issues/1258
"""

from pathlib import Path

from bs4 import BeautifulSoup
from sphinx.util.display import status_iterator
from sphinx.util.logging import getLogger

logger = getLogger(__name__)


def _collect_example_pages(app, html_gallery_dir):
    """Return `(file, docname)` tuples for the example pages of one gallery.

    Index pages, the execution-times page, redirected pages and HTML files that do
    not correspond to a known document are left out.
    """
    # Tolerate a configuration without any redirects instead of raising KeyError
    redirects = app.config.html_context.get("redirects", ())

    pages = []
    for file in html_gallery_dir.rglob("*.html"):
        if file.name in ("index.html", "sg_execution_times.html"):
            # These are not gallery example pages, skip
            continue

        # Extract the documentation name from the path
        docname = file.relative_to(app.builder.outdir).with_suffix("").as_posix()
        if docname in redirects:
            # This is a redirected page, skip
            continue
        if docname not in app.project.docnames:
            # This should not happen, warn
            logger.warning(f"Document {docname} not found but {file} exists")
            continue
        pages.append((file, docname))
    return pages


def _find_anchor(container, css_class):
    """Return the anchor inside the `div.<css_class>` of `container`, or None."""
    wrapper = container.find("div", class_=css_class)
    return None if wrapper is None else wrapper.a


def _append_sidebar_component(soup, secondary_sidebar, items):
    """Append a new component holding `items` to the secondary sidebar.

    `items` should be a list of dictionaries with "element" being the bs4 tag of
    the item and "title" being its title (None if not needed).
    """
    component = soup.new_tag("div", **{"class": "sidebar-secondary-item"})
    for item in items:
        item_wrapper = soup.new_tag("div")
        item_wrapper.append(item["element"])
        if item["title"]:
            item_wrapper["title"] = item["title"]
        component.append(item_wrapper)
    secondary_sidebar.append(component)


def _create_download_link(soup, link, is_jupyter=False):
    """Create a download link to be appended to a sidebar component.

    `link` should be the bs4 tag of the original download link, either for the
    Python source code (is_jupyter=False) or for the Jupyter notebook
    (is_jupyter=True). `link` itself is not removed here; the whole footer in
    which it is located is removed by the caller.

    This returns a dictionary with "element" being the bs4 tag of the new download
    link and "title" being the name of the file to download.
    """
    new_link = soup.new_tag("a", href=link["href"], download="")

    # Place a download icon at the beginning of the new link
    download_icon = soup.new_tag("i", **{"class": "fa-solid fa-download"})
    new_link.append(download_icon)

    # Create the text of the new link; it is shortened to fit better into the
    # secondary sidebar. The leading space before "Download ..." is intentional to
    # create a small gap between the icon and the text, being consistent with the
    # other pydata-sphinx-theme components
    link_type = "Jupyter notebook" if is_jupyter else "source code"
    new_link.append(soup.new_string(f" Download {link_type}"))

    # Get the file name to download and use it as the title of the new link which
    # will show up when hovering over the link; the file name is expected to be in
    # the last span of `link`
    link_spans = link.find_all("span")
    title = link_spans[-1].text if link_spans else None

    return {"element": new_link, "title": title}


def _create_badge_link(soup, link):
    """Create a badge link to be appended to a sidebar component.

    `link` should be the bs4 tag of the original badge link, either for Binder or
    JupyterLite. `link` itself is not removed here; the whole footer in which it is
    located is removed by the caller.

    This returns a dictionary with "element" being the bs4 tag of the new badge
    link and "title" being `None` (no need).
    """
    new_link = soup.new_tag("a", href=link["href"])

    # The link is essentially an anchor wrapper around the image of the badge; we
    # get the src and alt attributes from the original image and limit the height
    # to 20px (fixed) so that the secondary sidebar will appear neater
    badge_img = link.find("img")
    new_img = soup.new_tag(
        "img", src=badge_img["src"], alt=badge_img["alt"], height=20
    )
    new_link.append(new_img)

    return {"element": new_link, "title": None}


def _relocate_links(soup, secondary_sidebar):
    """Move the sphinx-gallery footer links of one page into its secondary sidebar.

    The download links are required and a `ValueError` is raised if they (or the
    footer) cannot be found. The JupyterLite and Binder badges are optional, since
    either may be disabled in the gallery configuration; only the badges that are
    present are moved. `soup` is modified in place.
    """
    # `sg_note` is the "go to the end" note at the top of the page
    # `sg_footer` is the footer with the download links and badge links
    # These are removed at the end, once the new links are successfully created
    sg_note = soup.find("div", class_="sphx-glr-download-link-note")
    sg_footer = soup.find("div", class_="sphx-glr-footer")
    if sg_footer is None:
        raise ValueError("sphinx-gallery footer not found")

    # Move the download links into the secondary sidebar
    py_link = _find_anchor(sg_footer, "sphx-glr-download-python")
    ipy_link = _find_anchor(sg_footer, "sphx-glr-download-jupyter")
    if py_link is None or ipy_link is None:
        raise ValueError("download links not found in the sphinx-gallery footer")
    _append_sidebar_component(
        soup,
        secondary_sidebar,
        [
            _create_download_link(soup, py_link, is_jupyter=False),
            _create_download_link(soup, ipy_link, is_jupyter=True),
        ],
    )

    # Move whichever badge links exist into the secondary sidebar; a missing badge
    # must not prevent the rest of the page from being tweaked
    badge_links = (
        _find_anchor(sg_footer, "lite-badge"),
        _find_anchor(sg_footer, "binder-badge"),
    )
    badges = [
        _create_badge_link(soup, link) for link in badge_links if link is not None
    ]
    if badges:
        _append_sidebar_component(soup, secondary_sidebar, badges)

    # Remove the sourcelink component from the secondary sidebar; the reason we do
    # not remove it by configuration is that we need the secondary sidebar to be
    # present for this script to work, while in-page toc alone could have been
    # empty
    sourcelink = secondary_sidebar.find("div", class_="sourcelink")
    if sourcelink is not None:
        sourcelink.parent.extract()  # because sourcelink has a wrapper div

    # Remove the top note (if any) and the whole footer
    if sg_note is not None:
        sg_note.extract()
    sg_footer.extract()


def move_gallery_links(app, exception):
    """Post-process the gallery example pages once the build has finished.

    This is a handler for the Sphinx "build-finished" event. For every HTML example
    page under the configured gallery directories, the download and badge links of
    the sphinx-gallery footer are moved into the secondary sidebar and the page is
    rewritten in place. Nothing is done if the build failed (`exception` is not
    None). A page that cannot be tweaked is reported with a warning and left
    untouched on disk.
    """
    if exception is not None:
        return

    for gallery_dir in app.config.sphinx_gallery_conf["gallery_dirs"]:
        html_gallery_dir = Path(app.builder.outdir, gallery_dir)

        # Get all gallery example files to be tweaked; tuples (file, docname)
        pages = _collect_example_pages(app, html_gallery_dir)

        for html_file, _ in status_iterator(
            pages,
            length=len(pages),
            summary="Tweaking gallery links... ",
            verbosity=app.verbosity,
            stringify_func=lambda x: x[1],  # display docname
        ):
            html = html_file.read_text(encoding="utf-8")
            soup = BeautifulSoup(html, "html.parser")

            # Find the secondary sidebar; it should exist in all gallery example pages
            secondary_sidebar = soup.find("div", class_="sidebar-secondary-items")
            if secondary_sidebar is None:
                logger.warning(f"Secondary sidebar not found in {html_file}")
                continue

            try:
                _relocate_links(soup, secondary_sidebar)
            except Exception as e:
                # Deliberately best-effort: if any step fails we skip the file, so
                # the partially modified soup is never written back
                logger.warning(f"Failed to tweak gallery links in {html_file}: {e}")
                continue

            # Write the modified file back
            html_file.write_text(str(soup), encoding="utf-8")


def setup(app):
    """Register the gallery link post-processing with the Sphinx application.

    Returns the extension metadata dictionary read by Sphinx.
    """
    # Default priority is 500 which sphinx-gallery uses for its build-finished events;
    # we need a larger priority to run after sphinx-gallery (larger is later)
    app.connect("build-finished", move_gallery_links, priority=900)

    # The only handler runs on "build-finished" and post-processes the written HTML
    # files, so the extension does not interfere with parallel reading or writing.
    # Declaring this avoids Sphinx assuming the extension is not parallel-safe.
    return {"parallel_read_safe": True, "parallel_write_safe": True}
5 changes: 5 additions & 0 deletions examples/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,8 @@

Examples
========

This is the gallery of examples that showcase how scikit-learn can be used. Some
examples demonstrate the use of the :ref:`API <api_ref>` in general and some
demonstrate specific applications in tutorial form. Also check out our
:ref:`user guide <user_guide>` for more detailed illustrations.