Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Infer titles of pages based on full Markdown parsing #3191

Merged
merged 9 commits into from
May 29, 2023
Prev Previous commit
Next Next commit
Strip anchorlinks from titles
  • Loading branch information
oprypin committed Apr 23, 2023
commit 85d0eea6a80fed21422d9c3a6547a0deaedf8b95
17 changes: 10 additions & 7 deletions mkdocs/structure/pages.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from __future__ import annotations

import copy
import logging
import os
import posixpath
from typing import TYPE_CHECKING, Any, Callable, Mapping, MutableMapping, Optional, Union
from urllib.parse import unquote as urlunquote
from urllib.parse import urljoin, urlsplit, urlunsplit
from xml.etree.ElementTree import Element
from xml.etree import ElementTree as etree

import markdown
import markdown.extensions
Expand Down Expand Up @@ -58,7 +59,6 @@ def __init__(

# Placeholders to be filled in later in the build process.
self.markdown = None
self._title_allowed = False
self._title_from_render: Optional[str] = None
self.content = None
self.toc = [] # type: ignore
Expand Down Expand Up @@ -230,11 +230,9 @@ def read_source(self, config: MkDocsConfig) -> None:
raise

self.markdown, self.meta = meta.get_data(source)
self._title_allowed = True

def _set_title(self) -> None:
"""Soft-deprecated, do not use."""
self._title_allowed = True
self.render(
{'markdown_extensions': (), 'mdx_configs': None}, # type: ignore
Files([]),
Expand All @@ -253,7 +251,7 @@ def title(self) -> Optional[str]:
- content of the first H1 in Markdown content
- convert filename to title
"""
if not getattr(self, '_title_allowed', False):
if self.markdown is None:
return None

if 'title' in self.meta:
Expand Down Expand Up @@ -298,7 +296,7 @@ def __init__(self, file: File, files: Files) -> None:
self.file = file
self.files = files

def run(self, root: Element) -> Element:
def run(self, root: etree.Element) -> etree.Element:
"""
Update urls on anchors and images to make them relative

Expand Down Expand Up @@ -385,9 +383,14 @@ class _ExtractTitleTreeprocessor(markdown.treeprocessors.Treeprocessor):
def __init__(self, ext: _ExtractTitleExtension) -> None:
self.ext = ext

def run(self, root: Element) -> Element:
def run(self, root: etree.Element) -> etree.Element:
    """
    Record the plain-text content of the first top-level `h1` as the title.

    Only direct children of `root` are inspected. The extracted text is
    stored on `self.ext.title`; `root` itself is returned unmodified.
    """
    for el in root:
        if el.tag == 'h1':
            # Drop anchorlink from the element, if present. A trailing `<a>` only
            # counts as an anchorlink when no text follows it inside the heading,
            # so the tail to inspect is the link's own (`el[-1].tail`), not the
            # heading's (`el.tail`, i.e. whatever comes after the `<h1>`).
            if len(el) > 0 and el[-1].tag == 'a' and not (el[-1].tail or '').strip():
                # Operate on a shallow copy so the rendered tree keeps its link.
                el = copy.copy(el)
                del el[-1]
            # Extract the text only, recursively.
            self.ext.title = _unescape(''.join(el.itertext()))
            break
    return root
32 changes: 32 additions & 0 deletions mkdocs/tests/structure/page_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import unittest
from unittest import mock

from mkdocs.config.defaults import MkDocsConfig
from mkdocs.structure.files import File, Files
from mkdocs.structure.pages import Page
from mkdocs.tests.base import dedent, load_config, tempdir
Expand Down Expand Up @@ -316,10 +317,41 @@ def test_page_title_from_setext_markdown(self, docs_dir):
cfg = load_config()
fl = File('testing_setext_title.md', docs_dir, docs_dir, use_directory_urls=True)
pg = Page(None, fl, cfg)
self.assertIsNone(pg.title)
pg.read_source(cfg)
self.assertEqual(pg.title, 'Testing setext title')
pg.render(cfg, fl)
self.assertEqual(pg.title, 'Welcome to MkDocs Setext')

@tempdir(files={'testing_setext_title.md': _SETEXT_CONTENT})
def test_page_title_from_markdown_stripped_anchorlinks(self, docs_dir):
    """The rendered title is unchanged when `toc` is configured with a permalink."""
    config = MkDocsConfig()
    config.site_name = 'example'
    config.markdown_extensions = {'toc': {'permalink': '&'}}
    # The hand-built config must validate with no errors and no warnings.
    validation_result = config.validate()
    self.assertEqual(validation_result, ([], []))

    source_file = File('testing_setext_title.md', docs_dir, docs_dir, use_directory_urls=True)
    page = Page(None, source_file, config)
    page.read_source(config)
    page.render(config, source_file)

    self.assertEqual(page.title, 'Welcome to MkDocs Setext')

# Markdown fixture: a first-level heading that mixes emphasis and inline code,
# followed by one short paragraph.
_FORMATTING_CONTENT = dedent(
'''
# Hello *beautiful* `world`

Hi.
'''
)

@tempdir(files={'testing_formatting.md': _FORMATTING_CONTENT})
def test_page_title_from_markdown_strip_formatting(self, docs_dir):
    """After rendering, the title is the heading text without inline markup."""
    config = load_config()
    source_file = File('testing_formatting.md', docs_dir, docs_dir, use_directory_urls=True)
    page = Page(None, source_file, config)

    page.read_source(config)
    page.render(config, source_file)

    self.assertEqual(page.title, 'Hello beautiful world')

_ATTRLIST_CONTENT = dedent(
'''
# Welcome to MkDocs Attr { #welcome }
Expand Down