diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 1e30956fe24f83..a893c3846d6859 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -471,4 +471,4 @@ def handle_pi(self, data): pass def unknown_decl(self, data): - pass + pass \ No newline at end of file diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 61fa24fab574f2..7347bbd9242e94 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -295,6 +295,57 @@ def test_cdata_content(self): ("data", content), ("endtag", element_lower)]) + def test_raw_text_content(self): + # Tags should be treated as text in raw text and escapable raw text content. + content = """

tagshould be handled as text""" + elements = [ + "script", + "style", + "title", + "textarea", + "SCRIPT", + "STYLE", + "TITLE", + "TEXTAREA", + "Script", + "Style", + "Title", + "Textarea", + ] + for element in elements: + source = f"<{element}>{content}" + self._run_check(source, [ + ("starttag", element.lower(), []), + ("data", content) + ]) + + def test_escapable_raw_text_content(self): + # Charrefs should be escaped in escapable raw text content. + class Collector(EventCollector): + pass + + content = "Timon & Pumba" + expected = "Timon & Pumba" + elements = [ + "title", + "textarea", + "TITLE", + "TEXTAREA", + "Title", + "Textarea", + ] + for element in elements: + source = f"<{element}>{content}" + self._run_check( + source, [ + ("starttag", element.lower(), []), + ('data', 'Timon '), + ('entityref', 'amp'), + ('data', ' Pumba') + ], + collector=Collector(convert_charrefs=False), + ) + def test_cdata_with_closing_tags(self): # see issue #13358 # make sure that HTMLParser calls handle_data only once for each CDATA. @@ -473,7 +524,7 @@ def test_slashes_in_starttag(self): ('starttag', 'a', [('foo', None), ('=', None), ('bar', None)]) ] self._run_check(html, expected) - #see issue #14538 + # see issue #14538 html = ('' '') expected = [