From 86c6f805ae8fcfd65cba588287854ea7690000da Mon Sep 17 00:00:00 2001 From: "aelaguiz@gmail.com" Date: Thu, 25 Dec 2014 14:50:24 -0600 Subject: [PATCH 1/9] WIP - Jinja parsing --- .gitignore | 2 + html5lib/constants.py | 5 ++- html5lib/html5parser.py | 50 ++++++++++++++++++++--- html5lib/tests/test_jinja.py | 77 ++++++++++++++++++++++++++++++++++++ html5lib/tokenizer.py | 47 +++++++++++++++++++++- 5 files changed, 172 insertions(+), 9 deletions(-) create mode 100644 html5lib/tests/test_jinja.py diff --git a/.gitignore b/.gitignore index 73d97fec..ce463d19 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,5 @@ stats.prof # We have no interest in built Sphinx files /doc/_build + +venv diff --git a/html5lib/constants.py b/html5lib/constants.py index 5735d7b6..30071abc 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -3093,7 +3093,10 @@ "JinjaVariableEndTag": 12, "JinjaVariable": 13, "JinjaFilter": 14, - "JinjaPipe": 15 + "JinjaPipe": 15, + "JinjaArgumentStartTag": 16, + "JinjaArgumentEndTag": 17, + "JinjaArgument": 18 } tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 9d836e16..fde79165 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -174,6 +174,9 @@ def mainLoop(self): JinjaVariable = tokenTypes["JinjaVariable"] JinjaPipe = tokenTypes["JinjaPipe"] JinjaFilter = tokenTypes["JinjaFilter"] + JinjaArgumentStartTag = tokenTypes["JinjaArgumentStartTag"] + JinjaArgumentEndTag = tokenTypes["JinjaArgumentEndTag"] + JinjaArgument = tokenTypes["JinjaArgument"] for token in self.normalizedTokens(): new_token = token @@ -190,8 +193,11 @@ def mainLoop(self): new_token = None else: if type in (JinjaVariableStartTag, JinjaVariableEndTag, JinjaVariable, JinjaFilter, JinjaPipe): - log.debug(u"Type is a jinja tag") + log.debug(u"Type is a jinja variable tag") phase = self.phases["inJinjaVariable"] + elif type in (JinjaArgumentStartTag, JinjaArgumentEndTag, JinjaArgument): + log.debug(u"Type is a jinja argument tag") + phase = self.phases["inJinjaArgument"] elif ( len(self.tree.openElements) == 0 or currentNodeNamespace == self.tree.defaultNamespace or @@ -236,6 +242,12 @@ def mainLoop(self): new_token = phase.processJinjaPipe(new_token) elif type == JinjaFilter: new_token = phase.processJinjaFilter(new_token) + elif type == JinjaArgumentStartTag: + new_token = phase.processJinjaArgumentStartTag(new_token) + elif type == JinjaArgumentEndTag: + new_token = phase.processJinjaArgumentEndTag(new_token) + elif type == JinjaArgument: + new_token = phase.processJinjaArgument(new_token) if (type == StartTagToken and token["selfClosing"] and not token["selfClosingAcknowledged"]): @@ -529,6 +541,15 @@ def processJinjaVariableEndTag(self, token): def processJinjaVariable(self, token): pass + def processJinjaArgumentStartTag(self, token): + pass + + def processJinjaArgumentEndTag(self, token): + pass + + def processJinjaArgument(self, token): + pass + def processJinjaPipe(self, token): pass @@ -554,18 +575,18 @@ def processEndTag(self, token): class InJinjaVariablePhase(Phase): def processJinjaVariableStartTag(self, token): log = logging.getLogger('html5lib') - log.debug(u"InJinja: Start Tag") + log.debug(u"InJinjaVariable: Start Tag") self.tree.reconstructActiveFormattingElements() self.tree.insertElement(token) def processJinjaVariableEndTag(self, token): log = logging.getLogger('html5lib') - log.debug(u"InJinja: End Tag {}".format(token["name"])) + log.debug(u"InJinjaVariable: End Tag {}".format(token["name"])) for node in self.tree.openElements[::-1]: - log.debug(u"InJinja: Open tag {} token {}".format(node, token)) + log.debug(u"InJinjaVariable: Open tag {} token {}".format(node, token)) if node.name == token["name"]: self.tree.generateImpliedEndTags(exclude=token["name"]) - log.debug(u"InJinja: Implied end tag {} {}".format(self.tree.openElements[-1].name, token["name"])) + log.debug(u"InJinjaVariable: Implied end tag {} {}".format(self.tree.openElements[-1].name, token["name"])) if self.tree.openElements[-1].name != token["name"]: self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) while self.tree.openElements.pop() != node: @@ -573,7 +594,7 @@ def processJinjaVariableEndTag(self, token): break else: if node.nameTuple in specialElements: - log.debug(u"Nametuple {} in {}".format(node.nameTuple, specialElements)) + log.debug(u"InJinjaVariable Nametuple {} in {}".format(node.nameTuple, specialElements)) self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) break @@ -589,6 +610,22 @@ def processJinjaFilter(self, token): element = self.tree.createElementWithoutNamespace(token) self.tree.openElements[-1].appendChild(element) + class InJinjaArgumentPhase(Phase): + def processJinjaArgumentStartTag(self, token): + log = logging.getLogger('html5lib') + log.debug(u"InJinjaArgument: Start Tag") + + def processJinjaArgumentEndTag(self, token): + log = logging.getLogger('html5lib') + log.debug(u"InJinjaArgument: End Tag {}".format(token["name"])) + + def processJinjaArgument(self, token): + log = logging.getLogger('html5lib') + log.debug(u"InJinjaArgument: Process Jinja Argument {}".format(token["name"])) + + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].childNodes[-1].appendChild(element) + class InitialPhase(Phase): def processSpaceCharacters(self, token): pass @@ -2794,6 +2831,7 @@ def processEndTag(self, token): # XXX "inHeadNoscript": InHeadNoScriptPhase, "afterHead": AfterHeadPhase, "inJinjaVariable": InJinjaVariablePhase, + "inJinjaArgument": InJinjaArgumentPhase, "inBody": InBodyPhase, "text": TextPhase, "inTable": InTablePhase, diff --git a/html5lib/tests/test_jinja.py b/html5lib/tests/test_jinja.py new file mode 100644 index 00000000..05413f7b --- /dev/null +++ b/html5lib/tests/test_jinja.py @@ -0,0 +1,77 @@ +import html5lib +import unittest +import logging + +log = logging.getLogger(__name__) + + +def dump(tree, tabs=0): + log.debug(u"{}Tag '{}' - {} children - Value = {}".format( + "".join(["\t" for i in range(tabs)]), tree.tag, len(tree), tree.attrib['value'] if 'value' in tree.attrib else None)) + + for child in tree: + dump(child, tabs + 1) + + +class JinjaTestCase(unittest.TestCase): + def test_var_1(self): + parser = html5lib.HTMLParser(strict=False) + + html_string = """

{{ hi }}

""" + + tree = parser.parseFragment(html_string) + + h1 = tree[0] + self.assertEqual(h1.tag, "{http://www.w3.org/1999/xhtml}h1") + + def test_filter_1(self): + parser = html5lib.HTMLParser(strict=False) + + html_string = """

{{ hi | yo }}

""" + + tree = parser.parseFragment(html_string) + + h1 = tree[0] + self.assertEqual(h1.tag, "{http://www.w3.org/1999/xhtml}h1") + + jt = h1[0] + + hi = jt[0] + pipe1 = jt[1] + yo = jt[2] + + self.assertEqual(hi.tag, 'jinjavariable') + self.assertEqual(hi.attrib['value'], 'hi') + self.assertEqual(pipe1.tag, 'jinjapipe') + self.assertEqual(pipe1.attrib['value'], '|') + self.assertEqual(yo.tag, 'jinjafilter') + self.assertEqual(yo.attrib['value'], 'yo') + + def test_filter_2(self): + parser = html5lib.HTMLParser(strict=False) + + html_string = """

{{ hi | yo("hi") }}

""" + + tree = parser.parseFragment(html_string) + dump(tree) + + h1 = tree[0] + self.assertEqual(h1.tag, "{http://www.w3.org/1999/xhtml}h1") + + jt = h1[0] + + hi = jt[0] + pipe1 = jt[1] + yo = jt[2] + + self.assertEqual(hi.tag, 'jinjavariable') + self.assertEqual(hi.attrib['value'], 'hi') + self.assertEqual(pipe1.tag, 'jinjapipe') + self.assertEqual(pipe1.attrib['value'], '|') + self.assertEqual(yo.tag, 'jinjafilter') + self.assertEqual(yo.attrib['value'], 'yo') + + arg1 = yo[0] + + self.assertEqual(arg1.tag, 'jinjaargument') + self.assertEqual(arg1.attrib['value'], '"hi"') diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py index 09e705ff..23ded9fa 100644 --- a/html5lib/tokenizer.py +++ b/html5lib/tokenizer.py @@ -390,8 +390,17 @@ def jinjaVariableState(self): if data == "}": self.state = self.jinjaVariableEndState - #elif data == "(": - #self.state = self.jinjaArgState + elif data == "(": + self.currentToken = { + "type": tokenTypes["JinjaArgumentStartTag"], + "name": u"jinjaargumentstarttag", "data": {}, + "namespace": None, + "selfClosing": False + } + + self.tokenQueue.append(self.currentToken) + + self.state = self.jinjaArgState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-jinja-variable"}) @@ -433,6 +442,40 @@ def jinjaVariableState(self): return True + def jinjaArgState(self): + data = self.stream.char() + + log.debug(u"Arg {}".format(data)) + print "Got data", data + + if data == ")": + self.tokenQueue.append({ + "type": tokenTypes["JinjaArgumentEndTag"], + "name": u"jinjaargumentendtag", "data": [], + "selfClosing": False + }) + self.state = self.jinjaVariableState + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-jinja-argument"}) + self.state = self.dataState + elif data in spaceCharacters: + # Skip spaces + pass + else: + chars = self.stream.charsUntil(frozenset((",", ")"))) + + print "Got chars", chars + + self.currentToken = {"type": tokenTypes["JinjaArgument"], + "name": "jinjaargument", "selfClosing": True, "data": { + "value": data + chars, + "position": self.stream.position(), + }} + self.tokenQueue.append(self.currentToken) + + return True + def rcdataState(self): data = self.stream.char() if data == "&": From 24ecb5bc0ba5269b071060fa79b6468d0c9e1002 Mon Sep 17 00:00:00 2001 From: "aelaguiz@gmail.com" Date: Fri, 26 Dec 2014 06:40:09 -0600 Subject: [PATCH 2/9] Parsing works a lot better --- html5lib/constants.py | 2 +- html5lib/html5parser.py | 47 ++++++++++++-- html5lib/tests/test_jinja.py | 119 ++++++++++++++++++++++++++++++----- html5lib/tokenizer.py | 116 ++++++++++++++++++++++++++-------- 4 files changed, 240 insertions(+), 44 deletions(-) diff --git a/html5lib/constants.py b/html5lib/constants.py index 30071abc..4f5e7974 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -3088,7 +3088,7 @@ "ParseError": 7, "JinjaStatementStartTag": 8, "JinjaStatementEndTag": 9, - "JinjaStatementTag": 10, + "JinjaStatement": 10, "JinjaVariableStartTag": 11, "JinjaVariableEndTag": 12, "JinjaVariable": 13, diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index fde79165..e7c91503 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -168,7 +168,7 @@ def mainLoop(self): ParseErrorToken = tokenTypes["ParseError"] JinjaStatementStartTag = tokenTypes["JinjaStatementStartTag"] JinjaStatementEndTag = tokenTypes["JinjaStatementEndTag"] - JinjaStatementTag = tokenTypes["JinjaStatementTag"] + JinjaStatement = tokenTypes["JinjaStatement"] JinjaVariableStartTag = tokenTypes["JinjaVariableStartTag"] JinjaVariableEndTag = tokenTypes["JinjaVariableEndTag"] JinjaVariable = tokenTypes["JinjaVariable"] @@ -195,6 +195,9 @@ def mainLoop(self): if type in (JinjaVariableStartTag, JinjaVariableEndTag, JinjaVariable, JinjaFilter, JinjaPipe): log.debug(u"Type is a jinja variable tag") phase = self.phases["inJinjaVariable"] + elif type in (JinjaStatementStartTag, JinjaStatementEndTag, JinjaStatement): + log.debug(u"Type is a jinja statement tag") + phase = self.phases["inJinjaStatement"] elif type in (JinjaArgumentStartTag, JinjaArgumentEndTag, JinjaArgument): log.debug(u"Type is a jinja argument tag") phase = self.phases["inJinjaArgument"] @@ -230,8 +233,8 @@ def mainLoop(self): new_token = phase.processJinjaStatementStartTag(new_token) elif type == JinjaStatementEndTag: new_token = phase.processJinjaStatementEndTag(new_token) - elif type == JinjaStatementTag: - new_token = phase.processJinjaStatementTag(new_token) + elif type == JinjaStatement: + new_token = phase.processJinjaStatement(new_token) elif type == JinjaVariableStartTag: new_token = phase.processJinjaVariableStartTag(new_token) elif type == JinjaVariableEndTag: @@ -518,6 +521,8 @@ def processDoctype(self, token): self.parser.parseError("unexpected-doctype") def processCharacters(self, token): + log = logging.getLogger(u"html5lib") + log.debug(u"Inserting text {}.format(token)") self.tree.insertText(token["data"]) def processSpaceCharacters(self, token): @@ -529,7 +534,7 @@ def processJinjaStatementStartTag(self, token): def processJinjaStatementEndTag(self, token): pass - def processJinjaStatementTag(self, token): + def processJinjaStatement(self, token): pass def processJinjaVariableStartTag(self, token): @@ -610,6 +615,36 @@ def processJinjaFilter(self, token): element = self.tree.createElementWithoutNamespace(token) self.tree.openElements[-1].appendChild(element) + class InJinjaStatementPhase(Phase): + def processJinjaStatementStartTag(self, token): + log = logging.getLogger('html5lib') + log.debug(u"InJinjaStatement: Start Tag") + self.tree.reconstructActiveFormattingElements() + self.tree.insertElement(token) + + def processJinjaStatementEndTag(self, token): + log = logging.getLogger('html5lib') + log.debug(u"InJinjaStatement: End Tag {}".format(token["name"])) + for node in self.tree.openElements[::-1]: + log.debug(u"InJinjaStatement: Open tag {} token {}".format(node, token)) + if node.name == token["name"]: + self.tree.generateImpliedEndTags(exclude=token["name"]) + log.debug(u"InJinjaStatement: Implied end tag {} {}".format(self.tree.openElements[-1].name, token["name"])) + if self.tree.openElements[-1].name != token["name"]: + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + while self.tree.openElements.pop() != node: + pass + break + else: + if node.nameTuple in specialElements: + log.debug(u"InJinjaStatement Nametuple {} in {}".format(node.nameTuple, specialElements)) + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + break + + def processJinjaStatement(self, token): + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].appendChild(element) + class InJinjaArgumentPhase(Phase): def processJinjaArgumentStartTag(self, token): log = logging.getLogger('html5lib') @@ -1136,6 +1171,9 @@ def processSpaceCharactersDropNewline(self, token): self.tree.insertText(data) def processCharacters(self, token): + import logging + log = logging.getLogger(u"html5lib") + log.debug(u"In Body phase processing Characters {}".format(token)) if token["data"] == "\u0000": # The tokenizer should always emit null on its own return @@ -2831,6 +2869,7 @@ def processEndTag(self, token): # XXX "inHeadNoscript": InHeadNoScriptPhase, "afterHead": AfterHeadPhase, "inJinjaVariable": InJinjaVariablePhase, + "inJinjaStatement": InJinjaStatementPhase, "inJinjaArgument": InJinjaArgumentPhase, "inBody": InBodyPhase, "text": TextPhase, diff --git a/html5lib/tests/test_jinja.py b/html5lib/tests/test_jinja.py index 05413f7b..b3369fe0 100644 --- a/html5lib/tests/test_jinja.py +++ b/html5lib/tests/test_jinja.py @@ -6,33 +6,48 @@ def dump(tree, tabs=0): - log.debug(u"{}Tag '{}' - {} children - Value = {}".format( - "".join(["\t" for i in range(tabs)]), tree.tag, len(tree), tree.attrib['value'] if 'value' in tree.attrib else None)) + log.debug(u"{}Tag '{}' - {} children - Value = {} - Text = {}".format( + "".join(["\t" for i in range(tabs)]), tree.tag, len(tree), tree.attrib['value'] if 'value' in tree.attrib else None, tree.text)) for child in tree: dump(child, tabs + 1) class JinjaTestCase(unittest.TestCase): - def test_var_1(self): - parser = html5lib.HTMLParser(strict=False) + def setUp(self): + self.parser = html5lib.HTMLParser(strict=False, namespaceHTMLElements=False) + def test_var_1(self): html_string = """

{{ hi }}

""" - tree = parser.parseFragment(html_string) + tree = self.parser.parseFragment(html_string) h1 = tree[0] - self.assertEqual(h1.tag, "{http://www.w3.org/1999/xhtml}h1") + jt = h1[0] + var1 = jt[0] + self.assertEqual(h1.tag, "h1") + self.assertEqual(var1.tag, 'jinjavariable') + self.assertEqual(var1.attrib['value'], 'hi') - def test_filter_1(self): - parser = html5lib.HTMLParser(strict=False) + def test_var_2(self): + html_string = """

{{ a.b }}

""" + + tree = self.parser.parseFragment(html_string) + + h1 = tree[0] + jt = h1[0] + var1 = jt[0] + self.assertEqual(h1.tag, "h1") + self.assertEqual(var1.tag, 'jinjavariable') + self.assertEqual(var1.attrib['value'], 'a.b') + def test_filter_1(self): html_string = """

{{ hi | yo }}

""" - tree = parser.parseFragment(html_string) + tree = self.parser.parseFragment(html_string) h1 = tree[0] - self.assertEqual(h1.tag, "{http://www.w3.org/1999/xhtml}h1") + self.assertEqual(h1.tag, "h1") jt = h1[0] @@ -48,15 +63,40 @@ def test_filter_1(self): self.assertEqual(yo.attrib['value'], 'yo') def test_filter_2(self): - parser = html5lib.HTMLParser(strict=False) - html_string = """

{{ hi | yo("hi") }}

""" - tree = parser.parseFragment(html_string) + tree = self.parser.parseFragment(html_string) + dump(tree) + + h1 = tree[0] + self.assertEqual(h1.tag, "h1") + + jt = h1[0] + + hi = jt[0] + pipe1 = jt[1] + yo = jt[2] + + self.assertEqual(hi.tag, 'jinjavariable') + self.assertEqual(hi.attrib['value'], 'hi') + self.assertEqual(pipe1.tag, 'jinjapipe') + self.assertEqual(pipe1.attrib['value'], '|') + self.assertEqual(yo.tag, 'jinjafilter') + self.assertEqual(yo.attrib['value'], 'yo') + + arg1 = yo[0] + + self.assertEqual(arg1.tag, 'jinjaargument') + self.assertEqual(arg1.attrib['value'], '"hi"') + + def test_filter_3(self): + html_string = """

{{ hi | yo("hi", "mike") }}

""" + + tree = self.parser.parseFragment(html_string) dump(tree) h1 = tree[0] - self.assertEqual(h1.tag, "{http://www.w3.org/1999/xhtml}h1") + self.assertEqual(h1.tag, "h1") jt = h1[0] @@ -72,6 +112,57 @@ def test_filter_2(self): self.assertEqual(yo.attrib['value'], 'yo') arg1 = yo[0] + arg2 = yo[1] self.assertEqual(arg1.tag, 'jinjaargument') self.assertEqual(arg1.attrib['value'], '"hi"') + self.assertEqual(arg2.tag, 'jinjaargument') + self.assertEqual(arg2.attrib['value'], '"mike"') + + def test_jinja_block(self): + html_string = """ + {% block title %}Hi{% endblock %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + block = tree[0] + + self.assertEqual(block.tag, 'jinjablock') + self.assertEqual(block.text, 'Hi') + + def test_jinja_block_in_title(self): + html_string = """ + {% block title %}{% endblock %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + title = tree[0] + block = title[0] + + self.assertEqual(title.tag, 'title') + self.assertEqual(block.tag, 'jinjablock') + self.assertEqual(block.attrib['value'], 'title') + + def test_jinja_for(self): + html_string = """ + {% for a in b %} + {{ a }} + {% endfor %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + block = tree[0] + var = block[0] + var1 = var[0] + + self.assertEqual(block.tag, 'jinjafor') + self.assertEqual(block.attrib['value'], 'a in b') + self.assertEqual(var.tag, 'jinjavariabletag') + self.assertEqual(var1.tag, 'jinjavariable') + self.assertEqual(var1.attrib['value'], 'a') diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py index 23ded9fa..4c2553f4 100644 --- a/html5lib/tokenizer.py +++ b/html5lib/tokenizer.py @@ -256,11 +256,14 @@ def emitCurrentToken(self): # Below are the various tokenizer states worked out. def dataState(self): data = self.stream.char() + log.debug(u"Tokenizer DataState {}".format(data)) + if data == "&": self.state = self.entityDataState elif data == "<": self.state = self.tagOpenState elif data == "{": + self.prevState = self.state self.state = self.jinjaOpenState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], @@ -280,7 +283,7 @@ def dataState(self): # have already been appended to lastFourChars and will have broken # any sequences else: - chars = self.stream.charsUntil(("&", "<", "\u0000")) + chars = self.stream.charsUntil(("&", "<", "\u0000", "{")) self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data + chars}) return True @@ -305,16 +308,65 @@ def jinjaOpenState(self): self.state = self.jinjaVariableState elif data == "%": - self.tokenQueue.append({ - "type": tokenTypes["JinjaStatementStartTag"], - "name": "{%", "data": {}, - "namespace": None, - "selfClosing": False - }) + self.state = self.jinjaStatementStartState + else: + self.stream.unget(data) + self.stream.unget("{") + chars = self.stream.charsUntil(("&", "<", "\u0000", "{")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) + + return True + + def jinjaStatementStartState(self): + data = self.stream.char() + + if data in spaceCharacters: + pass + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-jinja-statement"}) + self.state = self.prevState + else: + block_type = data + self.stream.charsUntil(frozenset(("%")) | spaceCharacters) + + block_definition = self.stream.charsUntil(frozenset(("%", "\u0000"))) + + block_definition = block_definition.strip(" \t") + + if block_type.startswith("end"): + block_type = block_type.replace("end", "") + + self.tokenQueue.append({ + "type": tokenTypes["JinjaStatementEndTag"], + 'name': u"jinja{}".format(block_type.lower()), + "data": { + "position": self.stream.position() + }, + "selfClosing": False + }) + else: + self.tokenQueue.append({ + "type": tokenTypes["JinjaStatementStartTag"], + 'name': u"jinja{}".format(block_type.lower()), + "data": { + "value": block_definition, + "position": self.stream.position() + }, + "selfClosing": False + }) + + data = self.stream.char() + if data != '%': + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "no-close-of-jinja-statement"}) + data = self.stream.char() + if data != '}': + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "no-close-of-jinja-statement"}) - self.state = self.jinjaStatementState + self.state = self.dataState - #self.state = self.dataState return True def jinjaStatementEndState(self): @@ -324,15 +376,15 @@ def jinjaStatementEndState(self): if data == "}": self.tokenQueue.append({ "type": tokenTypes["JinjaStatementEndTag"], - "name": "%}", "data": [], + "name": "jinjastatementend", "data": [], "selfClosing": False }) - self.state = self.dataState + self.state = self.prevState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-jinja-statement-closing-tag-but-got-eof", "datavars": {"data": data}}) - self.state = self.dataState + self.state = self.prevState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-jinja-statement-closing-tag-but-got-char", @@ -340,7 +392,6 @@ def jinjaStatementEndState(self): self.stream.unget(data) self.state = self.bogusCommentState - #self.state = self.dataState return True def jinjaVariableEndState(self): @@ -353,12 +404,12 @@ def jinjaVariableEndState(self): "name": u"jinjavariabletag", "data": [], "selfClosing": False }) - self.state = self.dataState + self.state = self.prevState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-jinja-variable-closing-tag-but-got-eof", "datavars": {"data": data}}) - self.state = self.dataState + self.state = self.prevState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-jinja-variable-closing-tag-but-got-char", @@ -366,7 +417,6 @@ def jinjaVariableEndState(self): self.stream.unget(data) self.state = self.bogusCommentState - #self.state = self.dataState return True def jinjaStatementState(self): @@ -377,11 +427,18 @@ def jinjaStatementState(self): elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-jinja-statement"}) - self.state = self.dataState + self.state = self.prevState else: chars = self.stream.charsUntil(("%", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["JinjaStatementTag"], "data": - data + chars}) + self.tokenQueue.append({ + "type": tokenTypes["JinjaStatement"], + 'name': "jinjastatement", + "data": { + "value": data + chars, + "position": self.stream.position() + }, + "selfClosing": False + }) return True @@ -404,7 +461,7 @@ def jinjaVariableState(self): elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-jinja-variable"}) - self.state = self.dataState + self.state = self.prevState elif data in spaceCharacters: # Skip spaces pass @@ -446,7 +503,6 @@ def jinjaArgState(self): data = self.stream.char() log.debug(u"Arg {}".format(data)) - print "Got data", data if data == ")": self.tokenQueue.append({ @@ -458,15 +514,13 @@ def jinjaArgState(self): elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-jinja-argument"}) - self.state = self.dataState - elif data in spaceCharacters: + self.state = self.prevState + elif data in spaceCharacters or data in [',']: # Skip spaces pass else: chars = self.stream.charsUntil(frozenset((",", ")"))) - print "Got chars", chars - self.currentToken = {"type": tokenTypes["JinjaArgument"], "name": "jinjaargument", "selfClosing": True, "data": { "value": data + chars, @@ -482,6 +536,9 @@ def rcdataState(self): self.state = self.characterReferenceInRcdata elif data == "<": self.state = self.rcdataLessThanSignState + elif data == "{": + self.prevState = self.state + self.state = self.jinjaOpenState elif data == EOF: # Tokenization ends. return False @@ -514,6 +571,9 @@ def rawtextState(self): data = self.stream.char() if data == "<": self.state = self.rawtextLessThanSignState + elif data == "{": + self.prevState = self.state + self.state = self.jinjaOpenState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) @@ -532,6 +592,9 @@ def scriptDataState(self): data = self.stream.char() if data == "<": self.state = self.scriptDataLessThanSignState + elif data == "{": + self.prevState = self.state + self.state = self.jinjaOpenState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) @@ -551,6 +614,9 @@ def plaintextState(self): if data == EOF: # Tokenization ends. return False + elif data == "{": + self.prevState = self.state + self.state = self.jinjaOpenState elif data == "\u0000": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) From c12b08f036bc7020ac9b2d89d7571c56226f863f Mon Sep 17 00:00:00 2001 From: "aelaguiz@gmail.com" Date: Fri, 26 Dec 2014 12:00:57 -0600 Subject: [PATCH 3/9] Refactored unit tests --- html5lib/tests/test_jinja.py | 280 +++++++++++++++++++++++------------ 1 file changed, 189 insertions(+), 91 deletions(-) diff --git a/html5lib/tests/test_jinja.py b/html5lib/tests/test_jinja.py index b3369fe0..1bd53d67 100644 --- a/html5lib/tests/test_jinja.py +++ b/html5lib/tests/test_jinja.py @@ -15,52 +15,61 @@ def dump(tree, tabs=0): class JinjaTestCase(unittest.TestCase): def setUp(self): - self.parser = html5lib.HTMLParser(strict=False, namespaceHTMLElements=False) + self.parser = html5lib.HTMLParser(strict=True, namespaceHTMLElements=False) def test_var_1(self): html_string = """

{{ hi }}

""" tree = self.parser.parseFragment(html_string) - h1 = tree[0] - jt = h1[0] - var1 = jt[0] - self.assertEqual(h1.tag, "h1") - self.assertEqual(var1.tag, 'jinjavariable') - self.assertEqual(var1.attrib['value'], 'hi') + self.assertTree(tree, [{ + 'tag': 'h1', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'hi' + }] + }] + }]) def test_var_2(self): html_string = """

{{ a.b }}

""" tree = self.parser.parseFragment(html_string) - h1 = tree[0] - jt = h1[0] - var1 = jt[0] - self.assertEqual(h1.tag, "h1") - self.assertEqual(var1.tag, 'jinjavariable') - self.assertEqual(var1.attrib['value'], 'a.b') + self.assertTree(tree, [{ + 'tag': 'h1', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'a.b' + }] + }] + }]) def test_filter_1(self): html_string = """

{{ hi | yo }}

""" tree = self.parser.parseFragment(html_string) - h1 = tree[0] - self.assertEqual(h1.tag, "h1") - - jt = h1[0] - - hi = jt[0] - pipe1 = jt[1] - yo = jt[2] - - self.assertEqual(hi.tag, 'jinjavariable') - self.assertEqual(hi.attrib['value'], 'hi') - self.assertEqual(pipe1.tag, 'jinjapipe') - self.assertEqual(pipe1.attrib['value'], '|') - self.assertEqual(yo.tag, 'jinjafilter') - self.assertEqual(yo.attrib['value'], 'yo') + self.assertTree(tree, [{ + 'tag': 'h1', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'hi' + }, { + 'tag': 'jinjapipe', + 'value': '|' + }, { + 'tag': 'jinjafilter', + 'value': 'yo' + }] + }] + }]) def test_filter_2(self): html_string = """

{{ hi | yo("hi") }}

""" @@ -68,26 +77,26 @@ def test_filter_2(self): tree = self.parser.parseFragment(html_string) dump(tree) - h1 = tree[0] - self.assertEqual(h1.tag, "h1") - - jt = h1[0] - - hi = jt[0] - pipe1 = jt[1] - yo = jt[2] - - self.assertEqual(hi.tag, 'jinjavariable') - self.assertEqual(hi.attrib['value'], 'hi') - self.assertEqual(pipe1.tag, 'jinjapipe') - self.assertEqual(pipe1.attrib['value'], '|') - self.assertEqual(yo.tag, 'jinjafilter') - self.assertEqual(yo.attrib['value'], 'yo') - - arg1 = yo[0] - - self.assertEqual(arg1.tag, 'jinjaargument') - self.assertEqual(arg1.attrib['value'], '"hi"') + self.assertTree(tree, [{ + 'tag': 'h1', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'hi' + }, { + 'tag': 'jinjapipe', + 'value': '|' + }, { + 'tag': 'jinjafilter', + 'value': 'yo', + 'children': [{ + 'tag': 'jinjaargument', + 'value': '"hi"' + }] + }] + }] + }]) def test_filter_3(self): html_string = """

{{ hi | yo("hi", "mike") }}

""" @@ -95,29 +104,29 @@ def test_filter_3(self): tree = self.parser.parseFragment(html_string) dump(tree) - h1 = tree[0] - self.assertEqual(h1.tag, "h1") - - jt = h1[0] - - hi = jt[0] - pipe1 = jt[1] - yo = jt[2] - - self.assertEqual(hi.tag, 'jinjavariable') - self.assertEqual(hi.attrib['value'], 'hi') - self.assertEqual(pipe1.tag, 'jinjapipe') - self.assertEqual(pipe1.attrib['value'], '|') - self.assertEqual(yo.tag, 'jinjafilter') - self.assertEqual(yo.attrib['value'], 'yo') - - arg1 = yo[0] - arg2 = yo[1] - - self.assertEqual(arg1.tag, 'jinjaargument') - self.assertEqual(arg1.attrib['value'], '"hi"') - self.assertEqual(arg2.tag, 'jinjaargument') - self.assertEqual(arg2.attrib['value'], '"mike"') + self.assertTree(tree, [{ + 'tag': 'h1', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'hi' + }, { + 'tag': 'jinjapipe', + 'value': '|' + }, { + 'tag': 'jinjafilter', + 'value': 'yo', + 'children': [{ + 'tag': 'jinjaargument', + 'value': '"hi"' + }, { + 'tag': 'jinjaargument', + 'value': '"mike"' + }] + }] + }] + }]) def test_jinja_block(self): html_string = """ @@ -125,12 +134,11 @@ def test_jinja_block(self): """ tree = self.parser.parseFragment(html_string) - dump(tree) - - block = tree[0] - self.assertEqual(block.tag, 'jinjablock') - self.assertEqual(block.text, 'Hi') + self.assertTree(tree, [{ + 'tag': 'jinjablock', + 'text': 'Hi' + }]) def test_jinja_block_in_title(self): html_string = """ @@ -138,14 +146,14 @@ def test_jinja_block_in_title(self): """ tree = self.parser.parseFragment(html_string) - dump(tree) - - title = tree[0] - block = title[0] - self.assertEqual(title.tag, 'title') - self.assertEqual(block.tag, 'jinjablock') - self.assertEqual(block.attrib['value'], 'title') + self.assertTree(tree, [{ + 'tag': 'title', + 'children': [{ + 'tag': 'jinjablock', + 'value': 'title' + }] + }]) def test_jinja_for(self): html_string = """ @@ -155,14 +163,104 @@ def test_jinja_for(self): """ tree = self.parser.parseFragment(html_string) + + self.assertTree(tree, [{ + 'tag': 'jinjafor', + 'value': 'a in b', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'a' + }] + }] + }]) + + def test_complete_doc(self): + html_string = """ + + + + My Webpage + + + + +

My Webpage

+ {{ a_variable }} + + + """ + + tree = self.parser.parse(html_string) dump(tree) + self.assertTree(tree, [{ + 'tag': 'head', + 'children': [{ + 'tag': 'title', + 'text': 'My Webpage' + }] + }, { + 'tag': 'body', + 'children': [{ + 'tag': 'ul', + 'children': [{ + 'tag': 'jinjafor', + 'value': 'item in navigation', + 'children': [{ + 'tag': 'li', + 'children': [{ + 'tag': 'a', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'item.caption' + }] + }] + }] + }] + }] + }, { + 'tag': 'h1', + 'text': 'My Webpage' + }, { + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'a_variable' + }] + }] + }]) + + def test_jinja_if(self): + html_string = """ + {% if True %}yay{% endif %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaif', + 'text': 'yay' + }]) + + def assertTree(self, root, spec): + self.assertEqual(len(root), len(spec)) + + for child, spec_child in zip(root, spec): + self.assertEqual(child.tag, spec_child['tag']) + + if 'text' in spec_child: + self.assertEqual(child.text, spec_child['text']) - block = tree[0] - var = block[0] - var1 = var[0] + if 'value' in spec_child: + self.assertEqual(child.attrib['value'], spec_child['value']) - self.assertEqual(block.tag, 'jinjafor') - self.assertEqual(block.attrib['value'], 'a in b') - self.assertEqual(var.tag, 'jinjavariabletag') - self.assertEqual(var1.tag, 'jinjavariable') - self.assertEqual(var1.attrib['value'], 'a') + if 'children' in spec_child: + self.assertTree(child, spec_child['children']) From 2ffcf5c1a5705fc761ff5d8f25a689fe19923801 Mon Sep 17 00:00:00 2001 From: "aelaguiz@gmail.com" Date: Fri, 26 Dec 2014 15:08:46 -0600 Subject: [PATCH 4/9] Fixed tests, added support for import extend & include --- html5lib/constants.py | 5 +- html5lib/html5parser.py | 24 +++++- html5lib/tests/test_jinja.py | 146 +++++++++++++++++++++++++++------ html5lib/tests/test_parser2.py | 2 +- html5lib/tokenizer.py | 49 +++++++++-- html5lib/treebuilders/etree.py | 1 - 6 files changed, 189 insertions(+), 38 deletions(-) diff --git a/html5lib/constants.py b/html5lib/constants.py index 4f5e7974..d92079a3 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -3096,7 +3096,10 @@ "JinjaPipe": 15, "JinjaArgumentStartTag": 16, "JinjaArgumentEndTag": 17, - "JinjaArgument": 18 + "JinjaArgument": 18, + "JinjaExtendTag": 19, + "JinjaIncludeTag": 20, + "JinjaImportTag": 21 } tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index e7c91503..09527cc6 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -177,6 +177,9 @@ def mainLoop(self): JinjaArgumentStartTag = tokenTypes["JinjaArgumentStartTag"] JinjaArgumentEndTag = tokenTypes["JinjaArgumentEndTag"] JinjaArgument = tokenTypes["JinjaArgument"] + JinjaExtendTag = tokenTypes["JinjaExtendTag"] + JinjaIncludeTag = tokenTypes["JinjaIncludeTag"] + JinjaImportTag = tokenTypes["JinjaImportTag"] for token in self.normalizedTokens(): new_token = token @@ -193,13 +196,10 @@ def mainLoop(self): new_token = None else: if type in (JinjaVariableStartTag, JinjaVariableEndTag, JinjaVariable, JinjaFilter, JinjaPipe): - log.debug(u"Type is a jinja variable tag") phase = self.phases["inJinjaVariable"] elif type in (JinjaStatementStartTag, JinjaStatementEndTag, JinjaStatement): - log.debug(u"Type is a jinja statement tag") phase = self.phases["inJinjaStatement"] elif type in (JinjaArgumentStartTag, JinjaArgumentEndTag, JinjaArgument): - log.debug(u"Type is a jinja argument tag") phase = self.phases["inJinjaArgument"] elif ( len(self.tree.openElements) == 0 or @@ -251,6 +251,12 @@ def mainLoop(self): new_token = phase.processJinjaArgumentEndTag(new_token) elif type == JinjaArgument: new_token = phase.processJinjaArgument(new_token) + elif type == JinjaExtendTag: + new_token = phase.processJinjaExtendTag(new_token) + elif type == JinjaIncludeTag: + new_token = phase.processJinjaIncludeTag(new_token) + elif type == JinjaImportTag: + new_token = phase.processJinjaImportTag(new_token) if (type == StartTagToken and token["selfClosing"] and not token["selfClosingAcknowledged"]): @@ -546,6 +552,18 @@ def processJinjaVariableEndTag(self, token): def processJinjaVariable(self, token): pass + def processJinjaExtendTag(self, token): + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].appendChild(element) + + def processJinjaIncludeTag(self, token): + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].appendChild(element) + + def processJinjaImportTag(self, token): + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].appendChild(element) + def processJinjaArgumentStartTag(self, token): pass diff --git a/html5lib/tests/test_jinja.py b/html5lib/tests/test_jinja.py index 1bd53d67..b362057f 100644 --- a/html5lib/tests/test_jinja.py +++ b/html5lib/tests/test_jinja.py @@ -15,7 +15,7 @@ def dump(tree, tabs=0): class JinjaTestCase(unittest.TestCase): def setUp(self): - self.parser = html5lib.HTMLParser(strict=True, namespaceHTMLElements=False) + self.parser = html5lib.HTMLParser(strict=True, namespaceHTMLElements=False, tree=html5lib.treebuilders.getTreeBuilder("etree", fullTree=True)) def test_var_1(self): html_string = """

{{ hi }}

""" @@ -197,42 +197,48 @@ def test_complete_doc(self): """ tree = self.parser.parse(html_string) - dump(tree) + self.assertTree(tree, [{ - 'tag': 'head', - 'children': [{ - 'tag': 'title', - 'text': 'My Webpage' - }] + 'tag': '', + 'text': 'html' }, { - 'tag': 'body', + 'tag': 'html', 'children': [{ - 'tag': 'ul', + 'tag': 'head', 'children': [{ - 'tag': 'jinjafor', - 'value': 'item in navigation', + 'tag': 'title', + 'text': 'My Webpage' + }] + }, { + 'tag': 'body', + 'children': [{ + 'tag': 'ul', 'children': [{ - 'tag': 'li', + 'tag': 'jinjafor', + 'value': 'item in navigation', 'children': [{ - 'tag': 'a', + 'tag': 'li', 'children': [{ - 'tag': 'jinjavariabletag', + 'tag': 'a', 'children': [{ - 'tag': 'jinjavariable', - 'value': 'item.caption' + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'item.caption' + }] }] }] }] }] - }] - }, { - 'tag': 'h1', - 'text': 'My Webpage' - }, { - 'tag': 'jinjavariabletag', - 'children': [{ - 'tag': 'jinjavariable', - 'value': 'a_variable' + }, { + 'tag': 'h1', + 'text': 'My Webpage' + }, { + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'a_variable' + }] }] }] }]) @@ -250,6 +256,89 @@ def test_jinja_if(self): 'text': 'yay' }]) + def test_jinja_if_lstrip(self): + html_string = """ + {%+ if True %}yay{% endif %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaif', + 'text': 'yay', + 'attrs': { + 'lstrip': False + } + }]) + + def test_jinja_strip_blocks(self): + html_string = """ + {% for item in seq -%} + {{ item }} + {%- endfor %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjafor', + 'attrs': { + 'rstrip': True + }, + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': 'item' + }] + }] + }]) + + def test_jinja_extend(self): + html_string = """ + {% extends "base.html" %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaextends', + 'value': '"base.html"' + }]) + + def test_jinja_include(self): + html_string = """ + {% include ['special_sidebar.html', 'sidebar.html'] ignore missing %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjainclude', + 'value': "['special_sidebar.html', 'sidebar.html'] ignore missing" + }]) + + def test_jinja_import(self): + html_string = """ + {% import 'forms.html' as forms %} + {% from 'forms.html' import input as input_field, textarea %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaimport', + 'value': "'forms.html' as forms" + }, { + 'tag': 'jinjaimport', + 'value': "'forms.html' import input as input_field, textarea" + }]) + def assertTree(self, root, spec): self.assertEqual(len(root), len(spec)) @@ -264,3 +353,10 @@ def assertTree(self, root, spec): if 'children' in spec_child: self.assertTree(child, spec_child['children']) + else: + self.assertEqual(len(child), 0) + + if 'attrs' in spec_child: + for k, v in spec_child['attrs'].iteritems(): + self.assertIn(k, child.attrib) + self.assertEqual(v, child.attrib[k]) diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 20bbdf31..c90ca678 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -40,7 +40,7 @@ def test_namespace_html_elements_1_dom(self): def test_namespace_html_elements_0_etree(self): parser = html5parser.HTMLParser(namespaceHTMLElements=True) doc = parser.parse("") - self.assertTrue(list(doc)[0].tag == "{%s}html" % (namespaces["html"],)) + self.assertEqual(list(doc)[0].tag, "{%s}html" % (namespaces["html"],)) def test_namespace_html_elements_1_etree(self): parser = html5parser.HTMLParser(namespaceHTMLElements=False) diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py index 4c2553f4..9e6311fa 100644 --- a/html5lib/tokenizer.py +++ b/html5lib/tokenizer.py @@ -328,31 +328,66 @@ def jinjaStatementStartState(self): "eof-in-jinja-statement"}) self.state = self.prevState else: + attrs = {} + + if data in ['-', '+']: + attrs['lstrip'] = False + + data = self.stream.char() + while data in spaceCharacters: + data = self.stream.char() + block_type = data + self.stream.charsUntil(frozenset(("%")) | spaceCharacters) block_definition = self.stream.charsUntil(frozenset(("%", "\u0000"))) block_definition = block_definition.strip(" \t") + if block_definition and block_definition[-1] == '-': + attrs['rstrip'] = True + block_definition = block_definition[:-1].rstrip() + + attrs.update({ + "value": block_definition, + "position": self.stream.position() + }) + if block_type.startswith("end"): block_type = block_type.replace("end", "") + attrs['value'] = block_type.lower() self.tokenQueue.append({ "type": tokenTypes["JinjaStatementEndTag"], 'name': u"jinja{}".format(block_type.lower()), - "data": { - "position": self.stream.position() - }, + "data": attrs, "selfClosing": False }) + elif block_type == "extends": + self.tokenQueue.append({ + "type": tokenTypes["JinjaExtendTag"], + 'name': u"jinja{}".format(block_type.lower()), + "data": attrs, + "selfClosing": True + }) + elif block_type == "include": + self.tokenQueue.append({ + "type": tokenTypes["JinjaIncludeTag"], + 'name': u"jinja{}".format(block_type.lower()), + "data": attrs, + "selfClosing": True + }) + elif block_type in ["import", "from"]: + self.tokenQueue.append({ + "type": tokenTypes["JinjaImportTag"], + 'name': u"jinjaimport", + "data": attrs, + "selfClosing": True + }) else: self.tokenQueue.append({ "type": tokenTypes["JinjaStatementStartTag"], 'name': u"jinja{}".format(block_type.lower()), - "data": { - "value": block_definition, - "position": self.stream.position() - }, + "data": attrs, "selfClosing": False }) diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 03d51275..5d68fcd8 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -63,7 +63,6 @@ def _getAttributes(self): return self._element.attrib def _setAttributes(self, attributes): - log.debug(u"Attributes {}".format(attributes)) # Delete existing attributes first # XXX - there may be a better way to do this... for key in list(self._element.attrib.keys()): From c7461f968f2bf38d3e01fce117ff0aad76d3b327 Mon Sep 17 00:00:00 2001 From: "aelaguiz@gmail.com" Date: Fri, 26 Dec 2014 15:38:09 -0600 Subject: [PATCH 5/9] Works with inline if --- html5lib/tests/test_jinja.py | 30 ++++++++++++++++++++++++++++++ html5lib/tokenizer.py | 11 +++++++---- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/html5lib/tests/test_jinja.py b/html5lib/tests/test_jinja.py index b362057f..58278a50 100644 --- a/html5lib/tests/test_jinja.py +++ b/html5lib/tests/test_jinja.py @@ -21,6 +21,7 @@ def test_var_1(self): html_string = """

{{ hi }}

""" tree = self.parser.parseFragment(html_string) + dump(tree) self.assertTree(tree, [{ 'tag': 'h1', @@ -37,6 +38,7 @@ def test_var_2(self): html_string = """

{{ a.b }}

""" tree = self.parser.parseFragment(html_string) + dump(tree) self.assertTree(tree, [{ 'tag': 'h1', @@ -339,6 +341,34 @@ def test_jinja_import(self): 'value': "'forms.html' import input as input_field, textarea" }]) + def test_inline_if(self): + html_string = """ + {{ '[%s]' % page.title if page.title }} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': "'[%s]'" + }, { + 'tag': 'jinjavariable', + 'value': "%" + }, { + 'tag': 'jinjavariable', + 'value': "page.title" + }, { + 'tag': 'jinjavariable', + 'value': "if" + }, { + 'tag': 'jinjavariable', + 'value': "page.title" + }] + }]) + def assertTree(self, root, spec): self.assertEqual(len(root), len(spec)) diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py index 9e6311fa..98ae4816 100644 --- a/html5lib/tokenizer.py +++ b/html5lib/tokenizer.py @@ -233,7 +233,7 @@ def processEntityInAttribute(self, allowedChar): """ self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) - def emitCurrentToken(self): + def emitCurrentToken(self, resetState=True): """This method is a generic handler for emitting the tags. It also sets the state to "data" because that's what's needed after a token has been emitted. @@ -251,7 +251,9 @@ def emitCurrentToken(self): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "self-closing-flag-on-end-tag"}) self.tokenQueue.append(token) - self.state = self.dataState + + if resetState: + self.state = self.dataState # Below are the various tokenizer states worked out. def dataState(self): @@ -508,8 +510,9 @@ def jinjaVariableState(self): }} self.tokenQueue.append(self.currentToken) # If this is the first token after the variable start tag - elif self.currentToken['type'] == tokenTypes["JinjaVariableStartTag"]: - #log.debug(u"Got start tag {}".format(("|", "}", "\u0000") | spaceCharacters)) + elif self.currentToken['type'] == tokenTypes["JinjaVariableStartTag"]\ + or self.currentToken['type'] == tokenTypes["JinjaVariable"]: + #log.debug(u"Got start tag {}".format(("|", "}", "\u0000") | spaceCharacters)) chars = self.stream.charsUntil(frozenset(("(", "|", "}", "\u0000")) | spaceCharacters) self.currentToken = {"type": tokenTypes["JinjaVariable"], From 2db2cb07d3c3eafc080fff2f79fcda4d3b8bbe25 Mon Sep 17 00:00:00 2001 From: "aelaguiz@gmail.com" Date: Fri, 26 Dec 2014 16:08:59 -0600 Subject: [PATCH 6/9] Added jinja comment suport --- html5lib/constants.py | 3 +- html5lib/html5parser.py | 7 +++++ html5lib/inputstream.py | 3 ++ html5lib/tests/test_jinja.py | 13 +++++++++ html5lib/tokenizer.py | 55 ++++++++++++++++++++++++++++++++++++ 5 files changed, 80 insertions(+), 1 deletion(-) diff --git a/html5lib/constants.py b/html5lib/constants.py index d92079a3..d1477b31 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -3099,7 +3099,8 @@ "JinjaArgument": 18, "JinjaExtendTag": 19, "JinjaIncludeTag": 20, - "JinjaImportTag": 21 + "JinjaImportTag": 21, + "JinjaComment": 22 } tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 09527cc6..87580d19 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -180,6 +180,7 @@ def mainLoop(self): JinjaExtendTag = tokenTypes["JinjaExtendTag"] JinjaIncludeTag = tokenTypes["JinjaIncludeTag"] JinjaImportTag = tokenTypes["JinjaImportTag"] + JinjaComment = tokenTypes["JinjaComment"] for token in self.normalizedTokens(): new_token = token @@ -243,6 +244,8 @@ def mainLoop(self): new_token = phase.processJinjaVariable(new_token) elif type == JinjaPipe: new_token = phase.processJinjaPipe(new_token) + elif type == JinjaComment: + new_token = phase.processJinjaComment(new_token) elif type == JinjaFilter: new_token = phase.processJinjaFilter(new_token) elif type == JinjaArgumentStartTag: @@ -560,6 +563,10 @@ def processJinjaIncludeTag(self, token): element = self.tree.createElementWithoutNamespace(token) self.tree.openElements[-1].appendChild(element) + def processJinjaComment(self, token): + element = self.tree.createElementWithoutNamespace(token) + self.tree.openElements[-1].appendChild(element) + def processJinjaImportTag(self, token): element = self.tree.createElementWithoutNamespace(token) self.tree.openElements[-1].appendChild(element) diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py index 9e03b931..6eb74fbd 100644 --- a/html5lib/inputstream.py +++ b/html5lib/inputstream.py @@ -4,6 +4,7 @@ import codecs import re +import logging from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase from .constants import encodings, ReparseException @@ -43,6 +44,8 @@ class BufferedIOBase(object): # Cache for charsUntil() charsUntilRegEx = {} +log = logging.getLogger(u"html5lib") + class BufferedStream(object): """Buffering for streams that do not have buffering of their own diff --git a/html5lib/tests/test_jinja.py b/html5lib/tests/test_jinja.py index 58278a50..1397c905 100644 --- a/html5lib/tests/test_jinja.py +++ b/html5lib/tests/test_jinja.py @@ -369,6 +369,19 @@ def test_inline_if(self): }] }]) + def test_comment(self): + html_string = """ + {# {{ '[%s]' % page.title if page.title }} #} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjacomment', + 'value': "{{ '[%s]' % page.title if page.title }} " + }]) + def assertTree(self, root, spec): self.assertEqual(len(root), len(spec)) diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py index 98ae4816..ac99e96f 100644 --- a/html5lib/tokenizer.py +++ b/html5lib/tokenizer.py @@ -311,6 +311,8 @@ def jinjaOpenState(self): self.state = self.jinjaVariableState elif data == "%": self.state = self.jinjaStatementStartState + elif data == "#": + self.state = self.jinjaCommentStartState else: self.stream.unget(data) self.stream.unget("{") @@ -406,6 +408,59 @@ def jinjaStatementStartState(self): return True + def jinjaCommentStartState(self): + data = self.stream.char() + + if data in spaceCharacters: + pass + elif data is EOF: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "eof-in-jinja-statement"}) + self.state = self.prevState + else: + comment_text = data + self.stream.charsUntil(frozenset(("#", "\u0000"))) + next_two = self.stream.char() + + if next_two: + next_two += self.stream.char() + + if not next_two or len(next_two) < 2: + log.debug(u"Comment text {} = {}".format(comment_text, len(self.stream.chunk))) + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-jinja-comment-closing-tag-but-got-eof", + "datavars": {"data": data}}) + self.state = self.bogusCommentState + return True + + while next_two != "#}": + comment_text += self.stream.chunk + self.stream.charsUntil(frozenset(("#", "\u0000"))) + + next_two = self.stream.char() + + if next_two: + next_two += self.stream.char() + + if not next_two or len(next_two) < 2: + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": + "expected-jinja-comment-closing-tag-but-got-eof", + "datavars": {"data": data}}) + self.state = self.bogusCommentState + return True + + self.tokenQueue.append({ + "type": tokenTypes["JinjaComment"], + 'name': u"jinjacomment", + "data": { + "value": comment_text, + "position": self.stream.position() + }, + "selfClosing": True + }) + + self.state = self.dataState + + return True + def jinjaStatementEndState(self): # We got a { data = self.stream.char() From 1eb1beeeb683c851c8351f153d0d8f3d6353201c Mon Sep 17 00:00:00 2001 From: "aelaguiz@gmail.com" Date: Fri, 26 Dec 2014 19:02:59 -0600 Subject: [PATCH 7/9] Fixed if conditions --- html5lib/html5parser.py | 73 ++++++++++------------ html5lib/tests/test_jinja.py | 115 +++++++++++++++++++++++++++++------ html5lib/tokenizer.py | 5 +- 3 files changed, 131 insertions(+), 62 deletions(-) diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 87580d19..f4bfdb6f 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -456,7 +456,6 @@ def resetInsertionMode(self): new_phase = self.phases["inBody"] break - #log.debug(u"Changing phase to {}".format(new_phase)) self.phase = new_phase def parseRCDataRawtext(self, token, contentType): @@ -474,7 +473,6 @@ def parseRCDataRawtext(self, token, contentType): self.originalPhase = self.phase - log.debug(u"Changing phase to text") self.phase = self.phases["text"] @@ -530,8 +528,6 @@ def processDoctype(self, token): self.parser.parseError("unexpected-doctype") def processCharacters(self, token): - log = logging.getLogger(u"html5lib") - log.debug(u"Inserting text {}.format(token)") self.tree.insertText(token["data"]) def processSpaceCharacters(self, token): @@ -604,19 +600,13 @@ def processEndTag(self, token): class InJinjaVariablePhase(Phase): def processJinjaVariableStartTag(self, token): - log = logging.getLogger('html5lib') - log.debug(u"InJinjaVariable: Start Tag") self.tree.reconstructActiveFormattingElements() self.tree.insertElement(token) def processJinjaVariableEndTag(self, token): - log = logging.getLogger('html5lib') - log.debug(u"InJinjaVariable: End Tag {}".format(token["name"])) for node in self.tree.openElements[::-1]: - log.debug(u"InJinjaVariable: Open tag {} token {}".format(node, token)) if node.name == token["name"]: self.tree.generateImpliedEndTags(exclude=token["name"]) - log.debug(u"InJinjaVariable: Implied end tag {} {}".format(self.tree.openElements[-1].name, token["name"])) if self.tree.openElements[-1].name != token["name"]: self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) while self.tree.openElements.pop() != node: @@ -624,7 +614,6 @@ def processJinjaVariableEndTag(self, token): break else: if node.nameTuple in specialElements: - log.debug(u"InJinjaVariable Nametuple {} in {}".format(node.nameTuple, specialElements)) self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) break @@ -642,27 +631,51 @@ def processJinjaFilter(self, token): class InJinjaStatementPhase(Phase): def processJinjaStatementStartTag(self, token): - log = logging.getLogger('html5lib') - log.debug(u"InJinjaStatement: Start Tag") + if token['name'] == 'jinjaelse': + self.closeOpenIf(token) + elif token['name'] == 'jinjaelif': + self.closeOpenIf(token) + self.tree.reconstructActiveFormattingElements() self.tree.insertElement(token) + def closeOpenIf(self, token): + import logging + log = logging.getLogger(u"html5lib") + + for node in self.tree.openElements[::-1]: + log.debug(u"Prev {} Cur {}".format(node.name, token['name'])) + + if node.name == token["name"] or (node.name in ["jinjaif", "jinjaelif"] and token["name"] in ["jinjaelse", "jinjaelif"]): + self.tree.generateImpliedEndTags(exclude=token["name"]) + + if self.tree.openElements[-1].name in ["jinjaif", "jinjaelif"] and token["name"] in ["jinjaelse", "jinjaelif"]: + pass + elif self.tree.openElements[-1].name != token["name"]: + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + while self.tree.openElements.pop() != node: + pass + + break + else: + if node.nameTuple in specialElements: + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + break + def processJinjaStatementEndTag(self, token): - log = logging.getLogger('html5lib') - log.debug(u"InJinjaStatement: End Tag {}".format(token["name"])) for node in self.tree.openElements[::-1]: - log.debug(u"InJinjaStatement: Open tag {} token {}".format(node, token)) - if node.name == token["name"]: + if node.name == token["name"] or (node.name in ["jinjaelse", "jinjaelif"] and token["name"] == "jinjaif"): self.tree.generateImpliedEndTags(exclude=token["name"]) - log.debug(u"InJinjaStatement: Implied end tag {} {}".format(self.tree.openElements[-1].name, token["name"])) - if self.tree.openElements[-1].name != token["name"]: + + if self.tree.openElements[-1].name in ["jinjaelse", "jinjaelif"] and token["name"] == "jinjaif": + pass + elif self.tree.openElements[-1].name != token["name"]: self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) while self.tree.openElements.pop() != node: pass break else: if node.nameTuple in specialElements: - log.debug(u"InJinjaStatement Nametuple {} in {}".format(node.nameTuple, specialElements)) self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) break @@ -671,18 +684,7 @@ def processJinjaStatement(self, token): self.tree.openElements[-1].appendChild(element) class InJinjaArgumentPhase(Phase): - def processJinjaArgumentStartTag(self, token): - log = logging.getLogger('html5lib') - log.debug(u"InJinjaArgument: Start Tag") - - def processJinjaArgumentEndTag(self, token): - log = logging.getLogger('html5lib') - log.debug(u"InJinjaArgument: End Tag {}".format(token["name"])) - def processJinjaArgument(self, token): - log = logging.getLogger('html5lib') - log.debug(u"InJinjaArgument: Process Jinja Argument {}".format(token["name"])) - element = self.tree.createElementWithoutNamespace(token) self.tree.openElements[-1].childNodes[-1].appendChild(element) @@ -979,8 +981,6 @@ def startTagOther(self, token): def endTagHead(self, token): node = self.parser.tree.openElements.pop() assert node.name == "head", "Expected head got %s" % node.name - log = logging.getLogger(u"html5lib") - log.debug(u"Switching phase to afterHead") self.parser.phase = self.parser.phases["afterHead"] def endTagHtmlBodyBr(self, token): @@ -991,8 +991,6 @@ def endTagOther(self, token): self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) def anythingElse(self): - log = logging.getLogger(u"html5lib") - log.debug(u"Implied end head tag") self.endTagHead(impliedTagToken("head")) # XXX If we implement a parser for which scripting is disabled we need to @@ -1063,8 +1061,6 @@ def endTagOther(self, token): def anythingElse(self): self.tree.insertElement(impliedTagToken("body", "StartTag")) - log = logging.getLogger(u"html5lib") - log.debug(u"Changing phase to body") self.parser.phase = self.parser.phases["inBody"] self.parser.framesetOK = True @@ -1196,9 +1192,6 @@ def processSpaceCharactersDropNewline(self, token): self.tree.insertText(data) def processCharacters(self, token): - import logging - log = logging.getLogger(u"html5lib") - log.debug(u"In Body phase processing Characters {}".format(token)) if token["data"] == "\u0000": # The tokenizer should always emit null on its own return diff --git a/html5lib/tests/test_jinja.py b/html5lib/tests/test_jinja.py index 1397c905..9eec5340 100644 --- a/html5lib/tests/test_jinja.py +++ b/html5lib/tests/test_jinja.py @@ -17,6 +17,28 @@ class JinjaTestCase(unittest.TestCase): def setUp(self): self.parser = html5lib.HTMLParser(strict=True, namespaceHTMLElements=False, tree=html5lib.treebuilders.getTreeBuilder("etree", fullTree=True)) + def assertTree(self, root, spec): + self.assertEqual(len(root), len(spec)) + + for child, spec_child in zip(root, spec): + self.assertEqual(child.tag, spec_child['tag']) + + if 'text' in spec_child: + self.assertEqual(child.text, spec_child['text']) + + if 'value' in spec_child: + self.assertEqual(child.attrib['value'], spec_child['value']) + + if 'children' in spec_child: + self.assertTree(child, spec_child['children']) + else: + self.assertEqual(len(child), 0) + + if 'attrs' in spec_child: + for k, v in spec_child['attrs'].iteritems(): + self.assertIn(k, child.attrib) + self.assertEqual(v, child.attrib[k]) + def test_var_1(self): html_string = """

{{ hi }}

""" @@ -142,6 +164,18 @@ def test_jinja_block(self): 'text': 'Hi' }]) + def test_jinja_block_named(self): + html_string = """ + {% block title %}Hi{% endblock title %} + """ + + tree = self.parser.parseFragment(html_string) + + self.assertTree(tree, [{ + 'tag': 'jinjablock', + 'text': 'Hi' + }]) + def test_jinja_block_in_title(self): html_string = """ {% block title %}{% endblock %} @@ -258,6 +292,41 @@ def test_jinja_if(self): 'text': 'yay' }]) + def test_jinja_if_else(self): + html_string = """ + {% if True %}yay{% else %}boo{% endif %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaif', + 'text': 'yay' + }, { + 'tag': 'jinjaelse', + 'text': 'boo' + }]) + + def test_jinja_if_elif_else(self): + html_string = """ + {% if True %}yay{% elif False %}too{% else %}boo{% endif %} + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + + self.assertTree(tree, [{ + 'tag': 'jinjaif', + 'text': 'yay' + }, { + 'tag': 'jinjaelif', + 'text': 'too' + }, { + 'tag': 'jinjaelse', + 'text': 'boo' + }]) + def test_jinja_if_lstrip(self): html_string = """ {%+ if True %}yay{% endif %} @@ -382,24 +451,34 @@ def test_comment(self): 'value': "{{ '[%s]' % page.title if page.title }} " }]) - def assertTree(self, root, spec): - self.assertEqual(len(root), len(spec)) - - for child, spec_child in zip(root, spec): - self.assertEqual(child.tag, spec_child['tag']) - - if 'text' in spec_child: - self.assertEqual(child.text, spec_child['text']) + def test_file(self): + html_string = """ +

{{ (term_price.price / term_price.term.num_cycles) | currency }}/month

- if 'value' in spec_child: - self.assertEqual(child.attrib['value'], spec_child['value']) + """ - if 'children' in spec_child: - self.assertTree(child, spec_child['children']) - else: - self.assertEqual(len(child), 0) + tree = self.parser.parseFragment(html_string) + dump(tree) - if 'attrs' in spec_child: - for k, v in spec_child['attrs'].iteritems(): - self.assertIn(k, child.attrib) - self.assertEqual(v, child.attrib[k]) + self.assertTree(tree, [{ + 'tag': 'h4', + 'children': [{ + 'tag': 'jinjavariabletag', + 'children': [{ + 'tag': 'jinjavariable', + 'value': '(term_price.price' + }, { + 'tag': 'jinjavariable', + 'value': '/' + }, { + 'tag': 'jinjavariable', + 'value': 'term_price.term.num_cycles)' + }, { + 'tag': 'jinjapipe', + 'value': '|' + }, { + 'tag': 'jinjafilter', + 'value': 'currency' + }] + }] + }]) diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py index ac99e96f..411bd026 100644 --- a/html5lib/tokenizer.py +++ b/html5lib/tokenizer.py @@ -258,7 +258,6 @@ def emitCurrentToken(self, resetState=True): # Below are the various tokenizer states worked out. def dataState(self): data = self.stream.char() - log.debug(u"Tokenizer DataState {}".format(data)) if data == "&": self.state = self.entityDataState @@ -539,7 +538,7 @@ def jinjaVariableState(self): if data == "}": self.state = self.jinjaVariableEndState - elif data == "(": + elif data == "(" and self.currentToken['type'] in [tokenTypes["JinjaVariable"], tokenTypes["JinjaFilter"]]: self.currentToken = { "type": tokenTypes["JinjaArgumentStartTag"], "name": u"jinjaargumentstarttag", "data": {}, @@ -595,8 +594,6 @@ def jinjaVariableState(self): def jinjaArgState(self): data = self.stream.char() - log.debug(u"Arg {}".format(data)) - if data == ")": self.tokenQueue.append({ "type": tokenTypes["JinjaArgumentEndTag"], From 7a5a4902d5b3ee8134140696e6d25059ceb161cd Mon Sep 17 00:00:00 2001 From: "aelaguiz@gmail.com" Date: Fri, 26 Dec 2014 19:05:23 -0600 Subject: [PATCH 8/9] WIP --- html5lib/html5parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index f4bfdb6f..4b4021da 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -185,7 +185,7 @@ def mainLoop(self): for token in self.normalizedTokens(): new_token = token while new_token is not None: - log.debug(u"Token {} Phase = {}".format(new_token, self.phase)) + #log.debug(u"Token {} Phase = {}".format(new_token, self.phase)) currentNode = self.tree.openElements[-1] if self.tree.openElements else None currentNodeNamespace = currentNode.namespace if currentNode else None currentNodeName = currentNode.name if currentNode else None @@ -644,7 +644,7 @@ def closeOpenIf(self, token): log = logging.getLogger(u"html5lib") for node in self.tree.openElements[::-1]: - log.debug(u"Prev {} Cur {}".format(node.name, token['name'])) + #log.debug(u"Prev {} Cur {}".format(node.name, token['name'])) if node.name == token["name"] or (node.name in ["jinjaif", "jinjaelif"] and token["name"] in ["jinjaelse", "jinjaelif"]): self.tree.generateImpliedEndTags(exclude=token["name"]) From 197daa13badc3913915bdb24abfeac87d5e9b527 Mon Sep 17 00:00:00 2001 From: "aelaguiz@gmail.com" Date: Sat, 27 Dec 2014 16:41:14 -0600 Subject: [PATCH 9/9] More tolerant of jinja fragments --- html5lib/html5parser.py | 18 ++++++---- html5lib/tests/test_jinja.py | 65 ++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 6 deletions(-) diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 4b4021da..9768ab06 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -185,7 +185,7 @@ def mainLoop(self): for token in self.normalizedTokens(): new_token = token while new_token is not None: - #log.debug(u"Token {} Phase = {}".format(new_token, self.phase)) + log.debug(u"Token {} Phase = {}".format(new_token, self.phase)) currentNode = self.tree.openElements[-1] if self.tree.openElements else None currentNodeNamespace = currentNode.namespace if currentNode else None currentNodeName = currentNode.name if currentNode else None @@ -640,8 +640,8 @@ def processJinjaStatementStartTag(self, token): self.tree.insertElement(token) def closeOpenIf(self, token): - import logging - log = logging.getLogger(u"html5lib") + #import logging + #log = logging.getLogger(u"html5lib") for node in self.tree.openElements[::-1]: #log.debug(u"Prev {} Cur {}".format(node.name, token['name'])) @@ -663,6 +663,9 @@ def closeOpenIf(self, token): break def processJinjaStatementEndTag(self, token): + import logging + log = logging.getLogger(u"html5lib") + for node in self.tree.openElements[::-1]: if node.name == token["name"] or (node.name in ["jinjaelse", "jinjaelif"] and token["name"] == "jinjaif"): self.tree.generateImpliedEndTags(exclude=token["name"]) @@ -671,13 +674,13 @@ def processJinjaStatementEndTag(self, token): pass elif self.tree.openElements[-1].name != token["name"]: self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + while self.tree.openElements.pop() != node: pass break else: - if node.nameTuple in specialElements: - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - break + log.debug(u"Node {}".format(node.name)) + self.tree.openElements.pop() def processJinjaStatement(self, token): element = self.tree.createElementWithoutNamespace(token) @@ -1173,6 +1176,9 @@ def processEOF(self): "tfoot", "th", "thead", "tr", "body", "html")) for node in self.tree.openElements[::-1]: + if node.name.startswith("jinja"): + continue + if node.name not in allowed_elements: self.parser.parseError("expected-closing-tag-but-got-eof") break diff --git a/html5lib/tests/test_jinja.py b/html5lib/tests/test_jinja.py index 9eec5340..9194faf8 100644 --- a/html5lib/tests/test_jinja.py +++ b/html5lib/tests/test_jinja.py @@ -482,3 +482,68 @@ def test_file(self): }] }] }]) + + def test_embedded_block(self): + html_string = """ + Whatever + + """ + + tree = self.parser.parseFragment(html_string) + dump(tree) + #self.fail() + + #self.assertTree(tree, [{ + #'tag': 'jinjacomment', + #'value': "{{ '[%s]' % page.title if page.title }} " + #}]) + + def test_open_block(self): + html_string = """ + + + {% extends "base.html" %} + + {% block header_tag %} +
+ {% endblock %} + + {% block header_content %} + {{ super() }} + +
+
+

Get handpicked books delivered every month.

+

Praesent dapibus, neque id cursus faucibus, tortor neque egestas augue, eu vulputate magna eros eu erat. Faucibus, tortor praesent neque id dapibus.

+
+ +
+
+ + {% endblock %} + + {% block page_content %} + + + +
+
+
+

What is BookSea?

+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Donec odio. Quisque volutpat tmattis eros. Nullam malesuada erat ut turpis. Suspendisse urna nibh, viverra non, semper suscipit, posuere a, pede. Donec nec justo eget felis facilisis fermentum. Aliquam porttitor mauris sit amet orci. Aenean dignissim pellentesque felis.Morbi in sem quis dui placerat ornare. Pellentesque odio nisi, euismod in, pharetra a, ultricies in, diam. Sed arcu. Cras consequat.

+

Praesent dapibus, neque id cursus faucibus, tortor neque egestas augue, eu vulputate magna eros eu erat. Aliquam erat volutpat. Nam dui mi, tincidunt quis, accumsan porttitor, facilisis luctus, metus.

+
+
+
+ + {% include "components/descriptions.html" %} + """ + tree = self.parser.parseFragment(html_string) + dump(tree)