From d8329c4a383109500b9215c851d4ee64d159c4b0 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon
Date: Sun, 8 May 2016 00:15:16 +0100
Subject: [PATCH 1/2] lxml should only ever return an ASCII string as a str

See
---
 html5lib/treewalkers/lxmletree.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/html5lib/treewalkers/lxmletree.py b/html5lib/treewalkers/lxmletree.py
index 173fa082..36850086 100644
--- a/html5lib/treewalkers/lxmletree.py
+++ b/html5lib/treewalkers/lxmletree.py
@@ -15,7 +15,7 @@ def ensure_str(s):
     elif isinstance(s, text_type):
         return s
     else:
-        return s.decode("utf-8", "strict")
+        return s.decode("ascii", "strict")
 
 
 class Root(object):

From 10f4d57dceb1aac84390238ec9fd66c80006a0d1 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon
Date: Mon, 9 May 2016 20:46:42 -0700
Subject: [PATCH 2/2] fixup! lxml should only ever return an ASCII string as a str