diff --git a/php/library/HTML5/TreeBuilder.php b/php/library/HTML5/TreeBuilder.php
index c6e4b1f..359e225 100644
--- a/php/library/HTML5/TreeBuilder.php
+++ b/php/library/HTML5/TreeBuilder.php
@@ -31,6 +31,7 @@
// XERROR - with regards to parse errors
// XSCRIPT - with regards to scripting mode
// XENCODING - with regards to encoding (for reparsing tests)
+// XSKETCHY - fragile workarounds for PHP DOM/libxml shortcomings
class HTML5_TreeBuilder {
public $stack = array();
@@ -128,6 +129,15 @@ private function strConst($number) {
const NS_XML = 'http://www.w3.org/XML/1998/namespace';
const NS_XMLNS = 'http://www.w3.org/2000/xmlns/';
+ public $nsToPrefix = array( // namespace URI => prefix (with trailing ':') prepended to attribute names in insertForeignElement
+ self::NS_HTML => '', // no prefix: the attribute name is used as-is
+ self::NS_MATHML => 'math:',
+ self::NS_SVG => 'svg:',
+ self::NS_XLINK => 'xlink:',
+ self::NS_XML => 'xml:', // NOTE(review): insertForeignElement special-cases NS_XML via setAttributeNS, so this entry looks unused there
+ self::NS_XMLNS => 'xmlns:',
+ );
+
public function __construct() {
$this->mode = self::INITIAL;
$this->dom = new DOMDocument;
@@ -140,7 +150,6 @@ public function __construct() {
// Process tag tokens
public function emitToken($token, $mode = null) {
- // XXX: ignore parse errors... why are we emitting them, again?
if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
if ($mode === null) $mode = $this->mode;
@@ -195,9 +204,6 @@ public function emitToken($token, $mode = null) {
* doctype attribute of the Document object. */
if (!isset($token['public'])) $token['public'] = null;
if (!isset($token['system'])) $token['system'] = null;
- // Yes this is hacky. I'm kind of annoyed that I can't appendChild
- // a doctype to DOMDocument. Maybe I haven't chanted the right
- // syllables.
$impl = new DOMImplementation();
// This call can fail for particularly pathological cases (namely,
// the qualifiedName parameter ($token['name']) could be missing.
@@ -1753,7 +1759,7 @@ public function emitToken($token, $mode = null) {
* elements with an entry for the new element, and
* let node be the new element. */
// we don't know what the token is anymore
- $clone = $node->cloneNode();
+ $clone = $this->cloneNode($node);
$a_pos = array_search($node, $this->a_formatting, true);
$s_pos = array_search($node, $this->stack, true);
$this->a_formatting[$a_pos] = $clone;
@@ -1794,7 +1800,7 @@ public function emitToken($token, $mode = null) {
/* 8. Create an element for the token for which the
* formatting element was created. */
- $clone = $formatting_element->cloneNode();
+ $clone = $this->cloneNode($formatting_element);
/* 9. Take all of the child nodes of the furthest
block and append them to the element created in the
@@ -3177,7 +3183,7 @@ private function reconstructActiveFormattingElements() {
}
/* 8. Perform a shallow clone of the element entry to obtain clone. */
- $clone = $entry->cloneNode();
+ $clone = $this->cloneNode($entry);
/* 9. Append clone to the current node and push it onto the stack
of open elements so that it is the new current node. */
@@ -3672,22 +3678,25 @@ public function insertForeignElement($token, $namespaceURI) {
if (!empty($token['attr'])) {
foreach ($token['attr'] as $kp) {
$attr = $kp['name'];
+ // XSKETCHY: works around DOM's poor XML namespace support; most attributes
+ // are set under a literal prefixed name and flagged in html5_namespaced
if (is_array($attr)) {
$ns = $attr[2];
$attr = $attr[1];
} else {
$ns = self::NS_HTML;
}
- if (!$el->hasAttributeNS($ns, $attr)) {
- // XSKETCHY: work around godawful libxml bug
- if ($ns === self::NS_XLINK) {
- $el->setAttribute('xlink:'.$attr, $kp['value']);
- } elseif ($ns === self::NS_HTML) {
- // Another godawful libxml bug
- $el->setAttribute($attr, $kp['value']);
- } else {
- $el->setAttributeNS($ns, $attr, $kp['value']);
+ if ($ns === self::NS_XML) {
+ // this is special cased since DOM converts xml:lang
+ // into lang
+ $el->setAttributeNS($ns, $attr, $kp['value']);
+ } else {
+ $prefix = $this->nsToPrefix[$ns];
+ $el->setAttribute($prefix.$attr, $kp['value']);
+ if (!isset($el->html5_namespaced)) {
+ $el->html5_namespaced = array();
}
+ $el->html5_namespaced[$prefix.$attr] = true;
}
}
}
@@ -3701,6 +3710,14 @@ public function insertForeignElement($token, $namespaceURI) {
* value is not the XLink Namespace, that is a parse error. */
}
+    /** Shallow-clones $node and copies its html5_namespaced map (if set) onto the clone by hand. */
+    private function cloneNode($node) {
+        $copy = $node->cloneNode(); // no argument: the node itself, not its children
+        if (!isset($node->html5_namespaced)) return $copy;
+        $copy->html5_namespaced = $node->html5_namespaced;
+        return $copy;
+    }
+
public function save() {
$this->dom->normalize();
if (!$this->fragment) {
diff --git a/php/tests/HTML5/TestData.php b/php/tests/HTML5/TestData.php
index 2f72142..5cea72f 100644
--- a/php/tests/HTML5/TestData.php
+++ b/php/tests/HTML5/TestData.php
@@ -117,22 +117,16 @@ public static function strDom($node, $prefix = '| ') {
}
$text = "<{$ns}{$next->tagName}>";
foreach ($next->attributes as $attr) {
- $ans = '';
- switch ($attr->namespaceURI) {
- case HTML5_TreeBuilder::NS_MATHML:
- $ans = 'math '; break;
- case HTML5_TreeBuilder::NS_SVG:
- $ans = 'svg '; break;
- case HTML5_TreeBuilder::NS_XLINK:
- $ans = 'xlink '; break;
- case HTML5_TreeBuilder::NS_XML:
- $ans = 'xml '; break;
- case HTML5_TreeBuilder::NS_XMLNS:
- $ans = 'xmlns '; break;
+ // XSKETCHY: show ':' as ' ' only for names flagged in html5_namespaced
+ $name = $attr->name;
+ if ($attr->namespaceURI === HTML5_TreeBuilder::NS_XML) {
+ $name = "xml $name";
+ } else {
+ if (isset($next->html5_namespaced[$name])) {
+ $name = str_replace(':', ' ', $name);
+ }
}
- // XSKETCHY: needed for our horrible xlink hack
- $name = str_replace(':', ' ', $attr->localName);
- $subnodes[] = "{$ans}{$name}=\"{$attr->value}\"";
+ $subnodes[] = "{$name}=\"{$attr->value}\"";
}
sort($subnodes);
break;