;;;; HTML5 parser for Common Lisp ;;;; ;;;; Copyright (C) 2012 Thomas Bakketun ;;;; Copyright (C) 2012 Asgeir Bjørlykke ;;;; Copyright (C) 2012 Mathias Hellevang ;;;; Copyright (C) 2012 Stian Sletner ;;;; ;;;; This library is free software: you can redistribute it and/or modify ;;;; it under the terms of the GNU Lesser General Public License as published ;;;; by the Free Software Foundation, either version 3 of the License, or ;;;; (at your option) any later version. ;;;; ;;;; This library is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;;; GNU General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU General Public License ;;;; along with this library. If not, see . (in-package :html5-parser) ;; external interface (defun parse-html5 (source &key encoding strictp container dom) (parse-html5-from-source source :encoding encoding :strictp strictp :container container :dom dom)) (defun parse-html5-fragment (source &key encoding strictp (container "div") dom) (parse-html5-from-source source :encoding encoding :strictp strictp :container container :dom dom)) (defgeneric transform-html5-dom (to-type node &key) (:method ((to-type cons) node &key) (apply #'transform-html5-dom (car to-type) node (cdr to-type))) (:method (to-type node &key &allow-other-keys) (error "No TRANSFORM-HTML5-DOM method defined for dom type ~S." to-type))) ;; internal (defun parse-html5-from-source (source &key container encoding strictp dom) (let ((*parser* (make-instance 'html-parser :strict strictp))) (parser-parse source :fragment-p container :encoding encoding) (with-slots (open-elements errors) *parser* (let ((document (if container (let ((fragment (make-fragment (document*)))) (node-reparent-children (first open-elements) fragment) fragment) (document*)))) (values (if dom (transform-html5-dom dom document) document) (reverse errors)))))) (defvar *phase*) (defun ascii-ichar= (char1 char2) "ASCII case-insensitive char=" (or (char= char1 char2) (and (or (char<= #\A char1 #\Z) (char<= #\A char2 #\Z)) (char= (char-downcase char1) (char-downcase char2))))) (defun ascii-istring= (string1 string2) "ASCII case-insensitive string=" (every #'ascii-ichar= string1 string2)) (defun cdata-switch-helper () (and (last-open-element) (not (equal (node-namespace (last-open-element)) (slot-value *parser* 'html-namespace))))) (defun parser-parse (source &key fragment-p encoding) (with-slots (inner-html-mode container tokenizer) *parser* (setf inner-html-mode fragment-p) (when (stringp fragment-p) (setf container fragment-p)) (setf tokenizer (make-html-tokenizer source :encoding encoding :cdata-switch-helper #'cdata-switch-helper)) (parser-reset) (loop ;; The input stream will throw please-reparse with result true ;; if the encoding is changed while (catch 'please-reparse (main-loop) nil) do (parser-reset)))) (defun parser-reset () (with-slots (open-elements active-formatting-elements head-pointer form-pointer insert-from-table first-start-tag errors compat-mode inner-html-mode inner-html container tokenizer phase last-phase before-rcdata-phase frameset-ok html-namespace) *parser* (setf open-elements '()) (setf active-formatting-elements '()) (setf head-pointer nil) (setf form-pointer nil) (setf insert-from-table nil) (setf first-start-tag nil) (setf errors '()) (setf compat-mode :no-quirks) (cond (inner-html-mode (setf inner-html (string-downcase container)) (cond ((member inner-html +cdata-elements+ :test #'string=) (setf (slot-value tokenizer 'state) :rcdata-state)) ((member inner-html +rcdata-elements+ :test #'string=) (setf (slot-value tokenizer 'state) :rawtext-state)) ((string= inner-html "plaintext") (setf (slot-value tokenizer 'state) :plaintext-state))) (insert-root (implied-tag-token "html" :start-tag)) (setf phase :before-head) (reset-insertion-mode)) (t (setf inner-html nil) (setf phase :initial))) (setf last-phase nil) (setf before-rcdata-phase nil) (setf frameset-ok t))) (defun is-html-integration-point (element) (if (and (string= (node-name element) "annotation-xml") (string= (node-namespace element) (find-namespace "mathml"))) (and (element-attribute element "encoding") (member (ascii-upper-2-lower (element-attribute element "encoding")) '("text/html" "application/xhtml+xml") :test #'string=)) (member (node-name-tuple element) +html-integration-point-elements+ :test #'equal))) (defun is-math-ml-text-integration-point (element) (member (node-name-tuple element) +mathml-text-integration-point-elements+ :test #'equal)) (defun main-loop () (with-slots (tokenizer phase) *parser* (map-tokens tokenizer (lambda (token) (process-token (normalize-token token)))) (loop with reprocess = t with phases = '() while reprocess do (push phase phases) (setf reprocess (process-eof nil :phase phase)) (when reprocess (assert (not (member phase phases))))))) (defun process-token (token) (with-slots (tokenizer last-open-element html-namespace) *parser* (let ((new-token token) (type)) (loop while new-token do (let* ((current-node (last-open-element)) (current-node-namespace (if current-node (node-namespace current-node))) (current-node-name (if current-node (node-name current-node)))) (setf type (getf new-token :type)) (cond ((eql type :parse-error) (parser-parse-error (getf token :data) (getf token :datavars)) (setf new-token nil)) (t (let (phase) (if (or (null (slot-value *parser* 'open-elements)) (equal current-node-namespace html-namespace) (and (is-math-ml-text-integration-point current-node) (or (and (eql type :start-tag) (not (member (getf token :name) '("mglyph" "malignmark") :test #'string=))) (eql type :characters) (eql type :space-characters))) (and (equal current-node-namespace (find-namespace "mathml")) (equal current-node-name "annotation-xml") (eql type :start-tag) (equal (getf token :name) "svg")) (and (is-html-integration-point current-node) (member type '(:start-tag :characters :space-characters)))) (setf phase (slot-value *parser* 'phase)) (setf phase :in-foreign-content)) ;(format t "~&phase ~S token ~S~%" phase new-token) (setf new-token (ecase type (:characters (process-characters new-token :phase phase)) (:space-characters (process-space-characters new-token :phase phase)) (:start-tag (process-start-tag new-token :phase phase)) (:end-tag (process-end-tag new-token :phase phase)) (:comment (process-comment new-token :phase phase)) (:doctype (process-doctype new-token :phase phase)))) ;(format t " phase returned ~S new-token ~S~%" phase new-token) )))) (when (and (eql type :start-tag) (getf token :self-closing) (not (getf token :self-closing-acknowledged))) (parser-parse-error :non-void-element-with-trailing-solidus `(:name ,(getf token :name)))))))) (defun parser-parse-error (error-code &optional datavars) (with-slots (errors) *parser* (push (list error-code datavars) errors))) ;; TODO rename to a longer and more descriptive name when we are done writing the code (defun perror (error-code &rest datavars) (parser-parse-error error-code datavars)) (defun normalize-token (token) (when (getf token :start-tag) ;; Remove duplicate attributes (setf (getf token :data) (remove-duplicates (getf token :data) :key #'car :test #'string= :from-end t))) token) (defun adjust-attributes (token replacements) (setf (getf token :data) (loop for (name . value) in (getf token :data) collect (cons (or (cdr (assoc name replacements :test #'string=)) name) value)))) (defun adjust-math-ml-attributes (token) (adjust-attributes token '(("definitionurl" ."definitionURL")))) (defun adjust-svg-attributes (token) (adjust-attributes token '(("attributename" . "attributeName") ("attributetype" . "attributeType") ("basefrequency" . "baseFrequency") ("baseprofile" . "baseProfile") ("calcmode" . "calcMode") ("clippathunits" . "clipPathUnits") ("contentscripttype" . "contentScriptType") ("contentstyletype" . "contentStyleType") ("diffuseconstant" . "diffuseConstant") ("edgemode" . "edgeMode") ("externalresourcesrequired" . "externalResourcesRequired") ("filterres" . "filterRes") ("filterunits" . "filterUnits") ("glyphref" . "glyphRef") ("gradienttransform" . "gradientTransform") ("gradientunits" . "gradientUnits") ("kernelmatrix" . "kernelMatrix") ("kernelunitlength" . "kernelUnitLength") ("keypoints" . "keyPoints") ("keysplines" . "keySplines") ("keytimes" . "keyTimes") ("lengthadjust" . "lengthAdjust") ("limitingconeangle" . "limitingConeAngle") ("markerheight" . "markerHeight") ("markerunits" . "markerUnits") ("markerwidth" . "markerWidth") ("maskcontentunits" . "maskContentUnits") ("maskunits" . "maskUnits") ("numoctaves" . "numOctaves") ("pathlength" . "pathLength") ("patterncontentunits" . "patternContentUnits") ("patterntransform" . "patternTransform") ("patternunits" . "patternUnits") ("pointsatx" . "pointsAtX") ("pointsaty" . "pointsAtY") ("pointsatz" . "pointsAtZ") ("preservealpha" . "preserveAlpha") ("preserveaspectratio" . "preserveAspectRatio") ("primitiveunits" . "primitiveUnits") ("refx" . "refX") ("refy" . "refY") ("repeatcount" . "repeatCount") ("repeatdur" . "repeatDur") ("requiredextensions" . "requiredExtensions") ("requiredfeatures" . "requiredFeatures") ("specularconstant" . "specularConstant") ("specularexponent" . "specularExponent") ("spreadmethod" . "spreadMethod") ("startoffset" . "startOffset") ("stddeviation" . "stdDeviation") ("stitchtiles" . "stitchTiles") ("surfacescale" . "surfaceScale") ("systemlanguage" . "systemLanguage") ("tablevalues" . "tableValues") ("targetx" . "targetX") ("targety" . "targetY") ("textlength" . "textLength") ("viewbox" . "viewBox") ("viewtarget" . "viewTarget") ("xchannelselector" . "xChannelSelector") ("ychannelselector" . "yChannelSelector") ("zoomandpan" . "zoomAndPan")))) (defun adjust-foreign-attributes (token) (adjust-attributes token `(("xlink:actuate" . ("xlink" "actuate" ,(find-namespace "xlink"))) ("xlink:arcrole" . ("xlink" "arcrole" ,(find-namespace "xlink"))) ("xlink:href" . ("xlink" "href" ,(find-namespace "xlink"))) ("xlink:role" . ("xlink" "role" ,(find-namespace "xlink"))) ("xlink:show" . ("xlink" "show" ,(find-namespace "xlink"))) ("xlink:title" . ("xlink" "title" ,(find-namespace "xlink"))) ("xlink:type" . ("xlink" "type" ,(find-namespace "xlink"))) ("xml:base" . ("xml" "base" ,(find-namespace "xml"))) ("xml:lang" . ("xml" "lang" ,(find-namespace "xml"))) ("xml:space" . ("xml" "space" ,(find-namespace "xml"))) ("xmlns" . (nil "xmlns" ,(find-namespace "xmlns"))) ("xmlns:xlink" . ("xmlns" "xlink" ,(find-namespace "xmlns")))))) (defun reset-insertion-mode () (with-slots (inner-html html-namespace phase open-elements) *parser* (let ((last nil) (new-phase nil) (new-modes '(("select" . :in-select) ("td" . :in-cell) ("th" . :in-cell) ("tr" . :in-row) ("tbody" . :in-table-body) ("thead" . :in-table-body) ("tfoot" . :in-table-body) ("caption" . :in-caption) ("colgroup" . :in-column-group) ("table" . :in-table) ("head" . :in-body) ("body" . :in-body) ("frameset" . :in-frameset) ("html" . :before-head)))) (loop for node in (reverse open-elements) for node-name = (node-name node) do (when (eql node (first open-elements)) (assert inner-html) (setf last t) (setf node-name inner-html)) ;; Check for conditions that should only happen in the innerHTML ;; case (when (member node-name '("select" "colgroup" "head" "html") :test #'string=) (assert inner-html)) (unless (and (not last) (string/= (node-namespace node) html-namespace)) (let ((match (cdr (assoc node-name new-modes :test #'string=)))) (when match (setf new-phase match) (return)) (when last (setf new-phase :in-body) (return))))) (setf phase new-phase)))) (defun parse-rc-data-raw-text (token content-type) (assert (member content-type '(:rawtext :rcdata))) (with-slots (tokenizer original-phase phase) *parser* (insert-element token) (setf (tokenizer-state tokenizer) (ecase content-type (:rawtext :rawtext-state) (:rcdata :rcdata-state))) (setf original-phase phase) (setf phase :text) nil)) ;; Phases -------------------------------------------------------------------- (defun implied-tag-token (name &optional (type :end-tag)) (list :type type :name name :data '() :self-closing nil)) (defun implied-tag-token/full (name type &key (attributes '()) (self-closing nil)) (list :type type :name name :data attributes :self-closing self-closing)) (eval-when (:compile-toplevel :execute) (defun phase-process-method-name (function-name) (intern (concatenate 'string "%" (symbol-name function-name)) (symbol-package function-name)))) (defvar *phase-indent* 0) (defun call-phase-method (name phase token) ;(format *trace-output* "~&~vTcall: ~S ~S ~S" *phase-indent* name phase token) ;(break) (let ((result (let ((*phase-indent* (+ 4 *phase-indent*))) (funcall name phase token)))) ;(format *trace-output* "~&~vTreturn: ~S ~S" *phase-indent* name result) result)) (defmacro define-phase-process-functions (&body defs) `(progn ,@(loop for function-name in defs for method-name = (phase-process-method-name function-name) collect `(defgeneric ,method-name (phase token)) collect `(defun ,function-name (token &key (phase *phase*)) (call-phase-method #',method-name phase token))))) (define-phase-process-functions add-formatting-element end-tag-applet-marquee-object end-tag-block end-tag-body end-tag-br end-tag-caption end-tag-col end-tag-colgroup end-tag-form end-tag-formatting end-tag-frameset end-tag-head end-tag-heading end-tag-html end-tag-html-body-br end-tag-ignore end-tag-imply end-tag-imply-head end-tag-list-item end-tag-optgroup end-tag-option end-tag-other end-tag-p end-tag-script end-tag-select end-tag-table end-tag-table-cell end-tag-table-row-group end-tag-tr insert-text process-characters process-comment process-doctype process-end-tag process-eof process-space-characters process-start-tag start-tag-a start-tag-applet-marquee-object start-tag-base-link-command start-tag-body start-tag-button start-tag-caption start-tag-close-p start-tag-col start-tag-colgroup start-tag-form start-tag-formatting start-tag-frame start-tag-frameset start-tag-from-head start-tag-head start-tag-heading start-tag-hr start-tag-html start-tag-i-frame start-tag-image start-tag-imply-tbody start-tag-input start-tag-is-index start-tag-list-item start-tag-math start-tag-meta start-tag-misplaced start-tag-no-script-no-frames-style start-tag-nobr start-tag-noframes start-tag-opt start-tag-optgroup start-tag-option start-tag-other start-tag-param-source start-tag-plaintext start-tag-pre-listing start-tag-process-in-head start-tag-rawtext start-tag-row-group start-tag-rp-rt start-tag-script start-tag-select start-tag-style-script start-tag-svg start-tag-table start-tag-table-cell start-tag-table-element start-tag-table-other start-tag-textarea start-tag-title start-tag-tr start-tag-void-formatting start-tag-xmp) (defmacro def (phase name (&rest slots) &body body) `(defmethod ,(phase-process-method-name name) ((*phase* (eql ,phase)) token) (with-slots (,@slots) *parser* ,@body))) (defmacro tagname-dispatch (phase name &body cases) `(def ,phase ,name () (let ((tagname (getf token :name))) (declare (ignorable tagname)) ,(let* ((default '(error "Unhandled tag ~S" tagname)) (string-cases (loop for (tagnames function) in cases append (cond ((stringp tagnames) `((,tagnames (,function token)))) ((consp tagnames) (loop for tag in tagnames collect `(,tag (,function token)))) ((eql 'default tagnames) (setf default `(,function token)) nil) (t (error "Invalid tag name clause ~S" tagnames)))))) (if (not string-cases) default `(string-case:string-case (tagname :default ,default) ,@string-cases)))))) ;; Default methods (defmethod %process-comment (*phase* token) ;; For most phases the following is correct. Where it's not it will be ;; overridden. (insert-comment token (last-open-element)) nil) (defmethod %process-doctype (*phase* token) (parser-parse-error :unexpected-doctype) nil) (defmethod %process-characters (*phase* token) (parser-insert-text (getf token :data)) nil) (defmethod %process-space-characters (*phase* token) (parser-insert-text (getf token :data)) nil) (defmethod %start-tag-html (*phase* token) (with-slots (first-start-tag open-elements) *parser* (when (and (not first-start-tag) (string= (getf token :name) "html")) (parser-parse-error :non-html-root)) ;; XXX Need a check here to see if the first start tag token emitted is ;; this token... If it's not, invoke self.parser.parseError(). (let ((root-element (first open-elements))) (loop for (name . value) in (getf token :data) do (unless (element-attribute root-element name) (setf (element-attribute root-element name) value)))) (setf first-start-tag nil) nil)) ;; InitialPhase (def :initial process-space-characters () nil) (def :initial process-comment () (insert-comment token (document*)) nil) (def :initial process-doctype (compat-mode phase) (destructuring-bind (&key name public-id system-id correct &allow-other-keys) token (when (or (string/= name "html") public-id (and system-id (string/= system-id "about:legacy-compat"))) (parser-parse-error :unknown-doctype)) (unless public-id (setf public-id "")) (insert-doctype token) (setf public-id (ascii-upper-2-lower public-id)) (cond ((or (not correct) (string/= name "html") (cl-ppcre:scan +quirks-mode-doctypes-regexp+ public-id) (member public-id '("-//w3o//dtd w3 html strict 3.0//en//" "-/w3c/dtd html 4.0 transitional/en" "html") :test #'string=) (and (not system-id) (cl-ppcre:scan '(:sequence :start-anchor (:alternation "-//w3c//dtd html 4.01 frameset//" "-//w3c//dtd html 4.01 transitional//")) public-id)) (and system-id (equal (ascii-upper-2-lower system-id) "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"))) (setf compat-mode :quirks)) ((or (cl-ppcre:scan '(:sequence :start-anchor (:alternation "-//w3c//dtd xhtml 1.0 frameset//" "-//w3c//dtd xhtml 1.0 transitional//")) public-id) (and system-id (cl-ppcre:scan '(:sequence :start-anchor (:alternation "-//w3c//dtd html 4.01 frameset//" "-//w3c//dtd html 4.01 transitional//")) public-id))) (setf compat-mode :limited-quirks))) (setf phase :before-html) nil)) (flet ((anything-else () (with-slots (compat-mode phase) *parser* (setf compat-mode :quirks) (setf phase :before-html)))) (def :initial process-characters () (parser-parse-error :expected-doctype-but-got-chars) (anything-else) token) (def :initial process-start-tag () (parser-parse-error :expected-doctype-but-got-start-tag (list :name (getf token :name))) (anything-else) token) (def :initial process-end-tag () (parser-parse-error :expected-doctype-but-got-end-tag (list :name (getf token :name))) (anything-else) token) (def :initial process-eof () (parser-parse-error :expected-doctype-but-got-eof) (anything-else) t)) ;; BeforeHtmlPhase (flet ((insert-html-element () (insert-root (implied-tag-token "html" :start-tag)) (setf (parser-phase *parser*) :before-head))) (def :before-html process-eof () (insert-html-element) t) (def :before-html process-comment () (insert-comment token (document*)) nil) (def :before-html process-space-characters () nil) (def :before-html process-characters () (insert-html-element) token) (def :before-html process-start-tag (first-start-tag) (when (string= (getf token :name) "html") (setf first-start-tag t)) (insert-html-element) token) (def :before-html process-end-tag () (cond ((not (member (getf token :name) '("head" "body" "html" "br") :test #'string=)) (parser-parse-error :unexpected-end-tag-before-html `(:name ,(getf token :name))) nil) (t (insert-html-element) token)))) ;; BeforeHeadPhase (tagname-dispatch :before-head process-start-tag ("html" start-tag-html) ("head" start-tag-head token) (default start-tag-other)) (tagname-dispatch :before-head process-end-tag (("head" "body" "html" "br") end-tag-imply-head) (default end-tag-other)) (def :before-head process-eof () (start-tag-head (implied-tag-token "head" :start-tag)) t) (def :before-head process-space-characters () nil) (def :before-head process-characters () (start-tag-head (implied-tag-token "head" :start-tag)) token) (def :before-head start-tag-html () (process-start-tag token :phase :in-body)) (def :before-head start-tag-head (head-pointer) (insert-element token) (setf head-pointer (last-open-element)) (setf (parser-phase *parser*) :in-head) nil) (def :before-head start-tag-other () (start-tag-head (implied-tag-token "head" :start-tag)) token) (def :before-head end-tag-imply-head () (start-tag-head (implied-tag-token "head" :start-tag)) token) (def :before-head end-tag-other () (parser-parse-error :end-tag-after-implied-root `(:name ,(getf token :name))) nil) ;; InHeadPhase (tagname-dispatch :in-head process-start-tag ("html" start-tag-html) ("title" start-tag-title) (("noscript" "noframes" "style") start-tag-no-script-no-frames-style) ("script" start-tag-script) (("base" "basefont" "bgsound" "command" "link") start-tag-base-link-command) ("meta" start-tag-meta) ("head" start-tag-head) (default start-tag-other)) (tagname-dispatch :in-head process-end-tag ("head" end-tag-head) (("br" "html" "body") end-tag-html-body-br) (default end-tag-other)) (flet ((anything-else () (end-tag-head (implied-tag-token "head")))) ;; the real thing (def :in-head process-eof () (anything-else) t) (def :in-head process-characters () (anything-else) token) (def :in-head start-tag-html () (process-start-tag token :phase :in-body)) (def :in-head start-tag-head () (parser-parse-error :two-heads-are-not-better-than-one) nil) (def :in-head start-tag-base-link-command (open-elements) (insert-element token) (pop-end open-elements) (setf (getf token :self-closing-acknowledged) t) nil) (defun parse-content-attr (string) "The algorithm for extracting an encoding from a meta element" (let ((position 0)) ; Step 1 (labels ((char-at (index) (and (< position (length string)) (char string index))) (skip-space () (loop while (member (char-at position) +space-characters+) do (incf position)))) ;; Step 2 (loop (setf position (search "charset" string :start2 position)) (unless position (return-from parse-content-attr)) ;; Set position to after charset (incf position 7) ;; Step 3 (skip-space) ;; Step 4 (when (eql (char-at position) #\=) (return)) (decf position)) ;; Step 5 (incf position) (skip-space) ;; Step 6 (let ((next-char (char-at position))) (cond ((or (eql #\' next-char) (eql #\" next-char)) (incf position) (let ((end (position next-char string :start position))) (when end (subseq string position end)))) (next-char (let ((start position)) (loop until (or (= position (length string)) (member (char-at position) +space-characters+)) do (incf position)) (subseq string start position)))))))) (def :in-head start-tag-meta (tokenizer open-elements) (insert-element token) (pop-end open-elements) (setf (getf token :self-closing-acknowledged) t) (let ((attributes (getf token :data))) (when (eql (cdr (html5-stream-encoding (tokenizer-stream tokenizer))) :tentative) (cond ((assoc "charset" attributes :test #'string=) (html5-stream-change-encoding (tokenizer-stream tokenizer) (cdr (assoc "charset" attributes :test #'string=)))) ((and (assoc "http-equiv" attributes :test #'string=) (ascii-istring= (cdr (assoc "http-equiv" attributes :test #'string=)) "Content-Type") (assoc "content" attributes :test #'string=)) (let* ((content (cdr (assoc "content" attributes :test #'string=))) (new-encoding (parse-content-attr content))) (if new-encoding (html5-stream-change-encoding (tokenizer-stream tokenizer) new-encoding) (parser-parse-error :invalid-encoding-declaration `(:content ,content)))))))) nil) (def :in-head start-tag-title () (parse-rc-data-raw-text token :rcdata) nil) (def :in-head start-tag-no-script-no-frames-style () ;; Need to decide whether to implement the scripting-disabled case (parse-rc-data-raw-text token :rawtext)) (def :in-head start-tag-script (tokenizer original-phase phase) (insert-element token) (setf (tokenizer-state tokenizer) :script-data-state) (setf original-phase phase) (setf phase :text) nil) (def :in-head start-tag-other () (anything-else) token) (def :in-head end-tag-head (phase open-elements) (let ((node (pop-end open-elements))) (assert (string= (node-name node) "head") () "Expected head got ~S" (node-name node)) (setf phase :after-head) nil)) (def :in-head end-tag-html-body-br () (anything-else) token) (def :in-head end-tag-other () (parser-parse-error :unexpected-end-tag `(:name ,(getf token :name))) nil)) ;; XXX If we implement a parser for which scripting is disabled we need to ;; implement this phase. ;; ;; InHeadNoScriptPhase ;; AfterHeadPhase (tagname-dispatch :after-head process-start-tag ("html" start-tag-html) ("body" start-tag-body) ("frameset" start-tag-frameset) (("base" "basefont" "bgsound" "link" "meta" "noframes" "script" "style" "title") start-tag-from-head) ("head" start-tag-head) (default start-tag-other)) (tagname-dispatch :after-head process-end-tag (("body" "html" "br") end-tag-html-body-br) (default end-tag-other)) (flet ((anything-else () (with-slots (phase frameset-ok) *parser* (insert-element (implied-tag-token "body" :start-tag)) (setf phase :in-body) (setf frameset-ok t)))) (def :after-head process-eof () (anything-else) t) (def :after-head process-characters () (anything-else) token) (def :after-head start-tag-html () (process-start-tag token :phase :in-body)) (def :after-head start-tag-body (phase frameset-ok) (setf frameset-ok nil) (insert-element token) (setf phase :in-body) nil) (def :after-head start-tag-frameset (phase) (insert-element token) (setf phase :in-frameset) nil) (def :after-head start-tag-from-head (head-pointer open-elements) (parser-parse-error :unexpected-start-tag-out-of-my-head `(:name ,(getf token :name))) (push-end head-pointer open-elements) (process-start-tag token :phase :in-head) (loop for node in (reverse open-elements) do (when (string= "head" (node-name node)) (setf open-elements (remove node open-elements :test #'equal)) (return))) nil) (def :after-head start-tag-head () (parser-parse-error :unexpected-start-tag `(:name ,(getf token :name))) nil) (def :after-head start-tag-other () (anything-else) token) (def :after-head end-tag-html-body-br () (anything-else) token) (def :after-head end-tag-other () (parser-parse-error :unexpected-end-tag `(:name ,(getf token :name))) nil)) ;; InBodyPhase (tagname-dispatch :in-body process-start-tag ("html" start-tag-html) (("base" "basefont" "bgsound" "command" "link" "meta" "noframes" "script" "style" "title") start-tag-process-in-head) ("body" start-tag-body) ("frameset" start-tag-frameset) (("address" "article" "aside" "blockquote" "center" "details" "dir" "div" "dl" "fieldset" "figcaption" "figure" "footer" "header" "hgroup" "menu" "nav" "ol" "p" "section" "summary" "ul") start-tag-close-p) (#.+heading-elements+ start-tag-heading) (("pre" "listing") start-tag-pre-listing) ("form" start-tag-form) (("li" "dd" "dt") start-tag-list-item) ("plaintext" start-tag-plaintext) ("a" start-tag-a) (("b" "big" "code" "em" "font" "i" "s" "small" "strike" "strong" "tt" "u") start-tag-formatting) ("nobr" start-tag-nobr) ("button" start-tag-button) (("applet" "marquee" "object") start-tag-applet-marquee-object) ("xmp" start-tag-xmp) ("table" start-tag-table) (("area" "br" "embed" "img" "keygen" "wbr") start-tag-void-formatting) (("param" "source" "track") start-tag-param-source) ("input" start-tag-input) ("hr" start-tag-hr) ("image" start-tag-image) ("isindex" start-tag-is-index) ("textarea" start-tag-textarea) ("iframe" start-tag-i-frame) (("noembed" "noscript") start-tag-rawtext) ("select" start-tag-select) (("rp" "rt") start-tag-rp-rt) (("option" "optgroup") start-tag-opt) (("math") start-tag-math) (("svg") start-tag-svg) (("caption" "col" "colgroup" "frame" "head" "tbody" "td" "tfoot" "th" "thead" "tr") start-tag-misplaced) (default start-tag-other)) (tagname-dispatch :in-body process-end-tag ("body" end-tag-body) ("html" end-tag-html) (("address" "article" "aside" "blockquote" "button" "center" "details" "dir" "div" "dl" "fieldset" "figcaption" "figure" "footer" "header" "hgroup" "listing" "menu" "nav" "ol" "pre" "section" "summary" "ul") end-tag-block) ("form" end-tag-form) ("p" end-tag-p) (("dd" "dt" "li") end-tag-list-item) (#.+heading-elements+ end-tag-heading) (("a" "b" "big" "code" "em" "font" "i" "nobr" "s" "small" "strike" "strong" "tt" "u") end-tag-formatting) (("applet" "marquee" "object") end-tag-applet-marquee-object) ("br" end-tag-br) (default end-tag-other)) (flet ((is-matching-formatting-element (node1 node2) (and (equal (node-name node1) (node-name node2)) (equal (node-namespace node1) (node-namespace node2)) (node-attributes= node1 node2)))) (def :in-body add-formatting-element (reverse active-formatting-elements) (insert-element token) (let ((element (last-open-element)) matching-elements) (loop for node in (reverse active-formatting-elements) do (if (eq node :marker) (return) (when (is-matching-formatting-element node element) (push-end node matching-elements)))) (assert (<= (length matching-elements) 3)) (when (= (length matching-elements) 3) (setf active-formatting-elements (remove (car (last matching-elements)) active-formatting-elements))) (assert element) (push-end element active-formatting-elements)) nil)) (def :in-body process-eof (open-elements) (let ((allowed-elements '("dd" "dt" "li" "p" "tbody" "td" "tfoot" "th" "thead" "tr" "body" "html"))) (loop for node in (reverse open-elements) do (when (not (member (node-name node) allowed-elements :test #'string=)) (parser-parse-error :expected-closing-tag-but-got-eof) (return)))) nil) (def :in-body process-characters (frameset-ok) (let ((data (getf token :data))) (if (equal data (string #\u0000)) nil (progn (reconstruct-active-formatting-elements) (parser-insert-text data) ;;This must be bad for performance (when (and frameset-ok (notevery (lambda (char) (find char +space-characters+)) data)) (setf frameset-ok nil)) nil)))) (def :in-body process-space-characters (in-body-process-space-characters-mode) (ecase in-body-process-space-characters-mode (:non-pre (reconstruct-active-formatting-elements) (parser-insert-text (getf token :data))) (:drop-newline (let ((data (getf token :data))) (setf in-body-process-space-characters-mode :non-pre) (when (and (plusp (length data)) (char= #\Newline (char data 0)) (member (node-name (last-open-element)) '("pre" "listing" "textarea") :test #'string=) (not (node-has-content (last-open-element)))) (setf data (subseq data 1))) (when (plusp (length data)) (reconstruct-active-formatting-elements) (parser-insert-text data))))) nil) (def :in-body start-tag-process-in-head () (process-start-tag token :phase :in-head)) (def :in-body start-tag-body (frameset-ok open-elements) (parser-parse-error :unexpected-start-tag `(:name ,(getf token :name))) (if (or (= 1 (length open-elements)) (string/= (node-name (second open-elements)) "body")) (assert (slot-value *parser* 'inner-html)) (progn (setf frameset-ok nil) (loop for (name . value) in (getf token :data) do (unless (element-attribute (second open-elements) name) (setf (element-attribute (second open-elements) name) value))))) nil) (def :in-body start-tag-frameset (frameset-ok phase open-elements) (parser-parse-error :unexpected-start-tag `(:name ,(getf token :name))) (cond ((or (= 1 (length open-elements)) (string/= (node-name (second open-elements)) "body")) (assert (slot-value *parser* 'inner-html))) ((not frameset-ok) nil) (t (when (node-parent (second open-elements)) (node-remove-child (node-parent (second open-elements)) (second open-elements))) (loop until (string= (node-name (last-open-element)) "html") do (pop-end open-elements)) (insert-element token) (setf phase :in-frameset))) nil) (def :in-body start-tag-close-p () (when (element-in-scope "p" "button") (end-tag-p (implied-tag-token "p"))) (insert-element token) nil) (def :in-body start-tag-pre-listing (in-body-process-space-characters-mode frameset-ok) (when (element-in-scope "p" "button") (end-tag-p (implied-tag-token "p"))) (insert-element token) (setf frameset-ok nil) (setf in-body-process-space-characters-mode :drop-newline) nil) (def :in-body start-tag-form (form-pointer) (if form-pointer (parser-parse-error :unexpected-start-tag `(:name ,(getf token :name))) (progn (when (element-in-scope "p" "button") (end-tag-p (implied-tag-token "p"))) (insert-element token) (setf form-pointer (last-open-element)))) nil) (def :in-body start-tag-list-item (phase frameset-ok open-elements) (setf frameset-ok nil) (let ((stop-names (cond ((string= (getf token :name) "li") '("li")) ((string= (getf token :name) "dt") '("dt" "dd")) ((string= (getf token :name) "dd") '("dt" "dd"))))) (loop for node in (reverse open-elements) do (cond ((member (node-name node) stop-names :test #'string=) (process-end-tag (implied-tag-token (node-name node)) :phase phase) (return)) ((and (member (node-name-tuple node) +special-elements+ :test #'equal) (not (member (node-name node) '("address" "div" "p") :test #'string=))) (return))))) (when (element-in-scope "p" "button") (process-end-tag (implied-tag-token "p") :phase phase)) (insert-element token) nil) (def :in-body start-tag-plaintext (tokenizer) (when (element-in-scope "p" "button") (end-tag-p (implied-tag-token "p"))) (insert-element token) (setf (tokenizer-state tokenizer) :plaintext-state) nil) (def :in-body start-tag-heading (open-elements) (when (element-in-scope "p" "button") (end-tag-p (implied-tag-token "p"))) (when (member (node-name (last-open-element)) +heading-elements+ :test #'string=) (perror :unexpected-start-tag :name (getf token :name)) (pop-end open-elements)) (insert-element token) nil) (def :in-body start-tag-a (open-elements active-formatting-elements) (let ((afe-a-element (element-in-active-formatting-elements "a"))) (when afe-a-element (perror :unexpected-start-tag-implies-end-tag :start-name "a" :end-name "a") (end-tag-formatting (implied-tag-token "a")) (when (member afe-a-element open-elements) (setf open-elements (remove afe-a-element open-elements))) (when (member afe-a-element active-formatting-elements) (setf active-formatting-elements (remove afe-a-element active-formatting-elements)))) (reconstruct-active-formatting-elements) (add-formatting-element token)) nil) (def :in-body start-tag-formatting () (reconstruct-active-formatting-elements) (add-formatting-element token) nil) (def :in-body start-tag-nobr () (reconstruct-active-formatting-elements) (when (element-in-scope "nobr") (perror :unexpected-start-tag-implies-end-tag :start-name "nobr" :end-name "nobr") (process-end-tag (implied-tag-token "nobr")) ;; XXX Need tests that trigger the following (reconstruct-active-formatting-elements)) (add-formatting-element token) nil) (def :in-body start-tag-button (frameset-ok) (cond ((element-in-scope "button") (perror :unexpected-start-tag-implies-end-tag :start-name "button" :end-name "button") (process-end-tag (implied-tag-token "button")) token) (t (reconstruct-active-formatting-elements) (insert-element token) (setf frameset-ok nil) nil))) (def :in-body start-tag-applet-marquee-object (frameset-ok active-formatting-elements) (reconstruct-active-formatting-elements) (insert-element token) (push-end :marker active-formatting-elements) (setf frameset-ok nil) nil) (def :in-body start-tag-xmp (frameset-ok) (when (element-in-scope "p" "button") (end-tag-p (implied-tag-token "p"))) (reconstruct-active-formatting-elements) (setf frameset-ok nil) (parse-rc-data-raw-text token :rawtext) nil) (def :in-body start-tag-table (frameset-ok compat-mode phase) (when (not (eq compat-mode :quirks)) (when (element-in-scope "p" "button") (end-tag-p (implied-tag-token "p")))) (insert-element token) (setf frameset-ok nil) (setf phase :in-table) nil) (def :in-body start-tag-void-formatting (frameset-ok open-elements) (reconstruct-active-formatting-elements) (insert-element token) (pop-end open-elements) (setf (getf token :self-closing-acknowledged) t) (setf frameset-ok nil) nil) (def :in-body start-tag-input (frameset-ok) (let ((old-frameset-ok frameset-ok)) (start-tag-void-formatting token) (let ((type (assoc "type" (getf token :data) :test #'string=))) (when (and type (string= (ascii-upper-2-lower (cdr type)) "hidden")) ;;input type=hidden doesn't change framesetOK (setf frameset-ok old-frameset-ok)))) nil) (def :in-body start-tag-param-source (open-elements) (insert-element token) (pop-end open-elements) (setf (getf token :self-closing-acknowledged) t) nil) (def :in-body start-tag-hr (frameset-ok open-elements) (when (element-in-scope "p" "button") (end-tag-p (implied-tag-token "p"))) (insert-element token) (pop-end open-elements) (setf (getf token :self-closing-acknowledged) t) (setf frameset-ok nil) nil) (def :in-body start-tag-image () (perror :unexpected-start-tag-treated-as :original-name "image" :new-name "img") (process-start-tag (implied-tag-token/full "img" :start-tag :attributes (getf token :data) :self-closing (getf token :self-closing))) nil) (def :in-body start-tag-is-index (form-pointer) (block nil (perror :deprecated-tag :name "isindex") (when form-pointer (return nil)) (let (attrs) (when (assoc "action" (getf token :data) :test #'string=) (setf attrs (list (assoc "action" (getf token :data) :test #'string=)))) (process-start-tag (implied-tag-token/full "form" :start-tag :attributes attrs))) (process-start-tag (implied-tag-token "hr" :start-tag)) (process-start-tag (implied-tag-token "label" :start-tag)) ;; XXX Localization ... (let ((prompt (if (assoc "prompt" (getf token :data) :test #'string=) (cdr (assoc "prompt" (getf token :data) :test #'string=)) "This is a searchable index. Enter search keywords: "))) (process-characters (list :type :characters :data prompt))) (let ((attrs (append (remove-if (lambda (el) (member (car el) '("action" "prompt" "name") :test #'string=)) (copy-list (getf token :data))) (copy-list '(("name" . "isindex")))))) (process-start-tag (implied-tag-token/full "input" :start-tag :attributes attrs :self-closing (getf token :self-closing)))) (process-end-tag (implied-tag-token "label")) (process-start-tag (implied-tag-token "hr" :start-tag)) (process-end-tag (implied-tag-token "form"))) nil) (def :in-body start-tag-textarea (tokenizer in-body-process-space-characters-mode frameset-ok) (insert-element token) (setf (tokenizer-state tokenizer) :rcdata-state) (setf in-body-process-space-characters-mode :drop-newline) (setf frameset-ok nil) nil) (def :in-body start-tag-i-frame (frameset-ok) (setf frameset-ok nil) (start-tag-rawtext token) nil) (def :in-body start-tag-rawtext () ;;;iframe, noembed noframes, noscript(if scripting enabled) (parse-rc-data-raw-text token :rawtext) nil) (def :in-body start-tag-opt (phase) (when (string= (node-name (last-open-element)) "option") (process-end-tag (implied-tag-token "option") :phase phase)) (reconstruct-active-formatting-elements) (insert-element token) nil) (def :in-body start-tag-select (frameset-ok) (reconstruct-active-formatting-elements) (insert-element token) (setf frameset-ok nil) (if (member (parser-phase *parser*) '(:in-table :in-caption :in-column-group :in-table-body :in-row :in-cell)) (setf (parser-phase *parser*) :in-select-in-table) (setf (parser-phase *parser*) :in-select)) nil) (def :in-body start-tag-rp-rt () (when (element-in-scope "ruby") (generate-implied-end-tags) (when (string/= (node-name (last-open-element)) "ruby") (perror :expected-ruby-tag))) (insert-element token) nil) (def :in-body start-tag-math (open-elements) (reconstruct-active-formatting-elements) (adjust-math-ml-attributes token) (adjust-foreign-attributes token) (setf (getf token :namespace) (find-namespace "mathml")) (insert-element token) ;;Need to get the parse error right for the case where the token ;;has a namespace not equal to the xmlns attribute (when (getf token :self-closing) (pop-end open-elements) (setf (getf token :self-closing-acknowledged) t)) nil) (def :in-body start-tag-svg (open-elements) (reconstruct-active-formatting-elements) (adjust-svg-attributes token) (adjust-foreign-attributes token) (setf (getf token :namespace) (find-namespace "svg")) (insert-element token) ;;Need to get the parse error right for the case where the token ;;has a namespace not equal to the xmlns attribute (when (getf token :self-closing) (pop-end open-elements) (setf (getf token :self-closing-acknowledged) t)) nil) (def :in-body start-tag-misplaced () ;;; Elements that should be children of other elements that have a ;;; different insertion mode; here they are ignored ;;; "caption", "col", "colgroup", "frame", "frameset", "head", ;;; "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", ;;; "tr", "noscript" (perror :unexpected-start-tag-ignored :name (getf token :name)) nil) (def :in-body start-tag-other () (reconstruct-active-formatting-elements) (insert-element token) nil) (def :in-body end-tag-p (open-elements) (cond ((not (element-in-scope "p" "button")) (start-tag-close-p (implied-tag-token "p" :start-tag)) (perror :unexpected-end-tag :name "p") (end-tag-p (implied-tag-token "p"))) (t (generate-implied-end-tags "p") (when (string/= (node-name (last-open-element)) "p") (perror :unexpected-end-tag :name "p")) (let ((node (pop-end open-elements))) (loop until (string= (node-name node) "p") do (setf node (pop-end open-elements)))))) nil) (def :in-body end-tag-body (open-elements) (block nil (when (not (element-in-scope "body")) (perror :unexpected-scope) (return nil)) (when (string/= (node-name (last-open-element)) "body") (loop for node in (cddr open-elements) do (when (member (node-name node) '("dd" "dt" "li" "optgroup" "option" "p" "rp" "rt" "tbody" "td" "tfoot" "th" "thead" "tr" "body" "html") :test #'string=) ;;Not sure this is the correct name for the parse error (perror :expected-one-end-tag-but-got-another :expected-name "body" :got-name (node-name node)) (return))))) (setf (parser-phase *parser*) :after-body) nil) (def :in-body end-tag-html () ;;We repeat the test for the body end tag token being ignored here (cond ((element-in-scope "body") (end-tag-body (implied-tag-token "body")) token) (t nil))) (def :in-body end-tag-block (in-body-process-space-characters-mode open-elements) ;;Put us back in the right whitespace handling mode (when (string= (getf token :name) "pre") (setf in-body-process-space-characters-mode :non-pre)) (let ((in-scope (element-in-scope (getf token :name)))) (when in-scope (generate-implied-end-tags)) (when (string/= (node-name (last-open-element)) (getf token :name)) (perror :end-tag-too-early :name (getf token :name))) (when in-scope (let ((node (pop-end open-elements))) (loop until (string= (node-name node) (getf token :name)) do (setf node (pop-end open-elements)))))) nil) (def :in-body end-tag-form (form-pointer open-elements) (let ((node form-pointer)) (setf form-pointer nil) (if (or (null node) (not (element-in-scope (node-name node)))) (perror :unexpected-end-tag :name "form") (progn (generate-implied-end-tags) (when (not (equal (last-open-element) node)) (perror :end-tag-too-early-ignored :name "form")) (setf open-elements (remove node open-elements))))) nil) ;;; Note to self: ;;; - A token is a plist. ;;; - A property is an alist. ;;; - A node is an object. ;;; - An element is a node. (def :in-body end-tag-list-item (open-elements) (let ((variant (if (string= (getf token :name) "li") "list" nil))) (if (not (element-in-scope (getf token :name) variant)) (perror :unexpected-end-tag :name (getf token :name)) (progn (generate-implied-end-tags (getf token :name)) (when (string/= (node-name (last-open-element)) (getf token :name)) (perror :end-tag-too-early :name (getf token :name))) (let ((node (pop-end open-elements))) (loop until (string= (node-name node) (getf token :name)) do (setf node (pop-end open-elements))))))) nil) (def :in-body end-tag-heading (open-elements) (loop for item in +heading-elements+ do (when (element-in-scope item) (generate-implied-end-tags) (return))) (when (string/= (node-name (last-open-element)) (getf token :name)) (perror :end-tag-too-early :name (getf token :name))) (loop for item in +heading-elements+ do (when (element-in-scope item) (let ((item (pop-end open-elements))) (loop until (member (node-name item) +heading-elements+ :test #'string=) do (setf item (pop-end open-elements)))))) nil) (defmacro insert-elt-at (object index place) (let ((tmp (gensym "TMP")) (object-symbol (gensym "OBJECT")) (index-symbol (gensym "INDEX"))) `(let ((,object-symbol ,object) (,index-symbol ,index) (,tmp ,place)) (setf ,place (append (subseq ,tmp 0 (min ,index-symbol (length ,tmp))) (list ,object-symbol) (nthcdr ,index-symbol ,tmp)))))) (def :in-body end-tag-formatting (active-formatting-elements open-elements) ;; The much-feared adoption agency algorithm ;; http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency ;; XXX Better parseError messages appreciated. (loop named outer with name = (getf token :name) with outer-loop-counter = 0 with formatting-element with afe-index with furthest-block with bookmark with last-node with inner-loop-counter with index with node with common-ancestor with clone while (< outer-loop-counter 8) do (incf outer-loop-counter) ;; Step 1 paragraph 1 (setf formatting-element (element-in-active-formatting-elements name)) (cond ((or (not formatting-element) (and (member formatting-element open-elements) (not (element-in-scope (node-name formatting-element))))) (perror :adoption-agency-1.1 :name name) (return-from outer nil)) ;; Step 1 paragraph 2 ((not (member formatting-element open-elements)) (perror :adoption-agency-1.2 :name name) (setf active-formatting-elements (remove formatting-element active-formatting-elements)) (return-from outer nil))) ;; Step 1 paragraph 3 (unless (eql formatting-element (last-open-element)) (perror :adoption-agency-1.3 :name name)) ;; Step 2 ;; Start of the adoption agency algorithm proper (setf afe-index (position formatting-element open-elements)) (setf furthest-block nil) (loop for element in (subseq open-elements afe-index) do (when (member (node-name-tuple element) +special-elements+ :test #'equal) (setf furthest-block element) (return))) ;; Step 3 (when (null furthest-block) (loop for element = (pop-end open-elements) until (eql formatting-element element) finally (setf active-formatting-elements (remove element active-formatting-elements))) (return-from outer nil)) (setf common-ancestor (elt open-elements (- afe-index 1))) ;; Step 5 ;;if furthestBlock.parent: ;; furthestBlock.parent.removeChild(furthestBlock) ;; Step 5 ;; The bookmark is supposed to help us ;; identify where to reinsert nodes in step ;; 12. We have to ensure that we reinsert ;; nodes after the node before the active ;; formatting element. Note the bookmark can ;; move in step 7.4 (setf bookmark (position formatting-element active-formatting-elements)) ;; Step 6 (setf node furthest-block) (setf last-node node) (setf inner-loop-counter 0) (setf index (position node open-elements)) (loop named inner while (< inner-loop-counter 3) do (block continue (incf inner-loop-counter) ;; Node is element before node in open elements (decf index) (setf node (elt open-elements index)) (when (not (member node active-formatting-elements)) (setf open-elements (remove node open-elements)) (return-from continue)) ;; Step 6.3 (when (eql node formatting-element) (return-from inner)) ;; Step 6.4 (when (eql last-node furthest-block) (setf bookmark (1+ (position node active-formatting-elements)))) ;; Step 6.5 (setf clone (node-clone* node)) ;; Replace node with clone (symbol-macrolet ((af active-formatting-elements) (oe open-elements)) (setf (elt af (position node af)) clone) (setf (elt oe (position node oe)) clone)) (setf node clone) ;; Step 6.6 ;; Remove lastNode from its parents, if any (when (node-parent last-node) (node-remove-child (node-parent last-node) last-node)) (node-append-child node last-node) ;; Step 7.7 (setf last-node node) ;; End of inner loop )) ;; Step 7 ;; Foster parent lastNode if commonAncestor is a ;; table, tbody, tfoot, thead, or tr we need to ;; foster parent the lastNode (when (node-parent last-node) (node-remove-child (node-parent last-node) last-node)) (if (member (node-name common-ancestor) '("table" "tbody" "tfoot" "thead" "tr") :test #'string=) (multiple-value-bind (parent insert-before) (get-table-misnested-nodeposition) (node-insert-before* parent last-node insert-before)) (node-append-child* common-ancestor last-node)) ;; Step 8 (setf clone (node-clone* formatting-element)) ;; Step 9 (node-reparent-children furthest-block clone) ;; Step 10 (node-append-child* furthest-block clone) ;; Step 11 (setf active-formatting-elements (remove formatting-element active-formatting-elements)) (insert-elt-at clone bookmark active-formatting-elements) ;; Step 12 (setf open-elements (remove formatting-element open-elements)) (insert-elt-at clone (1+ (position furthest-block open-elements)) open-elements)) nil) (def :in-body end-tag-applet-marquee-object (open-elements) (when (element-in-scope (getf token :name)) (generate-implied-end-tags)) (when (string/= (node-name (last-open-element)) (getf token :name)) (perror :end-tag-too-early :name (getf token :name))) (when (element-in-scope (getf token :name)) (let ((element (pop-end open-elements))) (loop until (string= (node-name element) (getf token :name)) do (setf element (pop-end open-elements)))) (clear-active-formatting-elements)) nil) (def :in-body end-tag-br (open-elements) (perror :unexpected-end-tag-treated-as :original-name "br" :new-name "br element") (reconstruct-active-formatting-elements) (insert-element (implied-tag-token "br" :start-tag)) (pop-end open-elements) nil) (def :in-body end-tag-other (open-elements) (loop for node in (reverse open-elements) do (cond ((string= (node-name node) (getf token :name)) (generate-implied-end-tags (getf token :name)) (when (string/= (node-name (last-open-element)) (getf token :name)) (perror :unexpected-end-tag :name (getf token :name))) (loop while (not (eq node (pop-end open-elements)))) (return)) (t (when (member (node-name-tuple node) +special-elements+ :test #'equal) (perror :unexpected-end-tag :name (getf token :name)) (return))))) nil) ;; TextPhase (tagname-dispatch :text process-start-tag (default start-tag-other)) (tagname-dispatch :text process-end-tag ("script" end-tag-script) (default end-tag-other)) (def :text process-characters () (parser-insert-text (getf token :data)) nil) (def :text process-eof (phase original-phase open-elements) (perror :expected-named-closing-tag-but-got-eof (node-name (last-open-element))) (pop-end open-elements) (setf phase original-phase) t) (def :text start-tag-other () (error "Tried to process start tag ~S in RCDATA/RAWTEXT mode" (getf token :name))) (def :text end-tag-script (phase original-phase open-elements) (assert (string= (node-name (pop-end open-elements)) "script")) (setf phase original-phase) ;; The rest of this method is all stuff that only happens if ;; document.write works nil) (def :text end-tag-other (phase original-phase open-elements) (pop-end open-elements) (setf phase original-phase) nil) ;; InTablePhase ;; http://www.whatwg.org/specs/web-apps/current-work/#in-table (tagname-dispatch :in-table process-start-tag ("html" start-tag-html) ("caption" start-tag-caption) ("colgroup" start-tag-colgroup) ("col" start-tag-col) (("tbody" "tfoot" "thead") start-tag-row-group) (("td" "th" "tr") start-tag-imply-tbody) ("table" start-tag-table) (("style" "script") start-tag-style-script) ("input" start-tag-input) ("form" start-tag-form) (default start-tag-other)) (tagname-dispatch :in-table process-end-tag ("table" end-Tag-Table) (("body" "caption" "col" "colgroup" "html" "tbody" "td" "tfoot" "th" "thead" "tr") end-Tag-Ignore) (default end-tag-other)) (flet ((clear-stack-to-table-context () ;; clear the stack back to a table context (loop until (member (node-name (last-open-element)) '("table" "html") :test #'string=) do ;;(perror :unexpected-implied-end-tag-in-table ;; :name (node-name* (last-open-element))) (pop-end (slot-value *parser* 'open-elements))) ;; When the current node is it's an innerHTML case )) (def :in-table process-eof (inner-html) (if (string/= (node-name (last-open-element)) "html") (perror :eof-in-table) (assert inner-html)) ;; Stop parsing nil) (def :in-table process-space-characters (phase original-phase) (setf original-phase phase) (setf phase :in-table-text) (process-space-characters token :phase phase) nil) (def :in-table process-characters (phase original-phase) (setf original-phase phase) (setf phase :in-table-text) (process-characters token :phase phase) nil) (def :in-table insert-text (insert-from-table) ;; If we get here there must be at least one non-whitespace character ;; Do the table magic! (setf insert-from-table t) (process-characters token :phase :in-body) (setf insert-from-table nil) nil) (def :in-table start-tag-caption (phase active-formatting-elements) (clear-stack-to-table-context) (push-end :marker active-formatting-elements) (insert-element token) (setf phase :in-caption) nil) (def :in-table start-tag-colgroup (phase) (clear-stack-to-table-context) (insert-element token) (setf phase :in-column-group) nil) (def :in-table start-tag-col () (start-tag-colgroup (implied-tag-token "colgroup" :start-tag)) token) (def :in-table start-tag-row-group (phase) (clear-stack-to-table-context) (insert-element token) (setf phase :in-table-body) nil) (def :in-table start-tag-imply-tbody () (start-tag-row-group (implied-tag-token "tbody" :start-tag)) token) (def :in-table start-tag-table (phase inner-html) (perror :unexpected-start-tag-implies-end-tag :start-name "table" :end-name "table") (process-end-tag (implied-tag-token "table") :phase phase) (unless inner-html token)) (def :in-table start-tag-style-script () (process-start-tag token :phase :in-head)) (def :in-table start-tag-input (open-elements) (let ((type (assoc "type" (getf token :data) :test #'string=))) (cond ((and type (string= (ascii-upper-2-lower (cdr type)) "hidden")) (perror :unexpected-hidden-input-in-table) (insert-element token) ;; XXX associate with form (pop-end open-elements)) (t (start-tag-other token)))) nil) (def :in-table start-tag-form (form-pointer open-elements) (perror :unexpected-form-in-table) (unless form-pointer (insert-element token) (setf form-pointer (last-open-element)) (pop-end open-elements)) nil) (def :in-table start-tag-other (insert-from-table) (perror :unexpected-start-tag-implies-table-voodoo :name (getf token :name)) ;; Do the table magic! (setf insert-from-table t) (process-start-tag token :phase :in-body) (setf insert-from-table nil) nil) (def :in-table end-tag-table (inner-html open-elements) (cond ((element-in-scope "table" "table") (generate-implied-end-tags) (unless (equal (node-name (last-open-element)) "table") (perror :end-tag-too-early-named :got-name "table" :expected-name (node-name (last-open-element)))) (loop until (equal (node-name (last-open-element)) "table") do (pop-end open-elements)) (pop-end open-elements) (reset-insertion-mode)) (t ;; innerHTML case (assert inner-html) (perror :end-tag-table-in-table-inner-html-case))) nil) (def :in-table end-tag-ignore () (perror :unexpected-end-tag :name (getf token :name)) nil) (def :in-table end-tag-other (insert-from-table) (perror :unexpected-end-tag-implies-table-voodoo :name (getf token :name)) ;; Do the table magic! (setf insert-from-table t) (process-end-tag token :phase :in-body) (setf insert-from-table nil) nil)) ;; InTableTextPhase (defun flush-characters () (with-slots (character-tokens) *parser* (let ((data (apply #'concatenate 'string (loop for item in (reverse character-tokens) collect (getf item :data))))) (if (not (only-space-characters-p data)) (insert-text (list :type :characters :data data) :phase :in-table) (parser-insert-text data))) (setf character-tokens nil))) (def :in-table-text process-comment (phase original-phase) (flush-characters) (setf phase original-phase) token) (def :in-table-text process-eof (phase original-phase) (flush-characters) (setf phase original-phase) t) (def :in-table-text process-characters (character-tokens) (unless (equal (getf token :data) (string #\u0000)) (push token character-tokens)) nil) (def :in-table-text process-space-characters (character-tokens) ;; pretty sure we should never reach here (push token character-tokens) nil) (def :in-table-text process-start-tag (phase original-phase) (flush-characters) (setf phase original-phase) token) (def :in-table-text process-end-tag (phase original-phase) (flush-characters) (setf phase original-phase) token) ;; InCaptionPhase ;; http://www.whatwg.org/specs/web-apps/current-work/#in-caption (tagname-dispatch :in-caption process-start-tag ("html" start-tag-html) (("caption" "col" "colgroup" "tbody" "td" "tfoot" "th" "thead" "tr") start-tag-table-element) (default start-tag-other)) (tagname-dispatch :in-caption process-end-tag ("caption" end-tag-caption) ("table" end-tag-table) (("body" "col" "colgroup" "html" "tbody" "td" "tfoot" "th" "thead" "tr") end-tag-ignore) (default end-tag-other)) (flet ((ignore-end-tag-caption () (not (element-in-scope "caption" "table")))) (def :in-caption process-eof () (process-eof token :phase :in-body)) (def :in-caption process-characters () (process-characters token :phase :in-body)) (def :in-caption start-tag-table-element (phase) (perror :start-tag-table-element-in-caption) ;; XXX Have to duplicate logic here to find out if the tag is ignored (prog1 (unless (ignore-end-tag-caption) token) (process-end-tag (implied-tag-token "caption") :phase phase))) (def :in-caption start-tag-other () (process-start-tag token :phase :in-body)) (def :in-caption end-tag-caption (phase inner-html open-elements) (cond ((not (ignore-end-tag-caption)) ;; AT this code is quite similar to endTagTable in "InTable" (generate-implied-end-tags) (unless (equal (node-name (last-open-element)) "caption") (perror :expected-one-end-tag-but-got-another :got-name "caption" :expected-name (node-name (last-open-element)))) (loop until (equal (node-name (last-open-element)) "caption") do (pop-end open-elements)) (clear-active-formatting-elements) (setf phase :in-table)) (t ;; innerHTML case (assert inner-html) (perror :end-tag-caption-in-caption-inner-html-mode))) nil) (def :in-caption end-tag-table (phase) (perror :end-tag-table-in-caption) (prog1 (unless (ignore-end-tag-caption) token) (process-end-tag (implied-tag-token "caption") :phase phase))) (def :in-caption end-tag-ignore () (perror :unexpected-end-tag :name (getf token :name)) nil) (def :in-caption end-tag-other () (process-end-tag token :phase :in-body))) ;; InColumnGroupPhase ;; http://www.whatwg.org/specs/web-apps/current-work/#in-column (tagname-dispatch :in-column-group process-start-tag ("html" start-tag-html) ("col" start-tag-col) (default start-tag-other)) (tagname-dispatch :in-column-group process-end-tag ("colgroup" end-tag-colgroup) ("col" end-tag-col) (default end-tag-other)) (flet ((ignore-end-tag-colgroup () (string= (node-name (last-open-element)) "html"))) (def :in-column-group process-eof (inner-html) (cond ((string= (node-name (last-open-element)) "html") (assert inner-html) nil) (t (let ((ignore-end-tag (ignore-end-tag-colgroup))) (end-tag-colgroup (implied-tag-token "colgroup")) (not ignore-end-tag))))) (def :in-column-group process-characters () (prog1 (unless (ignore-end-tag-colgroup) token) (end-tag-colgroup (implied-tag-token "colgroup")))) (def :in-column-group start-tag-col (open-elements) (insert-element token) (pop-end open-elements) nil) (def :in-column-group start-tag-other () (prog1 (unless (ignore-end-tag-colgroup) token) (end-tag-colgroup (implied-tag-token "colgroup")))) (def :in-column-group end-tag-colgroup (phase open-elements) (cond ((ignore-end-tag-colgroup) ;; innerHTML case (perror :end-tag-colgroup-in-column-group-inner-html-mode)) (t (pop-end open-elements) (setf phase :in-table))) nil) (def :in-column-group end-tag-col () (perror :no-end-tag :name "col") nil) (def :in-column-group end-tag-other () (prog1 (unless (ignore-end-tag-colgroup) token) (end-tag-colgroup (implied-tag-token "colgroup"))))) ;; InTableBodyPhase ;; http://www.whatwg.org/specs/web-apps/current-work/#in-table0 (tagname-dispatch :in-table-body process-start-tag ("html" start-tag-html) ("tr" start-tag-tr) (("td" "th") start-tag-table-cell) (("caption" "col" "colgroup" "tbody" "tfoot" "thead") start-tag-table-other) (default start-tag-other)) (tagname-dispatch :in-table-body process-end-tag (("tbody" "tfoot" "thead") end-Tag-Table-Row-Group) ("table" end-Tag-Table) (("body" "caption" "col" "colgroup" "html" "td" "th" "tr") end-Tag-Ignore) (default end-tag-other)) (flet ((clear-stack-to-table-body-context () (loop until (member (node-name (last-open-element)) '("tbody" "tfoot" "thead" "html") :test #'string=) do ;;(perror :unexpected-implied-end-tag-in-table ;; :name (node-name (last-open-element))) (pop-end (slot-value *parser* 'open-elements))) (when (string= (node-name (last-open-element)) "html") (assert (slot-value *parser* 'inner-html))))) (def :in-table-body process-eof () (process-eof token :phase :in-table)) (def :in-table-body process-space-characters () (process-space-characters token :phase :in-table)) (def :in-table-body process-characters () (process-characters token :phase :in-table)) (def :in-table-body start-tag-tr (phase) (clear-stack-to-table-body-context) (insert-element token) (setf phase :in-row) nil) (def :in-table-body start-tag-table-cell () (perror :unexpected-cell-in-table-body :name (getf token :name)) (start-tag-tr (implied-tag-token "tr" :start-tag)) token) (def :in-table-body start-tag-table-other (inner-html) ;; XXX AT Any ideas on how to share this with endTagTable? (cond ((or (element-in-scope "tbody" "table") (element-in-scope "thead" "table") (element-in-scope "tfoot" "table")) (clear-stack-to-table-body-context) (end-tag-table-row-group (implied-tag-token (node-name (last-open-element)))) token) (t ;; innerHTML case (assert inner-html) (perror :start-tag-table-other-in-table-body-inner-html-mode) nil))) (def :in-table-body start-tag-other () (process-start-tag token :phase :in-table)) (def :in-table-body end-tag-table-row-group (phase open-elements) (cond ((element-in-scope (getf token :name) "table") (clear-stack-to-table-body-context) (pop-end open-elements) (setf phase :in-table)) (t (perror :unexpected-end-tag-in-table-body :name (getf token :name)))) nil) (def :in-table-body end-tag-table (inner-html) (cond ((or (element-in-scope "tbody" "table") (element-in-scope "thead" "table") (element-in-scope "tfoot" "table")) (clear-stack-to-table-body-context) (end-tag-table-row-group (implied-tag-token (node-name (last-open-element)))) token) (t ;; innerHTML case (assert inner-html) (perror :end-tag-table-other-in-table-body-inner-html-mode) nil))) (def :in-table-body end-tag-ignore () (perror :unexpected-end-tag-in-table-body :name (getf token :name)) nil) (def :in-table-body end-tag-other () (process-end-tag token :phase :in-table))) ;; InRowPhase ;; http://www.whatwg.org/specs/web-apps/current-work/#in-row (tagname-dispatch :in-row process-start-tag ("html" start-tag-html) (("td" "th") start-tag-table-cell) (("caption" "col" "colgroup" "tbody" "tfoot" "thead" "tr") start-tag-table-other) (default start-tag-other)) (tagname-dispatch :in-row process-end-tag ("tr" end-tag-tr) ("table" end-tag-table) (("tbody" "tfoot" "thead") end-tag-table-row-group) (("body" "caption" "col" "colgroup" "html" "td" "th") end-tag-ignore) (default end-tag-other)) ;; helper methods (XXX unify this with other table helper methods) (flet ((clear-stack-to-table-row-context () (loop until (member (node-name (last-open-element)) '("tr" "html") :test #'string=) do (perror :unexpected-implied-end-tag-in-table-row :name (node-name (last-open-element))) (pop-end (slot-value *parser* 'open-elements)))) (ignore-end-tag-tr () (not (element-in-scope "tr" "table")))) ;; the rest (def :in-row process-eof () (process-eof token :phase :in-table) nil) (def :in-row process-space-characters () (process-space-characters token :phase :in-table)) (def :in-row process-characters () (process-characters token :phase :in-table)) (def :in-row start-tag-table-cell (phase active-formatting-elements) (clear-stack-to-table-row-context) (insert-element token) (setf phase :in-cell) (push-end :marker active-formatting-elements) nil) (def :in-row start-tag-table-other () (let ((ignore-end-tag (ignore-end-tag-tr))) (end-tag-tr (implied-tag-token "tr")) ;; XXX how are we sure it's always ignored in the innerHTML case? (unless ignore-end-tag token))) (def :in-row start-tag-other () (process-start-tag token :phase :in-table)) (def :in-row end-tag-tr (phase inner-html open-elements) (cond ((not (ignore-end-tag-tr)) (clear-stack-to-table-row-context) (pop-end open-elements) (setf phase :in-table-body)) (t ;; innerHTML case (assert inner-html) (perror :end-tag-tr-inner-html-mode))) nil) (def :in-row end-tag-table () (let ((ignore-end-tag (ignore-end-tag-tr))) (end-tag-tr (implied-tag-token "tr")) ;; Reprocess the current tag if the tr end tag was not ignored ;; XXX how are we sure it's always ignored in the innerHTML case? (unless ignore-end-tag token))) (def :in-row end-tag-table-row-group () (cond ((element-in-scope (getf token :name) "table") (end-tag-tr (implied-tag-token "tr")) token) (t (perror :end-tag-table-row-group-something-wrong) nil))) (def :in-row end-tag-ignore () (perror :unexpected-end-tag-in-table-row (getf token :name)) nil) (def :in-row end-tag-other () (process-end-tag token :phase :in-table))) ;; InCellPhase ;; http://www.whatwg.org/specs/web-apps/current-work/#in-cell (tagname-dispatch :in-cell process-start-tag ("html" start-tag-html) (("caption" "col" "colgroup" "tbody" "td" "tfoot" "th" "thead" "tr") start-tag-table-other) (default start-tag-other)) (tagname-dispatch :in-cell process-end-tag (("td" "th") end-tag-table-cell) (("body" "caption" "col" "colgroup" "html") end-tag-ignore) (("table" "tbody" "tfoot" "thead" "tr") end-tag-imply) (default end-tag-other)) (flet ((close-cell () (if (element-in-scope "td" "table") (end-tag-table-cell (implied-tag-token "td")) (if (element-in-scope "th" "table") (end-tag-table-cell (implied-tag-token "th")))))) (def :in-cell process-eof () (process-eof token :phase :in-body) nil) (def :in-cell process-characters () (process-characters token :phase :in-body)) (def :in-cell start-tag-table-other (inner-html) (cond ((or (element-in-scope "td" "table") (element-in-scope "th" "table")) (close-cell) token) (t ;; innerHTML case (assert inner-html) (perror :start-tag-table-other-in-inner-html-mode) nil))) (def :in-cell start-tag-other () (process-start-tag token :phase :in-body)) (def :in-cell end-tag-table-cell (phase open-elements) (cond ((element-in-scope (getf token :name) "table") (generate-implied-end-tags (getf token :name)) (cond ((not (equal (node-name (last-open-element)) (getf token :name))) (perror :unexpected-cell-end-tag :name (getf token :name)) (loop until (equal (node-name (pop-end open-elements)) (getf token :name)))) (t (pop-end open-elements))) (clear-active-formatting-elements) (setf phase :in-row)) (t (perror :unexpected-end-tag :name (getf token :name)))) nil) (def :in-cell end-tag-ignore () (perror :unexpected-end-tag :name (getf token :name)) nil) (def :in-cell end-tag-imply () (cond ((element-in-scope (getf token :name) "table") (close-cell) token) (t ;; sometimes innerHTML case (perror :end-tag-imply-sometimes-inner-html-case) nil))) (def :in-cell end-tag-other () (process-end-tag token :phase :in-body))) ;; InSelectPhase (tagname-dispatch :in-select process-start-tag ("html" start-tag-html) ("option" start-tag-option) ("optgroup" start-tag-optgroup) ("select" start-tag-select) (("input" "keygen" "textarea") start-tag-input) ("script" start-tag-script) (default start-tag-other)) (tagname-dispatch :in-select process-end-tag ("option" end-tag-option) ("optgroup" end-tag-optgroup) ("select" end-tag-select) (default end-tag-other)) ;; http://www.whatwg.org/specs/web-apps/current-work/#in-select (def :in-select process-eof (inner-html) (if (not (equal (node-name (last-open-element)) "html")) (perror :eof-in-select) (assert inner-html)) nil) (def :in-select process-characters () (unless (equal (getf token :data) (string #\u0000)) (parser-insert-text (getf token :data))) nil) (def :in-select start-tag-option (open-elements) ;; We need to imply if