|
|
|
@ -25,20 +25,22 @@ type parser struct {
|
|
|
|
|
hasSelfClosingToken bool
|
|
|
|
|
// doc is the document root element.
|
|
|
|
|
doc *Node
|
|
|
|
|
// The stack of open elements (section 12.2.3.2) and active formatting
|
|
|
|
|
// elements (section 12.2.3.3).
|
|
|
|
|
// The stack of open elements (section 12.2.4.2) and active formatting
|
|
|
|
|
// elements (section 12.2.4.3).
|
|
|
|
|
oe, afe nodeStack
|
|
|
|
|
// Element pointers (section 12.2.3.4).
|
|
|
|
|
// Element pointers (section 12.2.4.4).
|
|
|
|
|
head, form *Node
|
|
|
|
|
// Other parsing state flags (section 12.2.3.5).
|
|
|
|
|
// Other parsing state flags (section 12.2.4.5).
|
|
|
|
|
scripting, framesetOK bool
|
|
|
|
|
// The stack of template insertion modes
|
|
|
|
|
templateStack insertionModeStack
|
|
|
|
|
// im is the current insertion mode.
|
|
|
|
|
im insertionMode
|
|
|
|
|
// originalIM is the insertion mode to go back to after completing a text
|
|
|
|
|
// or inTableText insertion mode.
|
|
|
|
|
originalIM insertionMode
|
|
|
|
|
// fosterParenting is whether new elements should be inserted according to
|
|
|
|
|
// the foster parenting rules (section 12.2.5.3).
|
|
|
|
|
// the foster parenting rules (section 12.2.6.1).
|
|
|
|
|
fosterParenting bool
|
|
|
|
|
// quirks is whether the parser is operating in "quirks mode."
|
|
|
|
|
quirks bool
|
|
|
|
@ -56,7 +58,7 @@ func (p *parser) top() *Node {
|
|
|
|
|
return p.doc
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Stop tags for use in popUntil. These come from section 12.2.3.2.
|
|
|
|
|
// Stop tags for use in popUntil. These come from section 12.2.4.2.
|
|
|
|
|
var (
|
|
|
|
|
defaultScopeStopTags = map[string][]a.Atom{
|
|
|
|
|
"": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
|
|
|
|
@ -79,7 +81,7 @@ const (
|
|
|
|
|
|
|
|
|
|
// popUntil pops the stack of open elements at the highest element whose tag
|
|
|
|
|
// is in matchTags, provided there is no higher element in the scope's stop
|
|
|
|
|
// tags (as defined in section 12.2.3.2). It returns whether or not there was
|
|
|
|
|
// tags (as defined in section 12.2.4.2). It returns whether or not there was
|
|
|
|
|
// such an element. If there was not, popUntil leaves the stack unchanged.
|
|
|
|
|
//
|
|
|
|
|
// For example, the set of stop tags for table scope is: "html", "table". If
|
|
|
|
@ -126,7 +128,7 @@ func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
|
|
|
|
|
return -1
|
|
|
|
|
}
|
|
|
|
|
case tableScope:
|
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Table {
|
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
|
|
|
|
|
return -1
|
|
|
|
|
}
|
|
|
|
|
case selectScope:
|
|
|
|
@ -162,17 +164,17 @@ func (p *parser) clearStackToContext(s scope) {
|
|
|
|
|
tagAtom := p.oe[i].DataAtom
|
|
|
|
|
switch s {
|
|
|
|
|
case tableScope:
|
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Table {
|
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
|
|
|
|
|
p.oe = p.oe[:i+1]
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
case tableRowScope:
|
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Tr {
|
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
|
|
|
|
|
p.oe = p.oe[:i+1]
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
case tableBodyScope:
|
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead {
|
|
|
|
|
if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
|
|
|
|
|
p.oe = p.oe[:i+1]
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
@ -183,7 +185,7 @@ func (p *parser) clearStackToContext(s scope) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// generateImpliedEndTags pops nodes off the stack of open elements as long as
|
|
|
|
|
// the top node has a tag name of dd, dt, li, option, optgroup, p, rp, or rt.
|
|
|
|
|
// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
|
|
|
|
|
// If exceptions are specified, nodes with that name will not be popped off.
|
|
|
|
|
func (p *parser) generateImpliedEndTags(exceptions ...string) {
|
|
|
|
|
var i int
|
|
|
|
@ -192,7 +194,7 @@ loop:
|
|
|
|
|
n := p.oe[i]
|
|
|
|
|
if n.Type == ElementNode {
|
|
|
|
|
switch n.DataAtom {
|
|
|
|
|
case a.Dd, a.Dt, a.Li, a.Option, a.Optgroup, a.P, a.Rp, a.Rt:
|
|
|
|
|
case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
|
|
|
|
|
for _, except := range exceptions {
|
|
|
|
|
if n.Data == except {
|
|
|
|
|
break loop
|
|
|
|
@ -234,9 +236,9 @@ func (p *parser) shouldFosterParent() bool {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// fosterParent adds a child node according to the foster parenting rules.
|
|
|
|
|
// Section 12.2.5.3, "foster parenting".
|
|
|
|
|
// Section 12.2.6.1, "foster parenting".
|
|
|
|
|
func (p *parser) fosterParent(n *Node) {
|
|
|
|
|
var table, parent, prev *Node
|
|
|
|
|
var table, parent, prev, template *Node
|
|
|
|
|
var i int
|
|
|
|
|
for i = len(p.oe) - 1; i >= 0; i-- {
|
|
|
|
|
if p.oe[i].DataAtom == a.Table {
|
|
|
|
@ -245,6 +247,19 @@ func (p *parser) fosterParent(n *Node) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var j int
|
|
|
|
|
for j = len(p.oe) - 1; j >= 0; j-- {
|
|
|
|
|
if p.oe[j].DataAtom == a.Template {
|
|
|
|
|
template = p.oe[j]
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if template != nil && (table == nil || j > i) {
|
|
|
|
|
template.AppendChild(n)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if table == nil {
|
|
|
|
|
// The foster parent is the html element.
|
|
|
|
|
parent = p.oe[0]
|
|
|
|
@ -304,7 +319,7 @@ func (p *parser) addElement() {
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.3.3.
|
|
|
|
|
// Section 12.2.4.3.
|
|
|
|
|
func (p *parser) addFormattingElement() {
|
|
|
|
|
tagAtom, attr := p.tok.DataAtom, p.tok.Attr
|
|
|
|
|
p.addElement()
|
|
|
|
@ -351,7 +366,7 @@ findIdenticalElements:
|
|
|
|
|
p.afe = append(p.afe, p.top())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.3.3.
|
|
|
|
|
// Section 12.2.4.3.
|
|
|
|
|
func (p *parser) clearActiveFormattingElements() {
|
|
|
|
|
for {
|
|
|
|
|
n := p.afe.pop()
|
|
|
|
@ -361,7 +376,7 @@ func (p *parser) clearActiveFormattingElements() {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.3.3.
|
|
|
|
|
// Section 12.2.4.3.
|
|
|
|
|
func (p *parser) reconstructActiveFormattingElements() {
|
|
|
|
|
n := p.afe.top()
|
|
|
|
|
if n == nil {
|
|
|
|
@ -390,12 +405,12 @@ func (p *parser) reconstructActiveFormattingElements() {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.4.
|
|
|
|
|
// Section 12.2.5.
|
|
|
|
|
func (p *parser) acknowledgeSelfClosingTag() {
|
|
|
|
|
p.hasSelfClosingToken = false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// An insertion mode (section 12.2.3.1) is the state transition function from
|
|
|
|
|
// An insertion mode (section 12.2.4.1) is the state transition function from
|
|
|
|
|
// a particular state in the HTML5 parser's state machine. It updates the
|
|
|
|
|
// parser's fields depending on parser.tok (where ErrorToken means EOF).
|
|
|
|
|
// It returns whether the token was consumed.
|
|
|
|
@ -403,7 +418,7 @@ type insertionMode func(*parser) bool
|
|
|
|
|
|
|
|
|
|
// setOriginalIM sets the insertion mode to return to after completing a text or
|
|
|
|
|
// inTableText insertion mode.
|
|
|
|
|
// Section 12.2.3.1, "using the rules for".
|
|
|
|
|
// Section 12.2.4.1, "using the rules for".
|
|
|
|
|
func (p *parser) setOriginalIM() {
|
|
|
|
|
if p.originalIM != nil {
|
|
|
|
|
panic("html: bad parser state: originalIM was set twice")
|
|
|
|
@ -411,18 +426,38 @@ func (p *parser) setOriginalIM() {
|
|
|
|
|
p.originalIM = p.im
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.3.1, "reset the insertion mode".
|
|
|
|
|
// Section 12.2.4.1, "reset the insertion mode".
|
|
|
|
|
func (p *parser) resetInsertionMode() {
|
|
|
|
|
for i := len(p.oe) - 1; i >= 0; i-- {
|
|
|
|
|
n := p.oe[i]
|
|
|
|
|
if i == 0 && p.context != nil {
|
|
|
|
|
last := i == 0
|
|
|
|
|
if last && p.context != nil {
|
|
|
|
|
n = p.context
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch n.DataAtom {
|
|
|
|
|
case a.Select:
|
|
|
|
|
if !last {
|
|
|
|
|
for ancestor, first := n, p.oe[0]; ancestor != first; {
|
|
|
|
|
if ancestor == first {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
ancestor = p.oe[p.oe.index(ancestor)-1]
|
|
|
|
|
switch ancestor.DataAtom {
|
|
|
|
|
case a.Template:
|
|
|
|
|
p.im = inSelectIM
|
|
|
|
|
return
|
|
|
|
|
case a.Table:
|
|
|
|
|
p.im = inSelectInTableIM
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
p.im = inSelectIM
|
|
|
|
|
case a.Td, a.Th:
|
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
//
|
|
|
|
|
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
|
|
|
|
|
p.im = inCellIM
|
|
|
|
|
case a.Tr:
|
|
|
|
|
p.im = inRowIM
|
|
|
|
@ -434,25 +469,41 @@ func (p *parser) resetInsertionMode() {
|
|
|
|
|
p.im = inColumnGroupIM
|
|
|
|
|
case a.Table:
|
|
|
|
|
p.im = inTableIM
|
|
|
|
|
case a.Template:
|
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
if n.Namespace != "" {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
p.im = p.templateStack.top()
|
|
|
|
|
case a.Head:
|
|
|
|
|
p.im = inBodyIM
|
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
//
|
|
|
|
|
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
|
|
|
|
|
p.im = inHeadIM
|
|
|
|
|
case a.Body:
|
|
|
|
|
p.im = inBodyIM
|
|
|
|
|
case a.Frameset:
|
|
|
|
|
p.im = inFramesetIM
|
|
|
|
|
case a.Html:
|
|
|
|
|
p.im = beforeHeadIM
|
|
|
|
|
if p.head == nil {
|
|
|
|
|
p.im = beforeHeadIM
|
|
|
|
|
} else {
|
|
|
|
|
p.im = afterHeadIM
|
|
|
|
|
}
|
|
|
|
|
default:
|
|
|
|
|
if last {
|
|
|
|
|
p.im = inBodyIM
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
p.im = inBodyIM
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// whitespace holds the space, tab, carriage return, line feed and form feed
// characters.
const whitespace = " \t\r\n\f"
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.1.
|
|
|
|
|
// Section 12.2.6.4.1.
|
|
|
|
|
func initialIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case TextToken:
|
|
|
|
@ -479,7 +530,7 @@ func initialIM(p *parser) bool {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.2.
|
|
|
|
|
// Section 12.2.6.4.2.
|
|
|
|
|
func beforeHTMLIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case DoctypeToken:
|
|
|
|
@ -517,7 +568,7 @@ func beforeHTMLIM(p *parser) bool {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.3.
|
|
|
|
|
// Section 12.2.6.4.3.
|
|
|
|
|
func beforeHeadIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case TextToken:
|
|
|
|
@ -560,7 +611,7 @@ func beforeHeadIM(p *parser) bool {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.4.
|
|
|
|
|
// Section 12.2.6.4.4.
|
|
|
|
|
func inHeadIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case TextToken:
|
|
|
|
@ -590,19 +641,41 @@ func inHeadIM(p *parser) bool {
|
|
|
|
|
case a.Head:
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
case a.Template:
|
|
|
|
|
p.addElement()
|
|
|
|
|
p.afe = append(p.afe, &scopeMarker)
|
|
|
|
|
p.framesetOK = false
|
|
|
|
|
p.im = inTemplateIM
|
|
|
|
|
p.templateStack = append(p.templateStack, inTemplateIM)
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
case EndTagToken:
|
|
|
|
|
switch p.tok.DataAtom {
|
|
|
|
|
case a.Head:
|
|
|
|
|
n := p.oe.pop()
|
|
|
|
|
if n.DataAtom != a.Head {
|
|
|
|
|
panic("html: bad parser state: <head> element not found, in the in-head insertion mode")
|
|
|
|
|
}
|
|
|
|
|
p.oe.pop()
|
|
|
|
|
p.im = afterHeadIM
|
|
|
|
|
return true
|
|
|
|
|
case a.Body, a.Html, a.Br:
|
|
|
|
|
p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
|
|
|
|
|
return false
|
|
|
|
|
case a.Template:
|
|
|
|
|
if !p.oe.contains(a.Template) {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
//
|
|
|
|
|
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
|
|
|
|
|
p.generateImpliedEndTags()
|
|
|
|
|
for i := len(p.oe) - 1; i >= 0; i-- {
|
|
|
|
|
if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
|
|
|
|
|
p.oe = p.oe[:i]
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
p.clearActiveFormattingElements()
|
|
|
|
|
p.templateStack.pop()
|
|
|
|
|
p.resetInsertionMode()
|
|
|
|
|
return true
|
|
|
|
|
default:
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
@ -622,7 +695,7 @@ func inHeadIM(p *parser) bool {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.6.
|
|
|
|
|
// Section 12.2.6.4.6.
|
|
|
|
|
func afterHeadIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case TextToken:
|
|
|
|
@ -648,7 +721,7 @@ func afterHeadIM(p *parser) bool {
|
|
|
|
|
p.addElement()
|
|
|
|
|
p.im = inFramesetIM
|
|
|
|
|
return true
|
|
|
|
|
case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
|
|
|
|
|
case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
|
|
|
|
|
p.oe = append(p.oe, p.head)
|
|
|
|
|
defer p.oe.remove(p.head)
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
@ -660,6 +733,8 @@ func afterHeadIM(p *parser) bool {
|
|
|
|
|
switch p.tok.DataAtom {
|
|
|
|
|
case a.Body, a.Html, a.Br:
|
|
|
|
|
// Drop down to creating an implied <body> tag.
|
|
|
|
|
case a.Template:
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
|
default:
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
@ -697,7 +772,7 @@ func copyAttributes(dst *Node, src Token) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.7.
|
|
|
|
|
// Section 12.2.6.4.7.
|
|
|
|
|
func inBodyIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case TextToken:
|
|
|
|
@ -727,10 +802,16 @@ func inBodyIM(p *parser) bool {
|
|
|
|
|
case StartTagToken:
|
|
|
|
|
switch p.tok.DataAtom {
|
|
|
|
|
case a.Html:
|
|
|
|
|
if p.oe.contains(a.Template) {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
copyAttributes(p.oe[0], p.tok)
|
|
|
|
|
case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
|
|
|
|
|
case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
|
case a.Body:
|
|
|
|
|
if p.oe.contains(a.Template) {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
if len(p.oe) >= 2 {
|
|
|
|
|
body := p.oe[1]
|
|
|
|
|
if body.Type == ElementNode && body.DataAtom == a.Body {
|
|
|
|
@ -767,9 +848,13 @@ func inBodyIM(p *parser) bool {
|
|
|
|
|
// The newline, if any, will be dealt with by the TextToken case.
|
|
|
|
|
p.framesetOK = false
|
|
|
|
|
case a.Form:
|
|
|
|
|
if p.form == nil {
|
|
|
|
|
p.popUntil(buttonScope, a.P)
|
|
|
|
|
p.addElement()
|
|
|
|
|
if p.form != nil && !p.oe.contains(a.Template) {
|
|
|
|
|
// Ignore the token
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
p.popUntil(buttonScope, a.P)
|
|
|
|
|
p.addElement()
|
|
|
|
|
if !p.oe.contains(a.Template) {
|
|
|
|
|
p.form = p.top()
|
|
|
|
|
}
|
|
|
|
|
case a.Li:
|
|
|
|
@ -903,6 +988,14 @@ func inBodyIM(p *parser) bool {
|
|
|
|
|
p.acknowledgeSelfClosingTag()
|
|
|
|
|
p.popUntil(buttonScope, a.P)
|
|
|
|
|
p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
|
|
|
|
|
if p.form == nil {
|
|
|
|
|
// NOTE: The 'isindex' element has been removed,
|
|
|
|
|
// and the 'template' element has not been designed to be
|
|
|
|
|
// collaborative with the isindex element.
|
|
|
|
|
//
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
if action != "" {
|
|
|
|
|
p.form.Attr = []Attribute{{Key: "action", Val: action}}
|
|
|
|
|
}
|
|
|
|
@ -952,11 +1045,16 @@ func inBodyIM(p *parser) bool {
|
|
|
|
|
}
|
|
|
|
|
p.reconstructActiveFormattingElements()
|
|
|
|
|
p.addElement()
|
|
|
|
|
case a.Rp, a.Rt:
|
|
|
|
|
case a.Rb, a.Rtc:
|
|
|
|
|
if p.elementInScope(defaultScope, a.Ruby) {
|
|
|
|
|
p.generateImpliedEndTags()
|
|
|
|
|
}
|
|
|
|
|
p.addElement()
|
|
|
|
|
case a.Rp, a.Rt:
|
|
|
|
|
if p.elementInScope(defaultScope, a.Ruby) {
|
|
|
|
|
p.generateImpliedEndTags("rtc")
|
|
|
|
|
}
|
|
|
|
|
p.addElement()
|
|
|
|
|
case a.Math, a.Svg:
|
|
|
|
|
p.reconstructActiveFormattingElements()
|
|
|
|
|
if p.tok.DataAtom == a.Math {
|
|
|
|
@ -993,15 +1091,29 @@ func inBodyIM(p *parser) bool {
|
|
|
|
|
case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
|
|
|
|
|
p.popUntil(defaultScope, p.tok.DataAtom)
|
|
|
|
|
case a.Form:
|
|
|
|
|
node := p.form
|
|
|
|
|
p.form = nil
|
|
|
|
|
i := p.indexOfElementInScope(defaultScope, a.Form)
|
|
|
|
|
if node == nil || i == -1 || p.oe[i] != node {
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
if p.oe.contains(a.Template) {
|
|
|
|
|
i := p.indexOfElementInScope(defaultScope, a.Form)
|
|
|
|
|
if i == -1 {
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
p.generateImpliedEndTags()
|
|
|
|
|
if p.oe[i].DataAtom != a.Form {
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
p.popUntil(defaultScope, a.Form)
|
|
|
|
|
} else {
|
|
|
|
|
node := p.form
|
|
|
|
|
p.form = nil
|
|
|
|
|
i := p.indexOfElementInScope(defaultScope, a.Form)
|
|
|
|
|
if node == nil || i == -1 || p.oe[i] != node {
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
p.generateImpliedEndTags()
|
|
|
|
|
p.oe.remove(node)
|
|
|
|
|
}
|
|
|
|
|
p.generateImpliedEndTags()
|
|
|
|
|
p.oe.remove(node)
|
|
|
|
|
case a.P:
|
|
|
|
|
if !p.elementInScope(buttonScope, a.P) {
|
|
|
|
|
p.parseImpliedToken(StartTagToken, a.P, a.P.String())
|
|
|
|
@ -1022,6 +1134,8 @@ func inBodyIM(p *parser) bool {
|
|
|
|
|
case a.Br:
|
|
|
|
|
p.tok.Type = StartTagToken
|
|
|
|
|
return false
|
|
|
|
|
case a.Template:
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
|
default:
|
|
|
|
|
p.inBodyEndTagOther(p.tok.DataAtom)
|
|
|
|
|
}
|
|
|
|
@ -1030,6 +1144,21 @@ func inBodyIM(p *parser) bool {
|
|
|
|
|
Type: CommentNode,
|
|
|
|
|
Data: p.tok.Data,
|
|
|
|
|
})
|
|
|
|
|
case ErrorToken:
|
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
if len(p.templateStack) > 0 {
|
|
|
|
|
p.im = inTemplateIM
|
|
|
|
|
return false
|
|
|
|
|
} else {
|
|
|
|
|
for _, e := range p.oe {
|
|
|
|
|
switch e.DataAtom {
|
|
|
|
|
case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
|
|
|
|
|
a.Thead, a.Tr, a.Body, a.Html:
|
|
|
|
|
default:
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true
|
|
|
|
@ -1160,7 +1289,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
|
|
|
|
|
// "Any other end tag" handling from 12.2.5.5 The rules for parsing tokens in foreign content
|
|
|
|
|
// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
|
|
|
|
|
// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
|
|
|
|
|
func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
|
|
|
|
|
for i := len(p.oe) - 1; i >= 0; i-- {
|
|
|
|
@ -1174,7 +1303,7 @@ func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.8.
|
|
|
|
|
// Section 12.2.6.4.8.
|
|
|
|
|
func textIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case ErrorToken:
|
|
|
|
@ -1203,12 +1332,9 @@ func textIM(p *parser) bool {
|
|
|
|
|
return p.tok.Type == EndTagToken
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.9.
|
|
|
|
|
// Section 12.2.6.4.9.
|
|
|
|
|
func inTableIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case ErrorToken:
|
|
|
|
|
// Stop parsing.
|
|
|
|
|
return true
|
|
|
|
|
case TextToken:
|
|
|
|
|
p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
|
|
|
|
|
switch p.oe.top().DataAtom {
|
|
|
|
@ -1249,7 +1375,7 @@ func inTableIM(p *parser) bool {
|
|
|
|
|
}
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
case a.Style, a.Script:
|
|
|
|
|
case a.Style, a.Script, a.Template:
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
|
case a.Input:
|
|
|
|
|
for _, t := range p.tok.Attr {
|
|
|
|
@ -1261,7 +1387,7 @@ func inTableIM(p *parser) bool {
|
|
|
|
|
}
|
|
|
|
|
// Otherwise drop down to the default action.
|
|
|
|
|
case a.Form:
|
|
|
|
|
if p.form != nil {
|
|
|
|
|
if p.oe.contains(a.Template) || p.form != nil {
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
@ -1291,6 +1417,8 @@ func inTableIM(p *parser) bool {
|
|
|
|
|
case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
case a.Template:
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
|
}
|
|
|
|
|
case CommentToken:
|
|
|
|
|
p.addChild(&Node{
|
|
|
|
@ -1301,6 +1429,8 @@ func inTableIM(p *parser) bool {
|
|
|
|
|
case DoctypeToken:
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
case ErrorToken:
|
|
|
|
|
return inBodyIM(p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
p.fosterParenting = true
|
|
|
|
@ -1309,7 +1439,7 @@ func inTableIM(p *parser) bool {
|
|
|
|
|
return inBodyIM(p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.11.
|
|
|
|
|
// Section 12.2.6.4.11.
|
|
|
|
|
func inCaptionIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case StartTagToken:
|
|
|
|
@ -1355,7 +1485,7 @@ func inCaptionIM(p *parser) bool {
|
|
|
|
|
return inBodyIM(p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.12.
|
|
|
|
|
// Section 12.2.6.4.12.
|
|
|
|
|
func inColumnGroupIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case TextToken:
|
|
|
|
@ -1386,11 +1516,13 @@ func inColumnGroupIM(p *parser) bool {
|
|
|
|
|
p.oe.pop()
|
|
|
|
|
p.acknowledgeSelfClosingTag()
|
|
|
|
|
return true
|
|
|
|
|
case a.Template:
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
|
}
|
|
|
|
|
case EndTagToken:
|
|
|
|
|
switch p.tok.DataAtom {
|
|
|
|
|
case a.Colgroup:
|
|
|
|
|
if p.oe.top().DataAtom != a.Html {
|
|
|
|
|
if p.oe.top().DataAtom == a.Colgroup {
|
|
|
|
|
p.oe.pop()
|
|
|
|
|
p.im = inTableIM
|
|
|
|
|
}
|
|
|
|
@ -1398,17 +1530,21 @@ func inColumnGroupIM(p *parser) bool {
|
|
|
|
|
case a.Col:
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
case a.Template:
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
|
}
|
|
|
|
|
case ErrorToken:
|
|
|
|
|
return inBodyIM(p)
|
|
|
|
|
}
|
|
|
|
|
if p.oe.top().DataAtom != a.Html {
|
|
|
|
|
p.oe.pop()
|
|
|
|
|
p.im = inTableIM
|
|
|
|
|
return false
|
|
|
|
|
if p.oe.top().DataAtom != a.Colgroup {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return true
|
|
|
|
|
p.oe.pop()
|
|
|
|
|
p.im = inTableIM
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.13.
|
|
|
|
|
// Section 12.2.6.4.13.
|
|
|
|
|
func inTableBodyIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case StartTagToken:
|
|
|
|
@ -1460,7 +1596,7 @@ func inTableBodyIM(p *parser) bool {
|
|
|
|
|
return inTableIM(p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.14.
|
|
|
|
|
// Section 12.2.6.4.14.
|
|
|
|
|
func inRowIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case StartTagToken:
|
|
|
|
@ -1511,7 +1647,7 @@ func inRowIM(p *parser) bool {
|
|
|
|
|
return inTableIM(p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.15.
|
|
|
|
|
// Section 12.2.6.4.15.
|
|
|
|
|
func inCellIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case StartTagToken:
|
|
|
|
@ -1560,12 +1696,9 @@ func inCellIM(p *parser) bool {
|
|
|
|
|
return inBodyIM(p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.16.
|
|
|
|
|
// Section 12.2.6.4.16.
|
|
|
|
|
func inSelectIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case ErrorToken:
|
|
|
|
|
// Stop parsing.
|
|
|
|
|
return true
|
|
|
|
|
case TextToken:
|
|
|
|
|
p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
|
|
|
|
|
case StartTagToken:
|
|
|
|
@ -1597,7 +1730,7 @@ func inSelectIM(p *parser) bool {
|
|
|
|
|
p.tokenizer.NextIsNotRawText()
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
case a.Script:
|
|
|
|
|
case a.Script, a.Template:
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
|
}
|
|
|
|
|
case EndTagToken:
|
|
|
|
@ -1618,6 +1751,8 @@ func inSelectIM(p *parser) bool {
|
|
|
|
|
if p.popUntil(selectScope, a.Select) {
|
|
|
|
|
p.resetInsertionMode()
|
|
|
|
|
}
|
|
|
|
|
case a.Template:
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
|
}
|
|
|
|
|
case CommentToken:
|
|
|
|
|
p.addChild(&Node{
|
|
|
|
@ -1627,12 +1762,14 @@ func inSelectIM(p *parser) bool {
|
|
|
|
|
case DoctypeToken:
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
case ErrorToken:
|
|
|
|
|
return inBodyIM(p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.17.
|
|
|
|
|
// Section 12.2.6.4.17.
|
|
|
|
|
func inSelectInTableIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case StartTagToken, EndTagToken:
|
|
|
|
@ -1650,7 +1787,73 @@ func inSelectInTableIM(p *parser) bool {
|
|
|
|
|
return inSelectIM(p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.18.
|
|
|
|
|
// Section 12.2.6.4.18.
|
|
|
|
|
func inTemplateIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case TextToken, CommentToken, DoctypeToken:
|
|
|
|
|
return inBodyIM(p)
|
|
|
|
|
case StartTagToken:
|
|
|
|
|
switch p.tok.DataAtom {
|
|
|
|
|
case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
|
case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
|
|
|
|
|
p.templateStack.pop()
|
|
|
|
|
p.templateStack = append(p.templateStack, inTableIM)
|
|
|
|
|
p.im = inTableIM
|
|
|
|
|
return false
|
|
|
|
|
case a.Col:
|
|
|
|
|
p.templateStack.pop()
|
|
|
|
|
p.templateStack = append(p.templateStack, inColumnGroupIM)
|
|
|
|
|
p.im = inColumnGroupIM
|
|
|
|
|
return false
|
|
|
|
|
case a.Tr:
|
|
|
|
|
p.templateStack.pop()
|
|
|
|
|
p.templateStack = append(p.templateStack, inTableBodyIM)
|
|
|
|
|
p.im = inTableBodyIM
|
|
|
|
|
return false
|
|
|
|
|
case a.Td, a.Th:
|
|
|
|
|
p.templateStack.pop()
|
|
|
|
|
p.templateStack = append(p.templateStack, inRowIM)
|
|
|
|
|
p.im = inRowIM
|
|
|
|
|
return false
|
|
|
|
|
default:
|
|
|
|
|
p.templateStack.pop()
|
|
|
|
|
p.templateStack = append(p.templateStack, inBodyIM)
|
|
|
|
|
p.im = inBodyIM
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
case EndTagToken:
|
|
|
|
|
switch p.tok.DataAtom {
|
|
|
|
|
case a.Template:
|
|
|
|
|
return inHeadIM(p)
|
|
|
|
|
default:
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
case ErrorToken:
|
|
|
|
|
if !p.oe.contains(a.Template) {
|
|
|
|
|
// Ignore the token.
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
// TODO: remove this divergence from the HTML5 spec.
|
|
|
|
|
//
|
|
|
|
|
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
|
|
|
|
|
p.generateImpliedEndTags()
|
|
|
|
|
for i := len(p.oe) - 1; i >= 0; i-- {
|
|
|
|
|
if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
|
|
|
|
|
p.oe = p.oe[:i]
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
p.clearActiveFormattingElements()
|
|
|
|
|
p.templateStack.pop()
|
|
|
|
|
p.resetInsertionMode()
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.6.4.19.
|
|
|
|
|
func afterBodyIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case ErrorToken:
|
|
|
|
@ -1688,7 +1891,7 @@ func afterBodyIM(p *parser) bool {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.19.
|
|
|
|
|
// Section 12.2.6.4.20.
|
|
|
|
|
func inFramesetIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case CommentToken:
|
|
|
|
@ -1738,7 +1941,7 @@ func inFramesetIM(p *parser) bool {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.20.
|
|
|
|
|
// Section 12.2.6.4.21.
|
|
|
|
|
func afterFramesetIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case CommentToken:
|
|
|
|
@ -1777,7 +1980,7 @@ func afterFramesetIM(p *parser) bool {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.21.
|
|
|
|
|
// Section 12.2.6.4.22.
|
|
|
|
|
func afterAfterBodyIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case ErrorToken:
|
|
|
|
@ -1806,7 +2009,7 @@ func afterAfterBodyIM(p *parser) bool {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.4.22.
|
|
|
|
|
// Section 12.2.6.4.23.
|
|
|
|
|
func afterAfterFramesetIM(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case CommentToken:
|
|
|
|
@ -1844,7 +2047,7 @@ func afterAfterFramesetIM(p *parser) bool {
|
|
|
|
|
|
|
|
|
|
// whitespaceOrNUL is whitespace with the NUL byte ("\x00") appended.
const whitespaceOrNUL = whitespace + "\x00"
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.5.
|
|
|
|
|
// Section 12.2.6.5.
|
|
|
|
|
func parseForeignContent(p *parser) bool {
|
|
|
|
|
switch p.tok.Type {
|
|
|
|
|
case TextToken:
|
|
|
|
@ -1924,7 +2127,7 @@ func parseForeignContent(p *parser) bool {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Section 12.2.5.
|
|
|
|
|
// Section 12.2.6.
|
|
|
|
|
func (p *parser) inForeignContent() bool {
|
|
|
|
|
if len(p.oe) == 0 {
|
|
|
|
|
return false
|
|
|
|
@ -2012,6 +2215,15 @@ func (p *parser) parse() error {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Parse returns the parse tree for the HTML from the given Reader.
|
|
|
|
|
//
|
|
|
|
|
// It implements the HTML5 parsing algorithm
|
|
|
|
|
// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
|
|
|
|
|
// which is very complicated. The resultant tree can contain implicitly created
|
|
|
|
|
// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
|
|
|
|
|
// differ from the nesting implied by a naive processing of start and end
|
|
|
|
|
// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
|
|
|
|
|
// with no corresponding node in the resulting tree.
|
|
|
|
|
//
|
|
|
|
|
// The input is assumed to be UTF-8 encoded.
|
|
|
|
|
func Parse(r io.Reader) (*Node, error) {
|
|
|
|
|
p := &parser{
|
|
|
|
@ -2033,6 +2245,8 @@ func Parse(r io.Reader) (*Node, error) {
|
|
|
|
|
// ParseFragment parses a fragment of HTML and returns the nodes that were
|
|
|
|
|
// found. If the fragment is the InnerHTML for an existing element, pass that
|
|
|
|
|
// element in context.
|
|
|
|
|
//
|
|
|
|
|
// It has the same intricacies as Parse.
|
|
|
|
|
func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
|
|
|
|
|
contextTag := ""
|
|
|
|
|
if context != nil {
|
|
|
|
@ -2064,6 +2278,9 @@ func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
|
|
|
|
|
}
|
|
|
|
|
p.doc.AppendChild(root)
|
|
|
|
|
p.oe = nodeStack{root}
|
|
|
|
|
if context != nil && context.DataAtom == a.Template {
|
|
|
|
|
p.templateStack = append(p.templateStack, inTemplateIM)
|
|
|
|
|
}
|
|
|
|
|
p.resetInsertionMode()
|
|
|
|
|
|
|
|
|
|
for n := context; n != nil; n = n.Parent {
|
|
|
|
|