have HTML checker check context error in parsing loop
This commit is contained in:
+69
-31
@@ -499,16 +499,16 @@ func (ht *htmlCheckerImpl) attemptRewrite(rewriters []rewriter, data string) *ma
|
|||||||
}
|
}
|
||||||
|
|
||||||
// doFlushString attempts to flush a string from the temporary buffer.
|
// doFlushString attempts to flush a string from the temporary buffer.
|
||||||
func (ht *htmlCheckerImpl) doFlushString() bool {
|
func (ht *htmlCheckerImpl) doFlushString() (bool, error) {
|
||||||
md := ht.attemptRewrite(ht.stringRewriters, ht.tempBuffer.String())
|
md := ht.attemptRewrite(ht.stringRewriters, ht.tempBuffer.String())
|
||||||
if md != nil {
|
if md != nil {
|
||||||
ht.emitMarkupData(md)
|
ht.emitMarkupData(md)
|
||||||
ht.tempBuffer.Reset()
|
ht.tempBuffer.Reset()
|
||||||
if md.rescan {
|
if md.rescan {
|
||||||
ht.parse(md.all())
|
err := ht.parse(md.all())
|
||||||
return true
|
return true, err
|
||||||
}
|
}
|
||||||
return false
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
first := true
|
first := true
|
||||||
@@ -549,7 +549,10 @@ func (ht *htmlCheckerImpl) doFlushString() bool {
|
|||||||
// emit and/or reparse
|
// emit and/or reparse
|
||||||
ht.emitMarkupData(md)
|
ht.emitMarkupData(md)
|
||||||
if md.rescan {
|
if md.rescan {
|
||||||
ht.parse(md.all())
|
err := ht.parse(md.all())
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// just output the word normally
|
// just output the word normally
|
||||||
@@ -585,7 +588,7 @@ func (ht *htmlCheckerImpl) doFlushString() bool {
|
|||||||
}
|
}
|
||||||
first = false
|
first = false
|
||||||
}
|
}
|
||||||
return false
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// handleAsHTML attempts to handle the contents of the tag in the temporary buffer as HTML.
|
// handleAsHTML attempts to handle the contents of the tag in the temporary buffer as HTML.
|
||||||
@@ -703,7 +706,7 @@ func (ht *htmlCheckerImpl) containsXMLConstruct() bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// finishTag processes and outputs the tag in the temporary buffer.
|
// finishTag processes and outputs the tag in the temporary buffer.
|
||||||
func (ht *htmlCheckerImpl) finishTag() {
|
func (ht *htmlCheckerImpl) finishTag() error {
|
||||||
if ht.containsHTMLComment() {
|
if ht.containsHTMLComment() {
|
||||||
if ht.containsCompleteHTMLComment() && !ht.config.DiscardComments {
|
if ht.containsCompleteHTMLComment() && !ht.config.DiscardComments {
|
||||||
// output the comment in the raw
|
// output the comment in the raw
|
||||||
@@ -714,13 +717,13 @@ func (ht *htmlCheckerImpl) finishTag() {
|
|||||||
ht.tempBuffer.Reset()
|
ht.tempBuffer.Reset()
|
||||||
ht.state = stateWhitespace
|
ht.state = stateWhitespace
|
||||||
}
|
}
|
||||||
return
|
return nil
|
||||||
}
|
}
|
||||||
if ht.handleAsHTML() {
|
if ht.handleAsHTML() {
|
||||||
// this was valid HTML, we're done
|
// this was valid HTML, we're done
|
||||||
ht.tempBuffer.Reset()
|
ht.tempBuffer.Reset()
|
||||||
ht.state = stateWhitespace
|
ht.state = stateWhitespace
|
||||||
return
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// try to handle it with a tag rewriter
|
// try to handle it with a tag rewriter
|
||||||
@@ -729,19 +732,20 @@ func (ht *htmlCheckerImpl) finishTag() {
|
|||||||
ht.emitBracketedMarkupData(md, '<', '>')
|
ht.emitBracketedMarkupData(md, '<', '>')
|
||||||
ht.tempBuffer.Reset()
|
ht.tempBuffer.Reset()
|
||||||
ht.state = stateWhitespace
|
ht.state = stateWhitespace
|
||||||
|
var err error = nil
|
||||||
if md.rescan {
|
if md.rescan {
|
||||||
ht.tempBuffer.WriteByte('<')
|
ht.tempBuffer.WriteByte('<')
|
||||||
ht.state = stateChars
|
ht.state = stateChars
|
||||||
ht.parse(md.all() + ">")
|
err = ht.parse(md.all() + ">")
|
||||||
}
|
}
|
||||||
return
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if ht.config.DiscardXML && ht.containsXMLConstruct() {
|
if ht.config.DiscardXML && ht.containsXMLConstruct() {
|
||||||
// this tag is an XML construct, and needs to be discarded
|
// this tag is an XML construct, and needs to be discarded
|
||||||
ht.tempBuffer.Reset()
|
ht.tempBuffer.Reset()
|
||||||
ht.state = stateWhitespace
|
ht.state = stateWhitespace
|
||||||
return
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// This tag has been rejected! process it normally as character data
|
// This tag has been rejected! process it normally as character data
|
||||||
@@ -749,14 +753,18 @@ func (ht *htmlCheckerImpl) finishTag() {
|
|||||||
ht.tempBuffer.Reset()
|
ht.tempBuffer.Reset()
|
||||||
ht.tempBuffer.WriteByte('<')
|
ht.tempBuffer.WriteByte('<')
|
||||||
ht.state = stateChars
|
ht.state = stateChars
|
||||||
|
var err error = nil
|
||||||
if len(rejection) > 0 {
|
if len(rejection) > 0 {
|
||||||
ht.parse(rejection)
|
err = ht.parse(rejection)
|
||||||
}
|
}
|
||||||
ht.parse(">")
|
if err == nil {
|
||||||
|
err = ht.parse(">")
|
||||||
|
}
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// finishParen processes and outputs the parenthesized construct in the temporary buffer.
|
// finishParen processes and outputs the parenthesized construct in the temporary buffer.
|
||||||
func (ht *htmlCheckerImpl) finishParen() {
|
func (ht *htmlCheckerImpl) finishParen() error {
|
||||||
// Try to handle the element using a paren rewriter
|
// Try to handle the element using a paren rewriter
|
||||||
md := ht.attemptRewrite(ht.parenRewriters, ht.tempBuffer.String())
|
md := ht.attemptRewrite(ht.parenRewriters, ht.tempBuffer.String())
|
||||||
if md != nil {
|
if md != nil {
|
||||||
@@ -764,12 +772,13 @@ func (ht *htmlCheckerImpl) finishParen() {
|
|||||||
ht.tempBuffer.Reset()
|
ht.tempBuffer.Reset()
|
||||||
ht.state = stateWhitespace
|
ht.state = stateWhitespace
|
||||||
ht.parenLevel = 0
|
ht.parenLevel = 0
|
||||||
|
var err error = nil
|
||||||
if md.rescan {
|
if md.rescan {
|
||||||
ht.tempBuffer.WriteByte('(')
|
ht.tempBuffer.WriteByte('(')
|
||||||
ht.state = stateChars
|
ht.state = stateChars
|
||||||
ht.parse(md.all() + ")")
|
err = ht.parse(md.all() + ")")
|
||||||
}
|
}
|
||||||
return
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tag rejected! Process it normally as character data.
|
// Tag rejected! Process it normally as character data.
|
||||||
@@ -778,16 +787,24 @@ func (ht *htmlCheckerImpl) finishParen() {
|
|||||||
ht.tempBuffer.WriteByte('(')
|
ht.tempBuffer.WriteByte('(')
|
||||||
ht.state = stateChars
|
ht.state = stateChars
|
||||||
ht.parenLevel = 0
|
ht.parenLevel = 0
|
||||||
|
var err error = nil
|
||||||
if len(rejection) > 0 {
|
if len(rejection) > 0 {
|
||||||
ht.parse(rejection)
|
err = ht.parse(rejection)
|
||||||
}
|
}
|
||||||
ht.parse(")")
|
if err == nil {
|
||||||
|
err = ht.parse(")")
|
||||||
|
}
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// parse handles the meat of parsing an input string; it runs the state machine on the input.
|
// parse handles the meat of parsing an input string; it runs the state machine on the input.
|
||||||
func (ht *htmlCheckerImpl) parse(str string) {
|
func (ht *htmlCheckerImpl) parse(str string) error {
|
||||||
i := 0
|
i := 0
|
||||||
for i < len(str) {
|
for i < len(str) {
|
||||||
|
err := ht.ctx.Err()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
ch := str[i]
|
ch := str[i]
|
||||||
switch ht.state {
|
switch ht.state {
|
||||||
case stateWhitespace:
|
case stateWhitespace:
|
||||||
@@ -832,18 +849,27 @@ func (ht *htmlCheckerImpl) parse(str string) {
|
|||||||
case stateChars:
|
case stateChars:
|
||||||
switch ch {
|
switch ch {
|
||||||
case ' ', '\t': // go to Whitespace state
|
case ' ', '\t': // go to Whitespace state
|
||||||
ht.doFlushString()
|
_, err := ht.doFlushString()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
ht.state = stateWhitespace
|
ht.state = stateWhitespace
|
||||||
ht.tempBuffer.WriteByte(ch)
|
ht.tempBuffer.WriteByte(ch)
|
||||||
i++
|
i++
|
||||||
case '\r', '\n': // go to Newline state
|
case '\r', '\n': // go to Newline state
|
||||||
ht.doFlushString()
|
_, err := ht.doFlushString()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
ht.state = stateNewline
|
ht.state = stateNewline
|
||||||
ht.tempBuffer.WriteByte(ch)
|
ht.tempBuffer.WriteByte(ch)
|
||||||
i++
|
i++
|
||||||
case '<': // may be a start of tag
|
case '<': // may be a start of tag
|
||||||
if ht.config.Angles {
|
if ht.config.Angles {
|
||||||
ht.doFlushString()
|
_, err := ht.doFlushString()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
ht.state = stateLeftAngle
|
ht.state = stateLeftAngle
|
||||||
} else {
|
} else {
|
||||||
ht.tempBuffer.WriteByte(ch)
|
ht.tempBuffer.WriteByte(ch)
|
||||||
@@ -886,7 +912,10 @@ func (ht *htmlCheckerImpl) parse(str string) {
|
|||||||
case stateTag:
|
case stateTag:
|
||||||
switch ch {
|
switch ch {
|
||||||
case '>': // finish the tag - this changes the state, and possibly calls parse() recursively
|
case '>': // finish the tag - this changes the state, and possibly calls parse() recursively
|
||||||
ht.finishTag()
|
err := ht.finishTag()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
i++
|
i++
|
||||||
case '\'', '"': // go into "quote string" state inside the tag
|
case '\'', '"': // go into "quote string" state inside the tag
|
||||||
ht.tempBuffer.WriteByte(ch)
|
ht.tempBuffer.WriteByte(ch)
|
||||||
@@ -905,7 +934,10 @@ func (ht *htmlCheckerImpl) parse(str string) {
|
|||||||
i++
|
i++
|
||||||
case ')':
|
case ')':
|
||||||
if ht.parenLevel == 0 {
|
if ht.parenLevel == 0 {
|
||||||
ht.finishParen() // finish paren, changing state and recursively parsing if necessary
|
err := ht.finishParen() // finish paren, changing state and recursively parsing if necessary
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// nest parentheses one LESS level deeper
|
// nest parentheses one LESS level deeper
|
||||||
ht.tempBuffer.WriteByte(ch)
|
ht.tempBuffer.WriteByte(ch)
|
||||||
@@ -933,6 +965,7 @@ func (ht *htmlCheckerImpl) parse(str string) {
|
|||||||
log.Fatalf("invalid parser state: %d", ht.state)
|
log.Fatalf("invalid parser state: %d", ht.state)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------
|
/*----------------------------------------------------------------------------
|
||||||
@@ -953,10 +986,11 @@ func (ht *htmlCheckerImpl) Append(str string) error {
|
|||||||
if !ht.started {
|
if !ht.started {
|
||||||
ht.started = true
|
ht.started = true
|
||||||
}
|
}
|
||||||
|
var err error = nil
|
||||||
if str != "" {
|
if str != "" {
|
||||||
ht.parse(str)
|
err = ht.parse(str)
|
||||||
}
|
}
|
||||||
return nil
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Finish completes the HTML checker parsing and makes the result available.
|
/* Finish completes the HTML checker parsing and makes the result available.
|
||||||
@@ -975,11 +1009,12 @@ func (ht *htmlCheckerImpl) Finish() error {
|
|||||||
running := true
|
running := true
|
||||||
for running {
|
for running {
|
||||||
running = false // make sure we stop unless this is set to true
|
running = false // make sure we stop unless this is set to true
|
||||||
|
var err error = nil
|
||||||
switch ht.state {
|
switch ht.state {
|
||||||
case stateWhitespace, stateNewline:
|
case stateWhitespace, stateNewline:
|
||||||
// do nothing - discard whitespace or newlines at end
|
// do nothing - discard whitespace or newlines at end
|
||||||
case stateChars:
|
case stateChars:
|
||||||
running = ht.doFlushString() // flush the temporary buffer
|
running, err = ht.doFlushString() // flush the temporary buffer
|
||||||
case stateLeftAngle:
|
case stateLeftAngle:
|
||||||
// just emit a left angle character
|
// just emit a left angle character
|
||||||
ht.emitPossibleLineBreak()
|
ht.emitPossibleLineBreak()
|
||||||
@@ -991,21 +1026,24 @@ func (ht *htmlCheckerImpl) Finish() error {
|
|||||||
ht.tempBuffer.WriteByte('<')
|
ht.tempBuffer.WriteByte('<')
|
||||||
ht.state = stateChars
|
ht.state = stateChars
|
||||||
if len(rejection) > 0 {
|
if len(rejection) > 0 {
|
||||||
ht.parse(rejection)
|
err = ht.parse(rejection)
|
||||||
}
|
}
|
||||||
running = true
|
running = true
|
||||||
case stateParen:
|
case stateParen:
|
||||||
// we won't finish this, so it's automatically rejected
|
// we won't finish this, so it's automagically rejected
|
||||||
rejection := ht.tempBuffer.String()
|
rejection := ht.tempBuffer.String()
|
||||||
ht.tempBuffer.Reset()
|
ht.tempBuffer.Reset()
|
||||||
ht.tempBuffer.WriteByte('(')
|
ht.tempBuffer.WriteByte('(')
|
||||||
ht.state = stateChars
|
ht.state = stateChars
|
||||||
ht.parenLevel = 0
|
ht.parenLevel = 0
|
||||||
if len(rejection) > 0 {
|
if len(rejection) > 0 {
|
||||||
ht.parse(rejection)
|
err = ht.parse(rejection)
|
||||||
}
|
}
|
||||||
running = true
|
running = true
|
||||||
}
|
}
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now close all the HTML tags that were left open.
|
// Now close all the HTML tags that were left open.
|
||||||
|
|||||||
Reference in New Issue
Block a user