landed the tag repository
This commit is contained in:
@@ -14,12 +14,45 @@ import (
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
)
|
||||
|
||||
// Constants used to group individual HTML tags.
|
||||
const (
|
||||
tagSetInlineFormat = 1 // inline formatting
|
||||
tagSetAnchor = 2 // the <A> tag
|
||||
tagSetBlockFormat = 3 // block-level formatting
|
||||
tagSetActiveContent = 4 // active content like objects and scripts
|
||||
tagSetImageMaps = 5 // image map tags
|
||||
tagSetDocFormat = 6 // document-level formatting
|
||||
tagSetFontFormat = 7 // the <FONT> tag
|
||||
tagSetForms = 8 // form tags
|
||||
tagSetTables = 9 // table tags
|
||||
tagSetChangeMarkup = 10 // change markup (<DEL> and <INS>)
|
||||
tagSetFrames = 11 // frame tags
|
||||
tagSetImages = 12 // the <IMG> tag
|
||||
tagSetPreformat = 13 // the <PRE> tag and similar
|
||||
tagSetNSCPInlineFormat = 14 // Netscape-specific inline formatting
|
||||
tagSetNSCPLayers = 15 // Netscape layer tags
|
||||
tagSetNSCPForms = 16 // Netscape form tags
|
||||
tagSetNSCPBlockFormat = 17 // Netscape block-formatting tags
|
||||
tagSetNSCPServer = 18 // the Netscape <SERVER> tag
|
||||
tagSetMSFTDocFormat = 19 // Microsoft-specific document formatting
|
||||
tagSetMSFTInlineFormat = 20 // Microsoft-specific inline formatting
|
||||
tagSetMSFTBlockFormat = 21 // Microsoft-specific block formatting
|
||||
tagSetMSFTActiveContent = 22 // Microsoft-specific active content
|
||||
tagSetServerPage = 23 // server-side page use
|
||||
tagSetJavaServer = 24 // Java server page use
|
||||
tagSetComment = 25 // HTML comments
|
||||
)
|
||||
|
||||
// Functions used inside the tag to implement "overridden" behavior.
|
||||
type causeLineBreakFunc func(*tag, bool) bool
|
||||
type closingTagFunc func(*tag) string
|
||||
type rewriteContentsFunc func(*tag, string, bool, htmlCheckerBackend) string
|
||||
|
||||
// tag is a structure describing a particular HTML tag.
|
||||
type tag struct {
|
||||
name string
|
||||
index int
|
||||
@@ -31,6 +64,7 @@ type tag struct {
|
||||
rwc rewriteContentsFunc
|
||||
}
|
||||
|
||||
// causeLineBreak returns true if the tag causes a line break.
|
||||
func (t *tag) causeLineBreak(isClosing bool) bool {
|
||||
if t.clb == nil {
|
||||
return t.lineBreak
|
||||
@@ -38,6 +72,7 @@ func (t *tag) causeLineBreak(isClosing bool) bool {
|
||||
return t.clb(t, isClosing)
|
||||
}
|
||||
|
||||
// makeClosingTag creates a closing tag for this one.
|
||||
func (t *tag) makeClosingTag() string {
|
||||
if t.ct == nil {
|
||||
return ""
|
||||
@@ -45,6 +80,7 @@ func (t *tag) makeClosingTag() string {
|
||||
return t.ct(t)
|
||||
}
|
||||
|
||||
// rewriteContents is a hook used to rewrite the contents of the tag.
|
||||
func (t *tag) rewriteContents(contents string, isClosing bool, ctxt htmlCheckerBackend) string {
|
||||
if t.rwc == nil {
|
||||
return contents
|
||||
@@ -52,6 +88,7 @@ func (t *tag) rewriteContents(contents string, isClosing bool, ctxt htmlCheckerB
|
||||
return t.rwc(t, contents, isClosing, ctxt)
|
||||
}
|
||||
|
||||
// createSimpleTag creates a structure for a simple tag.
|
||||
func createSimpleTag(name string, brk bool) *tag {
|
||||
return &tag{
|
||||
name: strings.ToUpper(name),
|
||||
@@ -65,6 +102,7 @@ func createSimpleTag(name string, brk bool) *tag {
|
||||
}
|
||||
}
|
||||
|
||||
// createWBRTag creates a structure for a WBR (word break) tag.
|
||||
func createWBRTag() *tag {
|
||||
return &tag{
|
||||
name: "WBR",
|
||||
@@ -81,10 +119,12 @@ func createWBRTag() *tag {
|
||||
}
|
||||
}
|
||||
|
||||
// stdClosingTag is the standard way a closing tag is made.
|
||||
func stdClosingTag(tag *tag) string {
|
||||
return fmt.Sprintf("</%s>", tag.name)
|
||||
}
|
||||
|
||||
// createOpenCloseTag creates a tag that has a specific open and close form.
|
||||
func createOpenCloseTag(name string, brk bool) *tag {
|
||||
return &tag{
|
||||
name: strings.ToUpper(name),
|
||||
@@ -98,6 +138,7 @@ func createOpenCloseTag(name string, brk bool) *tag {
|
||||
}
|
||||
}
|
||||
|
||||
// createListElementTag creates a tag that is part of a list.
|
||||
func createListElementTag(name string) *tag {
|
||||
return &tag{
|
||||
name: strings.ToUpper(name),
|
||||
@@ -113,6 +154,7 @@ func createListElementTag(name string) *tag {
|
||||
}
|
||||
}
|
||||
|
||||
// createBalancedTag creates a tag that should have opens and closes inherently balanced.
|
||||
func createBalancedTag(name string, brk bool) *tag {
|
||||
return &tag{
|
||||
name: strings.ToUpper(name),
|
||||
@@ -126,6 +168,7 @@ func createBalancedTag(name string, brk bool) *tag {
|
||||
}
|
||||
}
|
||||
|
||||
// createNOBRTag creates a NOBR (no break) tag.
|
||||
func createNOBRTag() *tag {
|
||||
return &tag{
|
||||
name: "NOBR",
|
||||
@@ -146,11 +189,16 @@ func createNOBRTag() *tag {
|
||||
}
|
||||
}
|
||||
|
||||
// Patterns to be used in recognizing attributes in an <A> tag.
|
||||
var hrefPattern = regexp.MustCompile(`(?i:href\s*=)`)
|
||||
var targetPattern = regexp.MustCompile(`(?i:target\s*=)`)
|
||||
|
||||
// extractAttribute extracts an attribute value from the contents of an <A> tag.
|
||||
func extractAttribute(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
if len(s) == 0 {
|
||||
return ""
|
||||
}
|
||||
if s[0] == '\'' || s[0] == '"' {
|
||||
p := strings.IndexByte(s[1:], s[0])
|
||||
if p < 0 {
|
||||
@@ -161,6 +209,7 @@ func extractAttribute(s string) string {
|
||||
return strings.Fields(s)[0]
|
||||
}
|
||||
|
||||
// rewriteATagContents rewrites the contents of an <A> tag.
|
||||
func rewriteATagContents(t *tag, contents string, isClosing bool, ctxt htmlCheckerBackend) string {
|
||||
if isClosing {
|
||||
return contents // don't bother checking close tag
|
||||
@@ -191,6 +240,7 @@ func rewriteATagContents(t *tag, contents string, isClosing bool, ctxt htmlCheck
|
||||
return contents + " " + tail
|
||||
}
|
||||
|
||||
// createATag creates an <A> tag.
|
||||
func createATag() *tag {
|
||||
return &tag{
|
||||
name: "A",
|
||||
@@ -203,3 +253,164 @@ func createATag() *tag {
|
||||
rwc: rewriteATagContents,
|
||||
}
|
||||
}
|
||||
|
||||
// tagNameToIndex is a mapping from tag names to indexes into the arrays.
|
||||
var tagNameToIndex = make(map[string]int)
|
||||
|
||||
// tagIndexToObject contains the actual tags.
|
||||
var tagIndexToObject = make([]*tag, 0, 50)
|
||||
|
||||
// tagIndexToSetId contains the set ID values for each tag.
|
||||
var tagIndexToSetId = make([]int, 0, 50)
|
||||
|
||||
// tagMaxLength is the maximum length of a tag name.
|
||||
var tagMaxLength = 0
|
||||
|
||||
// tagSetNameToSet is the listing of bit sets corresponding to the set names in configuration.
|
||||
var tagSetNameToSet = make(map[string]*bitset.BitSet)
|
||||
|
||||
// enshrineTag adds a tag to our internal repository structures.
|
||||
func enshrineTag(tag *tag, set int) {
|
||||
ndx := len(tagIndexToObject)
|
||||
tagIndexToObject = append(tagIndexToObject, tag)
|
||||
tagIndexToSetId = append(tagIndexToSetId, set)
|
||||
tag.index = ndx
|
||||
tagNameToIndex[tag.name] = ndx
|
||||
if len(tag.name) > tagMaxLength {
|
||||
tagMaxLength = len(tag.name)
|
||||
}
|
||||
}
|
||||
|
||||
// init actually sets up the tag repository.
|
||||
func init() {
|
||||
enshrineTag(createSimpleTag("!DOCTYPE", false), tagSetDocFormat)
|
||||
enshrineTag(createSimpleTag("%", false), tagSetServerPage)
|
||||
enshrineTag(createSimpleTag("%=", false), tagSetServerPage)
|
||||
enshrineTag(createSimpleTag("%@", false), tagSetServerPage)
|
||||
enshrineTag(createATag(), tagSetAnchor)
|
||||
enshrineTag(createBalancedTag("ABBR", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("ACRONYM", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("ADDRESS", true), tagSetBlockFormat)
|
||||
enshrineTag(createBalancedTag("APPLET", false), tagSetActiveContent)
|
||||
enshrineTag(createSimpleTag("AREA", false), tagSetImageMaps)
|
||||
enshrineTag(createBalancedTag("B", false), tagSetInlineFormat)
|
||||
enshrineTag(createSimpleTag("BASE", false), tagSetDocFormat)
|
||||
enshrineTag(createSimpleTag("BASEFONT", false), tagSetDocFormat)
|
||||
enshrineTag(createBalancedTag("BDO", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("BEAN", false), tagSetJavaServer)
|
||||
enshrineTag(createSimpleTag("BGSOUND", false), tagSetMSFTDocFormat)
|
||||
enshrineTag(createBalancedTag("BIG", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("BLINK", false), tagSetNSCPInlineFormat)
|
||||
enshrineTag(createBalancedTag("BLOCKQUOTE", true), tagSetBlockFormat)
|
||||
enshrineTag(createOpenCloseTag("BODY", false), tagSetDocFormat)
|
||||
enshrineTag(createSimpleTag("BR", true), tagSetBlockFormat)
|
||||
enshrineTag(createOpenCloseTag("BUTTON", false), tagSetForms)
|
||||
enshrineTag(createBalancedTag("CAPTION", true), tagSetTables)
|
||||
enshrineTag(createBalancedTag("CENTER", true), tagSetBlockFormat)
|
||||
enshrineTag(createBalancedTag("CITE", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("CODE", false), tagSetInlineFormat)
|
||||
enshrineTag(createSimpleTag("COL", true), tagSetTables)
|
||||
enshrineTag(createOpenCloseTag("COLGROUP", true), tagSetTables)
|
||||
enshrineTag(createBalancedTag("COMMENT", false), tagSetMSFTInlineFormat)
|
||||
enshrineTag(createListElementTag("DD"), tagSetBlockFormat)
|
||||
enshrineTag(createBalancedTag("DEL", false), tagSetChangeMarkup)
|
||||
enshrineTag(createBalancedTag("DFN", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("DIR", true), tagSetBlockFormat)
|
||||
enshrineTag(createBalancedTag("DIV", true), tagSetBlockFormat)
|
||||
enshrineTag(createBalancedTag("DL", true), tagSetBlockFormat)
|
||||
enshrineTag(createListElementTag("DT"), tagSetBlockFormat)
|
||||
enshrineTag(createBalancedTag("EM", false), tagSetInlineFormat)
|
||||
enshrineTag(createSimpleTag("EMBED", false), tagSetActiveContent)
|
||||
enshrineTag(createBalancedTag("FIELDSET", false), tagSetForms)
|
||||
enshrineTag(createBalancedTag("FONT", false), tagSetFontFormat)
|
||||
enshrineTag(createBalancedTag("FORM", false), tagSetForms)
|
||||
enshrineTag(createSimpleTag("FRAME", true), tagSetFrames)
|
||||
enshrineTag(createBalancedTag("FRAMESET", false), tagSetFrames)
|
||||
enshrineTag(createBalancedTag("H1", true), tagSetFontFormat)
|
||||
enshrineTag(createBalancedTag("H2", true), tagSetFontFormat)
|
||||
enshrineTag(createBalancedTag("H3", true), tagSetFontFormat)
|
||||
enshrineTag(createBalancedTag("H4", true), tagSetFontFormat)
|
||||
enshrineTag(createBalancedTag("H5", true), tagSetFontFormat)
|
||||
enshrineTag(createBalancedTag("H6", true), tagSetFontFormat)
|
||||
enshrineTag(createOpenCloseTag("HEAD", false), tagSetDocFormat)
|
||||
enshrineTag(createSimpleTag("HR", true), tagSetBlockFormat)
|
||||
enshrineTag(createOpenCloseTag("HTML", false), tagSetDocFormat)
|
||||
enshrineTag(createBalancedTag("I", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("IFRAME", true), tagSetFrames)
|
||||
enshrineTag(createBalancedTag("ILAYER", true), tagSetNSCPLayers)
|
||||
enshrineTag(createSimpleTag("IMG", false), tagSetImages)
|
||||
enshrineTag(createSimpleTag("INPUT", false), tagSetForms)
|
||||
enshrineTag(createBalancedTag("INS", false), tagSetChangeMarkup)
|
||||
enshrineTag(createSimpleTag("ISINDEX", false), tagSetForms)
|
||||
enshrineTag(createBalancedTag("KBD", false), tagSetInlineFormat)
|
||||
enshrineTag(createSimpleTag("KEYGEN", false), tagSetNSCPForms)
|
||||
enshrineTag(createBalancedTag("LABEL", false), tagSetForms)
|
||||
enshrineTag(createBalancedTag("LAYER", true), tagSetNSCPLayers)
|
||||
enshrineTag(createBalancedTag("LEGEND", false), tagSetForms)
|
||||
enshrineTag(createListElementTag("LI"), tagSetBlockFormat)
|
||||
enshrineTag(createSimpleTag("LINK", false), tagSetDocFormat)
|
||||
enshrineTag(createBalancedTag("LISTING", false), tagSetMSFTInlineFormat)
|
||||
enshrineTag(createBalancedTag("MAP", false), tagSetImageMaps)
|
||||
enshrineTag(createBalancedTag("MARQUEE", true), tagSetMSFTBlockFormat)
|
||||
enshrineTag(createBalancedTag("MENU", true), tagSetBlockFormat)
|
||||
enshrineTag(createSimpleTag("META", false), tagSetDocFormat)
|
||||
enshrineTag(createBalancedTag("MULTICOL", false), tagSetNSCPBlockFormat)
|
||||
enshrineTag(createNOBRTag(), tagSetBlockFormat)
|
||||
enshrineTag(createBalancedTag("NOEMBED", false), tagSetActiveContent)
|
||||
enshrineTag(createBalancedTag("NOFRAMES", false), tagSetFrames)
|
||||
enshrineTag(createBalancedTag("NOLAYER", false), tagSetNSCPLayers)
|
||||
enshrineTag(createBalancedTag("NOSCRIPT", false), tagSetActiveContent)
|
||||
enshrineTag(createBalancedTag("OBJECT", false), tagSetActiveContent)
|
||||
enshrineTag(createBalancedTag("OL", true), tagSetBlockFormat)
|
||||
enshrineTag(createBalancedTag("OPTGROUP", false), tagSetForms)
|
||||
enshrineTag(createListElementTag("OPTION"), tagSetForms)
|
||||
enshrineTag(createOpenCloseTag("P", true), tagSetBlockFormat)
|
||||
enshrineTag(createSimpleTag("PARAM", false), tagSetActiveContent)
|
||||
enshrineTag(createSimpleTag("PLAINTEXT", false), tagSetPreformat)
|
||||
enshrineTag(createBalancedTag("PRE", false), tagSetPreformat)
|
||||
enshrineTag(createBalancedTag("Q", false), tagSetInlineFormat)
|
||||
enshrineTag(createSimpleTag("RT", false), tagSetMSFTActiveContent)
|
||||
enshrineTag(createBalancedTag("RUBY", false), tagSetMSFTActiveContent)
|
||||
enshrineTag(createBalancedTag("S", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("SAMP", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("SCRIPT", false), tagSetActiveContent)
|
||||
enshrineTag(createBalancedTag("SELECT", false), tagSetForms)
|
||||
enshrineTag(createBalancedTag("SERVER", false), tagSetNSCPServer)
|
||||
enshrineTag(createBalancedTag("SERVLET", false), tagSetJavaServer)
|
||||
enshrineTag(createBalancedTag("SMALL", false), tagSetInlineFormat)
|
||||
enshrineTag(createSimpleTag("SPACER", false), tagSetNSCPInlineFormat)
|
||||
enshrineTag(createBalancedTag("SPAN", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("STRIKE", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("STRONG", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("STYLE", false), tagSetDocFormat)
|
||||
enshrineTag(createBalancedTag("SUB", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("SUP", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("TABLE", true), tagSetTables)
|
||||
enshrineTag(createOpenCloseTag("TBODY", true), tagSetTables)
|
||||
enshrineTag(createBalancedTag("TD", true), tagSetTables)
|
||||
enshrineTag(createBalancedTag("TEXTAREA", true), tagSetForms)
|
||||
enshrineTag(createOpenCloseTag("TFOOT", true), tagSetTables)
|
||||
enshrineTag(createBalancedTag("TH", true), tagSetTables)
|
||||
enshrineTag(createOpenCloseTag("THEAD", true), tagSetTables)
|
||||
enshrineTag(createBalancedTag("TITLE", false), tagSetDocFormat)
|
||||
enshrineTag(createBalancedTag("TR", true), tagSetTables)
|
||||
enshrineTag(createBalancedTag("TT", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("U", false), tagSetInlineFormat)
|
||||
enshrineTag(createBalancedTag("UL", true), tagSetBlockFormat)
|
||||
enshrineTag(createBalancedTag("VAR", false), tagSetInlineFormat)
|
||||
enshrineTag(createWBRTag(), tagSetBlockFormat)
|
||||
enshrineTag(createBalancedTag("XML", false), tagSetMSFTActiveContent)
|
||||
enshrineTag(createBalancedTag("XMP", false), tagSetNSCPInlineFormat)
|
||||
|
||||
// Create the tag sets.
|
||||
bs := bitset.New(tagSetComment + 1)
|
||||
bs.Set(tagSetInlineFormat)
|
||||
bs.Set(tagSetAnchor)
|
||||
bs.Set(tagSetBlockFormat)
|
||||
bs.Set(tagSetFontFormat)
|
||||
bs.Set(tagSetImages)
|
||||
tagSetNameToSet["normal"] = bs
|
||||
bs = bitset.New(tagSetComment + 1)
|
||||
bs.Set(tagSetInlineFormat)
|
||||
tagSetNameToSet["restricted"] = bs
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user