documentation pass done, ready to use the HTML Checker in other code

2025-11-03 15:20:41 -07:00
parent 4f9cdde1f2
commit 1ba02f37e9
7 changed files with 255 additions and 132 deletions
@@ -12,7 +12,6 @@ package htmlcheck
 import (
 	"errors"
 	"fmt"
 	"maps"
 	"net/url"
 	"strings"
 	"unicode"
@@ -23,24 +22,38 @@ import (
 	log "github.com/sirupsen/logrus"
 )
 /*----------------------------------------------------------------------------
 * External definitions
 *----------------------------------------------------------------------------
 */
 // HTMLChecker is a component that checks HTML and reformats it as needed.
 type HTMLChecker interface {
-	Append(string) error
+	Append(string) error               // add additional string to the checker state
-	Finish() error
+	Finish() error                     // finish parsing HTML
-	Reset()
+	Reset()                            // clear state
-	Value() (string, error)
+	Value() (string, error)            // return value
-	Length() (int, error)
+	Length() (int, error)              // return text length
-	Lines() (int, error)
+	Lines() (int, error)               // return number of lines
-	Counter(string) (int, error)
+	Counter(string) (int, error)       // return value of a counter
-	GetContext(string) any
+	GetContext(string) any             // get a context value
-	SetContext(string, any)
+	SetContext(string, any)            // set a context value
-	ExternalRefs() ([]*url.URL, error)
+	ExternalRefs() ([]*url.URL, error) // return a list of external references
-	InternalRefs() ([]string, error)
+	InternalRefs() ([]string, error)   // return a list of internal references
 }
 // Sentinel errors reported by the HTML checker when a call is made in the wrong phase.
 var (
 	// ErrAlreadyFinished is returned when an operation requires an unfinished checker,
 	// but the checker has already been finished.
 	ErrAlreadyFinished = errors.New("the HTML checker has already finished")

 	// ErrNotYetFinished is returned when an operation requires a finished checker,
 	// but the checker has not been finished yet.
 	ErrNotYetFinished = errors.New("the HTML checker has not yet been finished")
 )
 /*----------------------------------------------------------------------------
 * Internal definitions
 *----------------------------------------------------------------------------
 */
 // htmlCheckerBackend is an interface used by subcomponents to communicate back to the HTML checker.
 type htmlCheckerBackend interface {
 	getCheckerAttrValue(string) string
 	sendTagMessage(string)
@@ -66,33 +79,40 @@ const htmlMarginSlop = 5
 // hyphApos is used to find hyphens and apostrophes.
 const hyphApos = "-'"
 // htmlCheckerImpl is the implementation of the HTML checker.
 type htmlCheckerImpl struct {
-	config             *HTMLCheckerConfig
+	config             *HTMLCheckerConfig           // pointer to configuration
-	started            bool
+	started            bool                         // has checker been started?
-	finished           bool
+	finished           bool                         // has checker been finished?
-	state              int
+	state              int                          // current state
-	quoteChar          byte
+	quoteChar          byte                         // quote character to match in stateTagQuote
-	parenLevel         int
+	parenLevel         int                          // parenthesis level in stateParen
-	columns            int
+	columns            int                          // current column position - runes, not bytes!
-	lines              int
+	lines              int                          // lines of text
-	noBreakCount       int
+	noBreakCount       int                          // current NOBR nesting count
-	triggerWBR         bool
+	triggerWBR         bool                         // do we need to trigger a word break?
-	outputBuffer       strings.Builder
+	outputBuffer       strings.Builder              // output is gathered here
-	tempBuffer         strings.Builder
+	tempBuffer         strings.Builder              // input is gathered here within a state and flushed on transition
-	tagStack           *util.Stack[*tag]
+	tagStack           *util.Stack[*tag]            // keeps track of nested HTML tags
-	counters           map[string]*countingRewriter
+	counters           map[string]*countingRewriter // counters for times rewrites have happened
-	stringRewriters    []rewriter
+	stringRewriters    []rewriter                   // loaded string rewriters
-	wordRewriters      []rewriter
+	wordRewriters      []rewriter                   // loaded word rewriters
-	tagRewriters       []rewriter
+	tagRewriters       []rewriter                   // loaded tag rewriters
-	parenRewriters     []rewriter
+	parenRewriters     []rewriter                   // loaded parenthesis rewriters
-	outputFilters      []outputFilter
+	outputFilters      []outputFilter               // loaded standard output filters
-	rawOutputFilters   []outputFilter
+	rawOutputFilters   []outputFilter               // loaded "raw" output filters
-	contextData        map[string]any
+	contextData        map[string]any               // holds context values
-	externalReferences map[*url.URL]bool
+	externalReferences map[*url.URL]bool            // saved external references
-	internalReferences map[string]bool
+	internalReferences map[string]bool              // saved internal references
-	tagSet             *bitset.BitSet
+	tagSet             *bitset.BitSet               // set of valid tags from configuration
 }
 /*----------------------------------------------------------------------------
 * Construction helpers
 *----------------------------------------------------------------------------
 */
 // copyRewriters looks up all rewriters in the source array and builds a target array.
 func (ht *htmlCheckerImpl) copyRewriters(dest []rewriter, source []string) {
 	for i := range source {
 		rw, ok := rewriterRegistry[source[i]]
@@ -109,6 +129,7 @@ func (ht *htmlCheckerImpl) copyRewriters(dest []rewriter, source []string) {
 	}
 }
 // copyOutputFilters looks up all output filters in the source array and builds a target array.
 func (ht *htmlCheckerImpl) copyOutputFilters(dest []outputFilter, source []string) {
 	for i := range source {
 		f, ok := outputFilterRegistry[source[i]]
@@ -120,6 +141,18 @@ func (ht *htmlCheckerImpl) copyOutputFilters(dest []outputFilter, source []strin
 	}
 }
 /*----------------------------------------------------------------------------
 * The construction function
 *----------------------------------------------------------------------------
 */
 /* AmNewHTMLChecker creates a new HTML Checker object.
 * Parameters:
 *     configName - Name of the configuration to use.
 * Returns:
 *     New HTML checker reference.
 *     Standard Go error status.
 */
 func AmNewHTMLChecker(configName string) (HTMLChecker, error) {
 	config, ok := configsRegistry[configName]
 	if !ok {
@@ -161,6 +194,12 @@ func AmNewHTMLChecker(configName string) (HTMLChecker, error) {
 	return &rc, nil
 }
 /*----------------------------------------------------------------------------
 * Implementations from htmlCheckerBackend and rewriterServices
 *----------------------------------------------------------------------------
 */
 // getCheckerAttrValue returns the value of an HTML checker attribute.
 func (ht *htmlCheckerImpl) getCheckerAttrValue(name string) string {
 	if name == "ANCHORTAIL" {
 		return ht.config.AnchorTail
@@ -168,6 +207,7 @@ func (ht *htmlCheckerImpl) getCheckerAttrValue(name string) string {
 	return ""
 }
 // sendTagMessage offers specific HTML tags a way to send messages to affect the HTML checker's state.
 func (ht *htmlCheckerImpl) sendTagMessage(msg string) {
 	switch msg {
 	case "NOBR":
@@ -179,26 +219,37 @@ func (ht *htmlCheckerImpl) sendTagMessage(msg string) {
 	}
 }
 // getCheckerContextValue looks up name in the checker's context data and returns the stored value.
 // A name with no stored value yields nil.
 func (ht *htmlCheckerImpl) getCheckerContextValue(name string) any {
 	value := ht.contextData[name]
 	return value
 }
 // addExternalRef records ref in the checker's set of external references.
 // The set is keyed by the *url.URL pointer itself, so two distinct pointers
 // are recorded as separate entries even if they describe the same URL.
 func (ht *htmlCheckerImpl) addExternalRef(ref *url.URL) {
 	seen := ht.externalReferences
 	seen[ref] = true
 }
 // addInternalRef records ref in the checker's set of internal references.
 // Recording the same string more than once leaves a single entry.
 func (ht *htmlCheckerImpl) addInternalRef(ref string) {
 	seen := ht.internalReferences
 	seen[ref] = true
 }
 // rewriterAttrValue exposes HTML checker attributes to rewriters; it simply
 // forwards the lookup to getCheckerAttrValue and returns its result.
 func (ht *htmlCheckerImpl) rewriterAttrValue(name string) string {
 	attr := ht.getCheckerAttrValue(name)
 	return attr
 }
 // rewriterContextValue exposes the checker's context data to rewriters.
 // It yields the value stored under name, or nil if nothing is stored there.
 func (ht *htmlCheckerImpl) rewriterContextValue(name string) any {
 	// Same lookup as the backend-facing accessor, so share it.
 	return ht.getCheckerContextValue(name)
 }
 /*----------------------------------------------------------------------------
 * Internal functions forming the meat of the parser
 *----------------------------------------------------------------------------
 */
 // emitRune emits a rune to the output buffer, respecting the specified output filters.
 func (ht *htmlCheckerImpl) emitRune(ch rune, filters []outputFilter, countCols bool) {
 	handled := false
 	if len(filters) > 0 {
@@ -209,28 +260,26 @@ func (ht *htmlCheckerImpl) emitRune(ch rune, filters []outputFilter, countCols b
 				break // found a filter to handle it, done
 			}
 		}
 	}
 	if !handled { // output the raw character
 		ht.outputBuffer.WriteRune(ch)
 	}
 	if countCols && ht.config.WordWrap > 0 {
 		ht.columns++
 	}
 	}
 }
 // emitString emits an entire string to the output buffer, respecting the specified output filters.
 func (ht *htmlCheckerImpl) emitString(str string, filters []outputFilter, countCols bool) {
-	if str == "" {
+	if str != "" {
-		return
+		realCountCols := countCols && ht.config.WordWrap > 0
 	}
 	realCountCols := countCols && (ht.config.WordWrap > 0)
 		if len(filters) == 0 {
 			// if there are no filters, just output the whole thing
 			ht.outputBuffer.WriteString(str)
 			if realCountCols {
 				ht.columns += utf8.RuneCountInString(str)
 			}
-		return
+		} else {
 	}
 			temp := str
 			for len(temp) > 0 {
 				// We output as much of the string as we possibly can at once. Assume, for now, we'll output the whole thing.
@@ -276,8 +325,11 @@ func (ht *htmlCheckerImpl) emitString(str string, filters []outputFilter, countC
 					temp = temp[outputLen:]
 				}
 			}
 		}
 	}
 }
 // emitLineBreak emits a line break to the output.
 func (ht *htmlCheckerImpl) emitLineBreak() {
 	ht.emitString("\r\n", ht.rawOutputFilters, false)
 	if ht.config.WordWrap > 0 {
@@ -286,34 +338,38 @@ func (ht *htmlCheckerImpl) emitLineBreak() {
 	ht.lines++
 }
 // emitPossibleLineBreak emits a line break only when all of the following hold:
 // word wrapping is enabled (WordWrap > 0), we are not inside a no-break region,
 // and the current column has reached or passed the wrap width.
 func (ht *htmlCheckerImpl) emitPossibleLineBreak() {
 	wrapAt := ht.config.WordWrap
 	if wrapAt <= 0 || ht.noBreakCount > 0 {
 		return // wrapping disabled, or breaks currently suppressed
 	}
 	if ht.columns < wrapAt {
 		return // still room on the current line
 	}
 	ht.emitLineBreak()
 }
-func (ht *htmlCheckerImpl) ensureSpaceOnLine(nchars int) {
+// ensureSpaceOnLine makes sure we have enough space on the current line for a certain number of runes, adding a line break if needed.
 func (ht *htmlCheckerImpl) ensureSpaceOnLine(nrunes int) {
 	if ht.config.WordWrap > 0 && ht.noBreakCount <= 0 {
 		// add a line break if needed here
 		remainSpace := ht.config.WordWrap - ht.columns
-		if remainSpace < nchars {
+		if remainSpace < nrunes {
 			ht.emitLineBreak()
 		}
 	}
 }
 // emitMarkupData emits the markup data in the specified data structure.
 func (ht *htmlCheckerImpl) emitMarkupData(md *markupData) {
 	if !md.rescan {
-		ht.ensureSpaceOnLine(len(md.text))
+		ht.ensureSpaceOnLine(utf8.RuneCountInString(md.text))
 		ht.emitString(md.beginMarkup, ht.rawOutputFilters, false)
 		ht.emitString(md.text, ht.outputFilters, true)
 		ht.emitString(md.endMarkup, ht.rawOutputFilters, false)
 	}
 }
 // emitBracketedMarkupData emits the markup data in the specified data structure, with prefix and suffix runes.
 func (ht *htmlCheckerImpl) emitBracketedMarkupData(md *markupData, prefix rune, suffix rune) {
 	if !md.rescan {
-		l := len(md.text)
+		l := utf8.RuneCountInString(md.text)
 		if l > 0 {
 			l += 2
 		}
@@ -330,6 +386,7 @@ func (ht *htmlCheckerImpl) emitBracketedMarkupData(md *markupData, prefix rune,
 	}
 }
 // doFlushWhitespace flushes out all the whitespace in the temporary buffer.
 func (ht *htmlCheckerImpl) doFlushWhitespace() {
 	outputLen := ht.tempBuffer.Len()
 	if outputLen > 0 {
@@ -356,6 +413,7 @@ func (ht *htmlCheckerImpl) doFlushWhitespace() {
 	}
 }
 // doFlushNewlines flushes all the newlines that are in the temporary buffer.
 func (ht *htmlCheckerImpl) doFlushNewlines() {
 	// Measure the number of line breaks we have.
 	lineBreaks, crs := 0, 0
@@ -385,6 +443,7 @@ func (ht *htmlCheckerImpl) doFlushNewlines() {
 		}
 	}
 	// emit line breaks
 	for lineBreaks > 0 {
 		ht.emitLineBreak()
 		lineBreaks--
@@ -393,6 +452,7 @@ func (ht *htmlCheckerImpl) doFlushNewlines() {
 	ht.state = stateWhitespace
 }
 // emitFromStartOfTempBuffer emits a certain number of runes from the start of the temporary buffer.
 func (ht *htmlCheckerImpl) emitFromStartOfTempBuffer(nrunes int) {
 	if nrunes > 0 {
 		if ht.config.WordWrap > 0 && ht.noBreakCount <= 0 {
@@ -420,6 +480,7 @@ func (ht *htmlCheckerImpl) emitFromStartOfTempBuffer(nrunes int) {
 	}
 }
 // attemptRewrite attempts to apply a list of rewriters on the text, returning the first one that matches.
 func (ht *htmlCheckerImpl) attemptRewrite(rewriters []rewriter, data string) *markupData {
 	for _, r := range rewriters {
 		rc := r.Rewrite(data, ht)
@@ -430,6 +491,7 @@ func (ht *htmlCheckerImpl) attemptRewrite(rewriters []rewriter, data string) *ma
 	return nil
 }
 // doFlushString attempts to flush a string from the temporary buffer.
 func (ht *htmlCheckerImpl) doFlushString() bool {
 	md := ht.attemptRewrite(ht.stringRewriters, ht.tempBuffer.String())
 	if md != nil {
@@ -519,9 +581,11 @@ func (ht *htmlCheckerImpl) doFlushString() bool {
 	return false
 }
 // handleAsHTML attempts to handle the contents of the tag in the temporary buffer as HTML.
 func (ht *htmlCheckerImpl) handleAsHTML() bool {
 	ht.triggerWBR = false
 	tempString := ht.tempBuffer.String()
 	// Figure out where the start of the command word is.
 	startCmd := 0
 	closingTag := false
@@ -543,8 +607,7 @@ func (ht *htmlCheckerImpl) handleAsHTML() bool {
 		// command word is empty or is too long to be an HTML tag
 		return false
 	}
-	possTagName := tempString[startCmd:endCmd]
+	tagIndex, ok := tagNameToIndex[strings.ToUpper(tempString[startCmd:endCmd])]
 	tagIndex, ok := tagNameToIndex[strings.ToUpper(possTagName)]
 	if !ok {
 		// not a known HTML tag
 		return false
@@ -584,6 +647,7 @@ func (ht *htmlCheckerImpl) handleAsHTML() bool {
 	ht.emitString(realTagData, ht.rawOutputFilters, false)
 	ht.emitRune('>', ht.rawOutputFilters, false)
 	// Determine whether this tag causes a "logical line break."
 	logicalLineBreak := false
 	if ht.triggerWBR && !closingTag && ht.noBreakCount > 0 {
 		// word break is logical line break, but only within no-break tags
@@ -597,10 +661,12 @@ func (ht *htmlCheckerImpl) handleAsHTML() bool {
 	return true
 }
 // containsHTMLComment reports whether the temporary buffer begins with "!--",
 // i.e. whether it holds (at least the start of) an HTML comment.
 func (ht *htmlCheckerImpl) containsHTMLComment() bool {
 	// A successful prefix match already implies the buffer holds at least three bytes.
 	buffered := ht.tempBuffer.String()
 	return strings.HasPrefix(buffered, "!--")
 }
 // containsCompleteHTMLComment returns true if the temporary buffer contains a complete HTML comment.
 func (ht *htmlCheckerImpl) containsCompleteHTMLComment() bool {
 	if ht.tempBuffer.Len() >= 5 {
 		s := ht.tempBuffer.String()
@@ -609,6 +675,7 @@ func (ht *htmlCheckerImpl) containsCompleteHTMLComment() bool {
 	return false
 }
 // containsXMLConstruct returns true if the temporary buffer contains an XML-style namespaced tag.
 func (ht *htmlCheckerImpl) containsXMLConstruct() bool {
 	tempString := ht.tempBuffer.String()
 	ptr := 0
@@ -626,19 +693,18 @@ func (ht *htmlCheckerImpl) containsXMLConstruct() bool {
 	return false
 }
 // finishTag processes and outputs the tag in the temporary buffer.
 func (ht *htmlCheckerImpl) finishTag() {
 	if ht.containsHTMLComment() {
-		if ht.containsCompleteHTMLComment() {
+		if ht.containsCompleteHTMLComment() && !ht.config.DiscardComments {
 			if !ht.config.DiscardComments {
 			// output the comment in the raw
 			ht.emitRune('<', ht.rawOutputFilters, false)
 			ht.emitString(ht.tempBuffer.String(), ht.rawOutputFilters, false)
 			ht.emitRune('>', ht.rawOutputFilters, false)
-				// clear state and retun to parsing
+			// clear state and return to parsing
 			ht.tempBuffer.Reset()
 			ht.state = stateWhitespace
 		}
 		}
 		return
 	}
 	if ht.handleAsHTML() {
@@ -680,6 +746,7 @@ func (ht *htmlCheckerImpl) finishTag() {
 	ht.parse(">")
 }
 // finishParen processes and outputs the parenthesized construct in the temporary buffer.
 func (ht *htmlCheckerImpl) finishParen() {
 	// Try to handle the element using a paren rewriter
 	md := ht.attemptRewrite(ht.parenRewriters, ht.tempBuffer.String())
@@ -708,6 +775,7 @@ func (ht *htmlCheckerImpl) finishParen() {
 	ht.parse(")")
 }
 // parse handles the meat of parsing an input string; it runs the state machine on the input.
 func (ht *htmlCheckerImpl) parse(str string) {
 	i := 0
 	for i < len(str) {
@@ -785,7 +853,7 @@ func (ht *htmlCheckerImpl) parse(str string) {
 						ht.tempBuffer.WriteByte('\\')
 					}
 				} else {
-					// just append the backslash notrmally
+					// just append the backslash normally
 					ht.tempBuffer.WriteByte(ch)
 					i++
 				}
@@ -801,7 +869,7 @@ func (ht *htmlCheckerImpl) parse(str string) {
 			case '<': // output < and stay in this state
 				ht.emitRune('<', ht.outputFilters, true)
 				i++
-			default:
+			default: // begin processing tag
 				ht.state = stateTag
 				ht.tempBuffer.WriteByte(ch)
 				i++
@@ -822,14 +890,15 @@ func (ht *htmlCheckerImpl) parse(str string) {
 			}
 		case stateParen:
 			switch ch {
-			case '(':
+			case '(': // nest parentheses one level deeper
 				ht.tempBuffer.WriteByte(ch)
 				ht.parenLevel++
 				i++
 			case ')':
 				if ht.parenLevel == 0 {
-					ht.finishParen()
+					ht.finishParen() // finish paren, changing state and recursively parsing if necessary
 				} else {
 					// nest parentheses one LESS level deeper
 					ht.tempBuffer.WriteByte(ch)
 					ht.parenLevel--
 				}
@@ -851,10 +920,23 @@ func (ht *htmlCheckerImpl) parse(str string) {
 			} else {
 				ht.doFlushNewlines()
 			}
 		default:
 			log.Fatalf("invalid parser state: %d", ht.state)
 		}
 	}
 }
 /*----------------------------------------------------------------------------
 * Implementations from the HTMLChecker interface
 *----------------------------------------------------------------------------
 */
 /* Append adds an additional string to the HTML checker data.
 * Parameters:
 *     str - The string to be added and parsed.
 * Returns:
 *     Standard Go error status.
 */
 func (ht *htmlCheckerImpl) Append(str string) error {
 	if ht.finished {
 		return ErrAlreadyFinished
@@ -868,6 +950,10 @@ func (ht *htmlCheckerImpl) Append(str string) error {
 	return nil
 }
 /* Finish completes the HTML checker parsing and makes the result available.
 * Returns:
 *     Standard Go error status.
 */
 func (ht *htmlCheckerImpl) Finish() error {
 	if ht.finished {
 		return ErrAlreadyFinished
@@ -900,6 +986,7 @@ func (ht *htmlCheckerImpl) Finish() error {
 			}
 			running = true
 		case stateParen:
 			// we won't finish this, so it's automatically rejected
 			rejection := ht.tempBuffer.String()
 			ht.tempBuffer.Reset()
 			ht.tempBuffer.WriteByte('(')
@@ -923,6 +1010,7 @@ func (ht *htmlCheckerImpl) Finish() error {
 	return nil
 }
 // Reset clears the internal state of the HTML Checker.
 func (ht *htmlCheckerImpl) Reset() {
 	ht.started = false
 	ht.finished = false
@@ -933,17 +1021,20 @@ func (ht *htmlCheckerImpl) Reset() {
 	ht.lines = 0
 	ht.parenLevel = 0
 	ht.outputBuffer.Reset()
 	ht.tempBuffer.Reset()
 	ht.tagStack.Clear()
 	for u := range ht.externalReferences {
 		delete(ht.externalReferences, u)
 	}
 	for k := range ht.internalReferences {
 		delete(ht.internalReferences, k)
 	}
-	for c := range maps.Values(ht.counters) {
+	for _, c := range ht.counters {
 		c.Reset()
 	}
 }
 // Value returns the value of the output from the HTML Checker.
 func (ht *htmlCheckerImpl) Value() (string, error) {
 	if ht.finished {
 		return ht.outputBuffer.String(), nil
@@ -951,6 +1042,7 @@ func (ht *htmlCheckerImpl) Value() (string, error) {
 	return "", ErrNotYetFinished
 }
 // Length returns the length in bytes of the HTML Checker result.
 func (ht *htmlCheckerImpl) Length() (int, error) {
 	if ht.finished {
 		return ht.outputBuffer.Len(), nil
@@ -958,6 +1050,7 @@ func (ht *htmlCheckerImpl) Length() (int, error) {
 	return 0, ErrNotYetFinished
 }
 // Lines returns the number of lines of text in the HTML Checker result.
 func (ht *htmlCheckerImpl) Lines() (int, error) {
 	if ht.finished {
 		return ht.lines, nil
@@ -965,6 +1058,7 @@ func (ht *htmlCheckerImpl) Lines() (int, error) {
 	return 0, ErrNotYetFinished
 }
 // Counter returns the value of a counter maintained by the HTML Checker (corresponding to a rewriter).
 func (ht *htmlCheckerImpl) Counter(name string) (int, error) {
 	if ht.finished {
 		cr, ok := ht.counters[name]
@@ -976,19 +1070,22 @@ func (ht *htmlCheckerImpl) Counter(name string) (int, error) {
 	return 0, ErrNotYetFinished
 }
 /* GetContext returns an HTML checker context value.
  * Parameters:
  *     name - Name of the context value to look up.
  * Returns:
  *     The value stored under that name, or nil if none is stored.
  */
 func (ht *htmlCheckerImpl) GetContext(name string) any {
 	if value, found := ht.contextData[name]; found {
 		return value
 	}
 	return nil
 }
 /* SetContext sets an HTML checker context value, replacing any value already stored under the name.
  * Parameters:
  *     name - Name of the context value to set.
  *     value - The value to store.
  */
 func (ht *htmlCheckerImpl) SetContext(name string, value any) {
 	store := ht.contextData
 	store[name] = value
 }
 // ExternalRefs returns a list of URLs as external references in the parsed text.
 func (ht *htmlCheckerImpl) ExternalRefs() ([]*url.URL, error) {
 	if ht.finished {
 		rc := make([]*url.URL, len(ht.externalReferences))
 		p := 0
-		for url := range maps.Keys(ht.externalReferences) {
+		for url := range ht.externalReferences {
 			rc[p] = url
 			p++
 		}
@@ -997,11 +1094,12 @@ func (ht *htmlCheckerImpl) ExternalRefs() ([]*url.URL, error) {
 	return nil, ErrNotYetFinished
 }
 // InternalRefs returns a list of internal references in the parsed text.
 func (ht *htmlCheckerImpl) InternalRefs() ([]string, error) {
 	if ht.finished {
 		rc := make([]string, len(ht.internalReferences))
 		p := 0
-		for s := range maps.Keys(ht.internalReferences) {
+		for s := range ht.internalReferences {
 			rc[p] = s
 			p++
 		}
@@ -42,6 +42,7 @@ type HTMLCheckerConfigFile struct {
 	Configs []HTMLCheckerConfig `yaml:"configs"`
 }
 // defaultAnchorTail is the anchor tail text used by default: a TARGET attribute naming the "Wander" window.
 const defaultAnchorTail = `TARGET="Wander"`
 //go:embed configs.yaml
@@ -32,7 +32,7 @@ type EmoticonConfig struct {
 	emos        map[string]*EmoticonDef
 }
-// emoticonRewriter is the implementation of rewriter in this file
+// emoticonRewriter is the implementation of rewriter in this file.
 type emoticonRewriter struct {
 	config      *EmoticonConfig
 	prefixChars []byte
@@ -33,7 +33,7 @@ type htmlEncodingFilter struct{}
 // htmlEscapedChars is a list of HTML characters that are escaped.
 const htmlEscapedChars = "<>&"
-// tryOutputCharacter outputs a character that needs to be escaped.
+// tryOutputRune outputs a rune that needs to be escaped.
 func (f *htmlEncodingFilter) tryOutputRune(buf strings.Builder, ch rune) bool {
 	switch ch {
 	case '<':
@@ -38,10 +38,10 @@ const (
 	tagSetNSCPForms         = 16 // Netscape form tags
 	tagSetNSCPBlockFormat   = 17 // Netscape block-formatting tags
 	tagSetNSCPServer        = 18 // the Netscape <SERVER> tag
-	tagSetMSFTDocFormat     = 19 // Microsoft-specific document formatting
+	tagSetMSFTDocFormat     = 19 // Micro$oft-specific document formatting
-	tagSetMSFTInlineFormat  = 20 // Microsoft-specific inline formatting
+	tagSetMSFTInlineFormat  = 20 // Micro$oft-specific inline formatting
-	tagSetMSFTBlockFormat   = 21 // Microsoft-specific block formatting
+	tagSetMSFTBlockFormat   = 21 // Micro$oft-specific block formatting
-	tagSetMSFTActiveContent = 22 // Microsoft-specific active content
+	tagSetMSFTActiveContent = 22 // Micro$oft-specific active content
 	tagSetServerPage        = 23 // server-side page use
 	tagSetJavaServer        = 24 // Java server page use
 	tagSetComment           = 25 // HTML comments
@@ -54,14 +54,14 @@ type rewriteContentsFunc func(*tag, string, bool, htmlCheckerBackend) string
 // tag is a structure describing a particular HTML tag.
 type tag struct {
-	name        string
+	name        string              // tag name, upper case
-	index       int
+	index       int                 // index in the array
-	lineBreak   bool
+	lineBreak   bool                // does the tag cause line breaks?
-	allowClose  bool
+	allowClose  bool                // is a close form of the tag allowed?
-	balanceTags bool
+	balanceTags bool                // do we need to balance open and close tags?
-	clb         causeLineBreakFunc
+	clb         causeLineBreakFunc  // does this tag cause line breaks?
-	ct          closingTagFunc
+	ct          closingTagFunc      // generate closing tag
-	rwc         rewriteContentsFunc
+	rwc         rewriteContentsFunc // rewrite the contents if necessary
 }
 // causeLineBreak returns true if the tag causes a line break.
@@ -43,6 +43,7 @@ func (stk *Stack[T]) Peek() (T, bool) {
 	return stk.elements[len(stk.elements)-1], true
 }
 // RemoveMostRecent looks for the most recent particular data element on the stack, and removes that.
 func (stk *Stack[T]) RemoveMostRecent(data T) bool {
 	i := len(stk.elements) - 1
 	for i >= 0 {
@@ -58,10 +59,16 @@ func (stk *Stack[T]) RemoveMostRecent(data T) bool {
 			}
 			return true
 		}
 		i--
 	}
 	return false
 }
 // Clear empties the stack by swapping in a fresh, empty (non-nil) element slice;
 // the previous backing storage is dropped rather than reused.
 func (stk *Stack[T]) Clear() {
 	stk.elements = []T{}
 }
 // NewStack creates and returns a new stack.
 func NewStack[T comparable]() *Stack[T] {
 	return &Stack[T]{
@@ -94,10 +94,18 @@ func RunesToBytes(s string, runeCount int) int {
 	return bp
 }
 // IsRuneWord reports whether ch counts as part of a word: any Unicode letter,
 // a hyphen ('-'), or an apostrophe ('\''). Digits and whitespace do not count.
 func IsRuneWord(ch rune) bool {
 	switch ch {
 	case '-', '\'':
 		// punctuation that may appear inside a word
 		return true
 	default:
 		return unicode.IsLetter(ch)
 	}
 }
 /* WordRunLength calculates the number of runes at the start of the string that are either word or non-word characters.
 * Parameters:
 *     s - The string under test.
 * Returns:
 *     The run length in runes.
 *     true if the run is a length of word characters, false if it's a run of non-word characters.
 */
 func WordRunLength(s string) (int, bool) {
 	c1, initLen := utf8.DecodeRuneInString(s)
 	wordChar := IsRuneWord(c1)
@@ -111,6 +119,15 @@ func WordRunLength(s string) (int, bool) {
 	return rlen, wordChar
 }
 /* WordRunLengthAfterPrefix calculates the number of runes after a certain number in the string
 * that are either word or non-word characters.
 * Parameters:
 *     s - The string under test.
 *     nrunes - The number of runes to skip at the start of the string.
 * Returns:
 *     The run length in runes.
 *     true if the run is a length of word characters, false if it's a run of non-word characters.
 */
 func WordRunLengthAfterPrefix(s string, nrunes int) (int, bool) {
 	ofs := 0
 	for _, ch := range s {