landed enough rewriters and filters to begin building configurations for the HTML checker

2025-10-29 22:50:25 -06:00
parent e4d7deaf5f
commit eb47b001bb
5 changed files with 614 additions and 19 deletions
@@ -0,0 +1,275 @@
+/*
+ * Amsterdam Web Communities System
+ * Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ */
+// The database package contains database management and storage logic.
+package database
+
+import (
+	"errors"
+	"math"
+	"strconv"
+	"strings"
+)
+
+// PostLinkData is the structure holding the decoded parts of the post link.
+type PostLinkData struct {
+	Community  string // community alias; "" when the link does not name one
+	Conference string // conference alias; "" when the link does not name one
+	Topic      int16  // topic number; AmDecodePostLink leaves -1 when the link has none
+	FirstPost  int32  // first post of the range; AmDecodePostLink leaves -1 when the link has none
+	LastPost   int32  // last post of the range; -1 when absent or when the range is open-ended
+}
+
+// NeedsDBVerification reports whether the post link data names a community or
+// conference alias, i.e. whether it needs to be verified against the database.
+func (d *PostLinkData) NeedsDBVerification() bool {
+	noNames := d.Community == "" && d.Conference == ""
+	return !noNames
+}
+
+// VerifyNames checks the community and conference aliases in the post link
+// data against the database. Empty aliases are not looked up. A lookup error
+// is returned as-is; an alias that resolves to nothing yields a "not found"
+// error; otherwise the result is nil.
+func (d *PostLinkData) VerifyNames() error {
+	if alias := d.Community; alias != "" {
+		switch comm, err := AmGetCommunityByAlias(alias); {
+		case err != nil:
+			return err
+		case comm == nil:
+			return errors.New("community alias not found")
+		}
+	}
+	if alias := d.Conference; alias != "" {
+		switch conf, err := AmGetConferenceByAlias(alias); {
+		case err != nil:
+			return err
+		case conf == nil:
+			return errors.New("conference alias not found")
+		}
+	}
+	return nil
+}
+
+// Maximum lengths of the components.
+const (
+	maxLinkLength       = 130 // longest accepted post link string, in bytes
+	maxCommunityLength  = 32  // longest accepted community alias, in bytes
+	maxConferenceLength = 64  // longest accepted conference alias, in bytes
+)
+
+// validateCommunity checks that name is an acceptable community alias (not
+// over-long and a valid Amsterdam identifier) and, if so, stores it in rc.
+func validateCommunity(name string, rc *PostLinkData) error {
+	switch {
+	case len(name) > maxCommunityLength:
+		return errors.New("community alias is too long")
+	case !AmIsValidAmsterdamID(name):
+		return errors.New("community alias is not a valid identifier")
+	}
+	rc.Community = name
+	return nil
+}
+
+// validateConference checks that name is an acceptable conference alias (not
+// over-long and a valid Amsterdam identifier) and, if so, stores it in rc.
+func validateConference(name string, rc *PostLinkData) error {
+	switch {
+	case len(name) > maxConferenceLength:
+		return errors.New("conference alias is too long")
+	case !AmIsValidAmsterdamID(name):
+		return errors.New("conference alias is not a valid identifier")
+	}
+	rc.Conference = name
+	return nil
+}
+
+// decodeTopicNumber decodes the topic number and saves it in rc.
+// The text must be a decimal number in the range 0..math.MaxInt16; rc is left
+// untouched when an error is returned.
+func decodeTopicNumber(data string, rc *PostLinkData) error {
+	v, err := strconv.Atoi(data)
+	if err != nil {
+		return errors.New("invalid topic number reference")
+	}
+	// Negative values would collide with the -1 "no topic" marker, and values
+	// outside the int16 range would silently wrap in the conversion below.
+	if v < 0 || v > math.MaxInt16 {
+		return errors.New("topic number out of range")
+	}
+	rc.Topic = int16(v)
+	return nil
+}
+
+// decodePostRange decodes the post ranges (first and last post) and saves them.
+func decodePostRange(data string, rc *PostLinkData) error {
+	pos := strings.IndexByte(data, '-')
+	var tempVal int32 = -1
+	if pos > 0 {
+		temp := data[:pos]
+		data = data[pos+1:]
+		v, err := strconv.Atoi(temp)
+		if err != nil {
+			return errors.New("invalid post number reference")
+		}
+		tempVal = int32(v)
+
+		if len(data) == 0 {
+			// range is open-ended (number-)
+			rc.FirstPost = tempVal
+			rc.LastPost = -1
+			return nil
+		}
+	} else if pos == 0 {
+		return errors.New("cannot have - at beginning of post range")
+	}
+
+	v2, err := strconv.Atoi(data)
+	if err != nil {
+		return errors.New("invalid post number reference")
+	}
+	rc.FirstPost = int32(v2)
+	if tempVal >= 0 {
+		if tempVal < rc.FirstPost {
+			// "frontwards" range - reorder the components
+			rc.LastPost = rc.FirstPost
+			rc.FirstPost = tempVal
+		} else {
+			// "backwards" range
+			rc.LastPost = tempVal
+		}
+	} else {
+		// a "range" of a single post
+		rc.LastPost = rc.FirstPost
+	}
+	return nil
+}
+
+/* AmDecodePostLink decodes a post link and returns the complete breakdown of its components.
+ * The full form is "community!conference.topic.posts"; leading components may be left out
+ * ("posts", "topic.posts", "conference.topic", ...) and the link may stop after a "!" or "."
+ * ("community!", "conference.", "conference.topic."). Components the link does not name are
+ * left as "" (names) or -1 (numbers) in the result.
+ * Parameters:
+ *     data - The post link to be decoded.
+ * Returns:
+ *     Pointer to structure containing post link data, or nil.
+ *     Standard Go error status.
+ */
+func AmDecodePostLink(data string) (*PostLinkData, error) {
+	if data == "" {
+		return nil, errors.New("empty string")
+	}
+	if len(data) > maxLinkLength {
+		return nil, errors.New("post link string too long")
+	}
+	rc := PostLinkData{
+		Community:  "",
+		Conference: "",
+		Topic:      -1,
+		FirstPost:  -1,
+		LastPost:   -1,
+	}
+
+	work := data
+	// First test: Bang
+	pos := strings.IndexByte(work, '!')
+	if pos > 0 {
+		err := validateCommunity(work[:pos], &rc)
+		if err != nil {
+			return nil, err
+		}
+		work = work[pos+1:]
+		if len(work) == 0 {
+			return &rc, nil // community link
+		}
+	} else if pos == 0 {
+		return nil, errors.New("cannot have ! at beginning")
+	}
+
+	// Second test: Dot #1
+	pos = strings.IndexByte(work, '.')
+	if pos < 0 {
+		// no dots in here, must be either "postlink" or "community!conference"
+		var err error
+		if rc.Community == "" {
+			err = decodePostRange(work, &rc)
+		} else {
+			err = validateConference(work, &rc)
+		}
+		if err != nil {
+			return nil, err
+		}
+		// The whole link has been consumed. Continuing would slice work with
+		// pos == -1, which panics.
+		return &rc, nil
+	}
+
+	// Peel off the initial substring before the dot.
+	confOrTopic := work[:pos]
+	work = work[pos+1:]
+	if len(work) == 0 {
+		// we had "conference." or "topic." or maybe "community!conference."
+		var err error
+		if rc.Community == "" {
+			// it's either "conference." or "topic." - try the latter first
+			err = decodeTopicNumber(confOrTopic, &rc)
+			if err != nil {
+				// it's not a topic number, try it as a conference name
+				err = validateConference(confOrTopic, &rc)
+			}
+		} else {
+			// it was "community!conference."
+			err = validateConference(confOrTopic, &rc)
+		}
+		if err != nil {
+			return nil, err
+		}
+		// The link ended at the dot. Continuing would try to decode the empty
+		// remainder as a topic or post range and reject a valid link.
+		return &rc, nil
+	}
+
+	// Third test: Dot #2
+	pos = strings.IndexByte(work, '.')
+	if pos < 0 {
+		// we had "conference.topic" or "topic.posts" or maybe "community!conference.topic"
+		var err error
+		if rc.Community == "" {
+			// either "conference.topic" or "topic.posts"; a first part that
+			// decodes as a topic number selects the latter
+			if decodeTopicNumber(confOrTopic, &rc) == nil {
+				err = decodePostRange(work, &rc)
+			} else if err = validateConference(confOrTopic, &rc); err == nil {
+				err = decodeTopicNumber(work, &rc)
+			}
+		} else {
+			// we have "community!conference.topic"
+			err = validateConference(confOrTopic, &rc)
+			if err == nil {
+				err = decodeTopicNumber(work, &rc)
+			}
+		}
+		if err != nil {
+			return nil, err
+		}
+		return &rc, nil
+	} else if pos == 0 {
+		return nil, errors.New("cannot have . at beginning of string")
+	}
+
+	// We definitely have "conference.topic.something" or "community!conference.topic.something"
+	err := validateConference(confOrTopic, &rc)
+	if err == nil {
+		err = decodeTopicNumber(work[:pos], &rc)
+	}
+	if err != nil {
+		return nil, err
+	}
+	work = work[pos+1:]
+	if len(work) == 0 {
+		// we had "conference.topic." or "community!conference.topic.", those are both valid
+		return &rc, nil
+	}
+	err = decodePostRange(work, &rc) // the rest must be the post range
+	if err != nil {
+		return nil, err
+	}
+	return &rc, nil
+}
@@ -0,0 +1,48 @@
+#
+# Amsterdam Web Communities System
+# Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
+# 
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+#
+configs:
+  - name: "post-body"
+    wordWrap: 55
+    angles: true
+    parens: true
+    discardHTML: false
+    outputFilters:
+      - html
+    stringRewriters:
+      - emoticon
+      - email
+      - url
+    tagRewriters:
+      - emoticon_tag
+      - postlink
+      - userlink
+      - email
+      - url
+    parenRewriters:
+      - userlink
+    tagSet: normal
+  - name: "post-pseud"
+    wordWrap: 0
+    angles: true
+    parens: false
+    discardHTML: false
+    outputFilters:
+      - html
+    tagSet: restricted
+  - name: "preview"
+    wordWrap: 55
+    angles: true
+    parens: true
+    discardHTML: false
+    outputFilters:
+      - html
+    stringRewriters:
+      - emoticon
+      - email
+      - url
@@ -12,6 +12,7 @@ package htmlcheck
 import (
 	_ "embed"
 	"math"
+	"regexp"
 	"strings"

 	"gopkg.in/yaml.v3"
@@ -28,13 +29,13 @@ type EmoticonDef struct {
 type EmoticonConfig struct {
 	PrefixChars string        `yaml:"prefixChars"`
 	Emoticons   []EmoticonDef `yaml:"emoticons"`
+	emos        map[string]*EmoticonDef // emoticon name -> entry in Emoticons; filled in by init
 }

 // emoticonRewriter is the implementation of rewriter in this file
 type emoticonRewriter struct {
 	config      *EmoticonConfig
 	prefixChars []byte
-	emos        map[string]*EmoticonDef
 	patterns    map[string]string
 	minLength   int
 }
@@ -42,28 +43,27 @@ type emoticonRewriter struct {
 //go:embed emoticons.yaml
 var rawEmoConfig []byte

-// EmoticonRewriter is the singleton instance of the emoticon rewriter.
-var EmoticonRewriter rewriter
-
-// init loads the configuration and creates the singleton instance.
+// init loads the configuration and registers the rewriters.
 func init() {
 	var cfg EmoticonConfig
 	if err := yaml.Unmarshal(rawEmoConfig, &cfg); err != nil {
 		panic(err)
 	}
+	cfg.emos = make(map[string]*EmoticonDef)
+	for i, def := range cfg.Emoticons {
+		cfg.emos[def.Name] = &(cfg.Emoticons[i])
+	}
 	rw := emoticonRewriter{
 		config:      &cfg,
 		prefixChars: []byte(cfg.PrefixChars),
-		emos:        make(map[string]*EmoticonDef),
 		patterns:    make(map[string]string),
 		minLength:   math.MaxInt,
 	}
-	for i, def := range rw.config.Emoticons {
-		rw.emos[def.Name] = &(rw.config.Emoticons[i])
+	for _, def := range rw.config.Emoticons {
 		for _, p := range def.Patterns {
 			f := false
-			for k := range rw.prefixChars {
-				if p[0] == rw.prefixChars[k] {
+			for i := range rw.prefixChars {
+				if p[0] == rw.prefixChars[i] {
 					f = true
 					break
 				}
@@ -74,7 +74,13 @@ func init() {
 			}
 		}
 	}
-	EmoticonRewriter = &rw
+	rewriterRegistry[rw.Name()] = &rw
+
+	rw2 := emoticonTagRewriter{
+		config: &cfg,
+		re:     regexp.MustCompile(`^ei:\s*(\w+)(\s*/)?\s*$`),
+	}
+	rewriterRegistry[rw2.Name()] = &rw2
 }

 // Name returns the rewriter's name.
@@ -113,7 +119,7 @@ func (rw *emoticonRewriter) Rewrite(data string, svc rewriterServices) *markupDa
 			for k, v := range rw.patterns {
 				if strings.HasPrefix(work, k) {
 					looking = false
-					output.WriteString(rw.emos[v].Replace)
+					output.WriteString(rw.config.emos[v].Replace)
 					work = work[len(k):]
 					didReplace = true
 					break
@@ -136,5 +142,45 @@ func (rw *emoticonRewriter) Rewrite(data string, svc rewriterServices) *markupDa
 		return nil
 	}
 	output.WriteString(work)
-	return &markupData{beginMarkup: "", text: output.String(), endMarkup: "", rescan: true}
+	return &markupData{
+		beginMarkup: "",
+		text:        output.String(),
+		endMarkup:   "",
+		rescan:      true,
+	}
+}
+
+// emoticonTagRewriter rewrites emoticon tags.
+type emoticonTagRewriter struct {
+	config *EmoticonConfig // emoticon configuration; its emos map is used to look up the named emoticon
+	re     *regexp.Regexp  // pattern the tag text must match; its first group is the emoticon name
+}
+
+// Name returns the rewriter's name, which init also uses as its key in rewriterRegistry.
+func (rw *emoticonTagRewriter) Name() string {
+	return "emoticon_tag"
+}
+
+/* Rewrite replaces an emoticon tag with the replacement text of the emoticon it names.
+ * Parameters:
+ *     data - The tag text to be rewritten.
+ *     svc - Services interface we can use (not needed by this rewriter).
+ * Returns:
+ *     Pointer to markup data holding the replacement, or nil if the text is not an
+ *     emoticon tag or names an emoticon that is not configured.
+ */
+func (rw *emoticonTagRewriter) Rewrite(data string, svc rewriterServices) *markupData {
+	if groups := rw.re.FindStringSubmatch(data); groups != nil {
+		// groups[1] is the emoticon name captured by the pattern
+		if def, found := rw.config.emos[groups[1]]; found {
+			return &markupData{
+				beginMarkup: "",
+				text:        def.Replace,
+				endMarkup:   "",
+				rescan:      false,
+			}
+		}
+	}
+	return nil
 }
@@ -0,0 +1,69 @@
+/*
+ * Amsterdam Web Communities System
+ * Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ */
+// The htmlcheck package contains the HTML Checker.
+package htmlcheck
+
+import "strings"
+
+// outputFilter is the interface for an HTML checker output filter.
+type outputFilter interface {
+	tryOutputCharacter(strings.Builder, byte) bool // writes the escaped form of a byte that needs one; NOTE(review): the Builder is passed by value, so output goes to a copy - confirm *strings.Builder was not intended
+	matchCharacter(byte) bool                      // reports whether the byte needs to be escaped
+	lengthNoMatch(string) int                      // length of the leading part of the string that needs no escaping
+}
+
+// outputFilterRegistry contains a listing of all defined output filters.
+var outputFilterRegistry = make(map[string]outputFilter)
+
+// init registers all known filters under their configuration names.
+func init() {
+	outputFilterRegistry["html"] = &htmlEncodingFilter{}
+}
+
+// htmlEncodingFilter is a filter that escapes certain characters in HTML; it carries no state.
+type htmlEncodingFilter struct{}
+
+// htmlEscapedChars lists the characters that htmlEncodingFilter escapes.
+const htmlEscapedChars = "<>&"
+
+// tryOutputCharacter outputs a character that needs to be escaped.
+func (f *htmlEncodingFilter) tryOutputCharacter(buf strings.Builder, ch byte) bool {
+	switch ch {
+	case '<':
+		buf.WriteString("&lt;")
+	case '>':
+		buf.WriteString("&gt;")
+	case '&':
+		buf.WriteString("&amp;")
+	default:
+		return false
+	}
+	return true
+}
+
+// matchCharacter reports whether ch is one of the characters listed in
+// htmlEscapedChars, i.e. whether it needs to be escaped.
+func (f *htmlEncodingFilter) matchCharacter(ch byte) bool {
+	for i := 0; i < len(htmlEscapedChars); i++ {
+		if htmlEscapedChars[i] == ch {
+			return true
+		}
+	}
+	return false
+}
+
+// lengthNoMatch returns the number of bytes at the start of s that precede the
+// first character needing escape, or len(s) when s contains none.
+func (f *htmlEncodingFilter) lengthNoMatch(s string) int {
+	if first := strings.IndexAny(s, htmlEscapedChars); first >= 0 {
+		return first
+	}
+	return len(s)
+}
@@ -10,9 +10,12 @@
 package htmlcheck

 import (
+	"fmt"
 	"net/mail"
 	"net/url"
 	"strings"
+
+	"git.erbosoft.com/amy/amsterdam/database"
 )

 // markupData holds the return from rewriters.
@@ -37,12 +40,24 @@ type rewriter interface {
 	Rewrite(string, rewriterServices) *markupData
 }

+// rewriterRegistry contains a list of all rewriters.
+var rewriterRegistry = make(map[string]rewriter)
+
+// init registers the rewriters defined in this file with the registry, each
+// under the name it reports for itself.
+func init() {
+	email := &emailRewriter{}
+	rewriterRegistry[email.Name()] = email
+
+	link := &urlRewriter{}
+	rewriterRegistry[link.Name()] = link
+
+	post := &postLinkRewriter{}
+	rewriterRegistry[post.Name()] = post
+
+	user := &userLinkRewriter{}
+	rewriterRegistry[user.Name()] = user
+}
+
 // emailRewriter is an implementation of Rewriter that recognizes E-mail addresses.
 type emailRewriter struct{}

-// EmailRewriter is a singleton implementration of rewriter for E-mail addresses.
-var EmailRewriter = emailRewriter{}
-
 // Name returns the rewriter's name.
 func (rw *emailRewriter) Name() string {
 	return "email"
@@ -81,9 +96,6 @@ func (rw *emailRewriter) Rewrite(data string, svc rewriterServices) *markupData
 // urlRewriter is an implementation of Rewriter that recognizes URLs.
 type urlRewriter struct{}

-// URLRewriter is a singleton implementration of rewriter for URLs.
-var URLRewriter = urlRewriter{}
-
 // Name returns the rewriter's name.
 func (rw *urlRewriter) Name() string {
 	return "url"
@@ -137,3 +149,148 @@ func (rw *urlRewriter) Rewrite(data string, svc rewriterServices) *markupData {
 		rescan:      false,
 	}
 }
+
+// postLinkRewriter is the rewriter that handles links to conference posts; it carries no state.
+type postLinkRewriter struct{}
+
+// postLinkURLPrefix is the prefix placed before the post link in the href of the generated anchor.
+const postLinkURLPrefix = "x-postlink:"
+
+// Name returns the rewriter's name, which init also uses as its key in rewriterRegistry.
+func (rw *postLinkRewriter) Name() string {
+	return "postlink"
+}
+
+// buildPostLink constructs a full post link from decoded data and context.
+// Components missing from decoded are filled in from context, but only up to
+// the first component that decoded supplies itself; after that a missing
+// component ends the link. This keeps a reference to another community or
+// conference from being combined with the context's conference or topic.
+func buildPostLink(decoded, context *database.PostLinkData) string {
+	var b strings.Builder
+	started := false // true once decoded has supplied a component of its own
+	if decoded.Community == "" {
+		b.WriteString(context.Community)
+	} else {
+		b.WriteString(decoded.Community)
+		started = true
+	}
+	b.WriteString("!")
+	if decoded.Conference == "" {
+		if started {
+			return b.String()
+		}
+		b.WriteString(context.Conference)
+	} else {
+		b.WriteString(decoded.Conference)
+		// an explicit conference must not pick up the context's topic below
+		started = true
+	}
+	b.WriteString(".")
+	if decoded.Topic == -1 {
+		if started {
+			return b.String()
+		}
+		fmt.Fprintf(&b, "%d", context.Topic)
+	} else {
+		fmt.Fprintf(&b, "%d", decoded.Topic)
+	}
+	b.WriteString(".")
+	if decoded.FirstPost != -1 {
+		fmt.Fprintf(&b, "%d", decoded.FirstPost)
+		if decoded.FirstPost != decoded.LastPost {
+			// a range; an open-ended one (LastPost == -1) ends with the dash
+			b.WriteString("-")
+			if decoded.LastPost != -1 {
+				fmt.Fprintf(&b, "%d", decoded.LastPost)
+			}
+		}
+	}
+	return b.String()
+}
+
+/* Rewrite rewrites the given string data and adds markup before and after if needed.
+ * The data is decoded as a post link, its names are verified against the database, and
+ * the completed link is recorded as an internal reference and wrapped in an anchor.
+ * Parameters:
+ *     data - The data to be rewritten.
+ *     svc - Services interface we can use.
+ * Returns:
+ *     Pointer to markup data, or nil if no usable decoder context is available or the
+ *     data is not a valid, verifiable post link.
+ */
+func (rw *postLinkRewriter) Rewrite(data string, svc rewriterServices) *markupData {
+	// A missing, nil or wrongly-typed context means the link cannot be completed;
+	// the checked assertion leaves the text alone instead of panicking.
+	ctxt, ok := svc.rewriterContextValue("PostLinkDecoderContext").(*database.PostLinkData)
+	if !ok || ctxt == nil {
+		return nil
+	}
+
+	mydata, err := database.AmDecodePostLink(data)
+	if err != nil {
+		return nil
+	}
+	if err = mydata.VerifyNames(); err != nil {
+		return nil
+	}
+	// build post link, add it as an internal reference
+	link := buildPostLink(mydata, ctxt)
+	svc.addInternalRef(link)
+	// build the necessary markup and return it
+	var openA strings.Builder
+	openA.WriteString("<a href=\"")
+	openA.WriteString(postLinkURLPrefix)
+	openA.WriteString(link)
+	openA.WriteString("\"")
+	catenate := svc.rewriterAttrValue("ANCHORTAIL")
+	if catenate != "" {
+		openA.WriteString(" ")
+		openA.WriteString(catenate)
+	}
+	openA.WriteString(">")
+	return &markupData{
+		beginMarkup: openA.String(),
+		text:        data,
+		endMarkup:   "</a>",
+		rescan:      false,
+	}
+}
+
+// userLinkRewriter is the rewriter that handles links to user names; it carries no state.
+type userLinkRewriter struct{}
+
+// userLinkURIPRefix is the prefix placed before the user name in the href of the generated anchor. NOTE(review): the capital R in "PRefix" looks like a typo.
+const userLinkURIPRefix = "x-userlink:"
+
+// Name returns the rewriter's name, which init also uses as its key in rewriterRegistry.
+func (rw *userLinkRewriter) Name() string {
+	return "userlink"
+}
+
+/* Rewrite rewrites the given string data and adds markup before and after if needed.
+ * Parameters:
+ *     data - The data to be rewritten.
+ *     svc - Services interface we can use.
+ * Returns:
+ *     Pointer to markup data, or nil.
+ */
+func (rw *userLinkRewriter) Rewrite(data string, svc rewriterServices) *markupData {
+	if data == "" || len(data) > 64 || !database.AmIsValidAmsterdamID(data) {
+		return nil
+	}
+
+	user, err := database.AmGetUserByName(data)
+	if err != nil || user == nil {
+		return nil
+	}
+
+	// build the necessary markup and return it
+	var openA strings.Builder
+	openA.WriteString("<a href=\"")
+	openA.WriteString(userLinkURIPRefix)
+	openA.WriteString(data)
+	openA.WriteString("\"")
+	catenate := svc.rewriterAttrValue("ANCHORTAIL")
+	if catenate != "" {
+		openA.WriteString(" ")
+		openA.WriteString(catenate)
+	}
+	openA.WriteString(">")
+	return &markupData{
+		beginMarkup: openA.String(),
+		text:        data,
+		endMarkup:   "</a>",
+		rescan:      false,
+	}
+}