landed enough rewriters and filters to begin building configurations for the HTML checker

This commit is contained in:
2025-10-29 22:50:25 -06:00
parent e4d7deaf5f
commit eb47b001bb
5 changed files with 614 additions and 19 deletions
+275
View File
@@ -0,0 +1,275 @@
/*
* Amsterdam Web Communities System
* Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
// Package database contains database management and storage logic.
package database
import (
"errors"
"math"
"strconv"
"strings"
)
// PostLinkData is the structure holding the decoded parts of the post link.
// AmDecodePostLink starts from empty names and -1 in every numeric field and
// only overwrites the components that are actually present in the link text.
type PostLinkData struct {
	Community  string // community alias, or "" when the link did not name one
	Conference string // conference alias, or "" when the link did not name one
	Topic      int16  // topic number, or -1 when the link did not name one
	FirstPost  int32  // first post of the referenced range, or -1 when no range was given
	LastPost   int32  // last post of the referenced range; -1 when no range was given or the range is open-ended
}
// NeedsDBVerification reports whether the post link data names a community or
// a conference, i.e. whether there is any alias to verify against the database.
func (d *PostLinkData) NeedsDBVerification() bool {
	if d.Community == "" && d.Conference == "" {
		return false
	}
	return true
}
// VerifyNames looks up each alias that is set in the post link data and
// returns an error if a lookup fails or comes back with nothing. Aliases that
// are empty are not looked up; the community is checked before the conference.
func (d *PostLinkData) VerifyNames() error {
	if d.Community != "" {
		switch found, err := AmGetCommunityByAlias(d.Community); {
		case err != nil:
			return err
		case found == nil:
			return errors.New("community alias not found")
		}
	}
	if d.Conference != "" {
		switch found, err := AmGetConferenceByAlias(d.Conference); {
		case err != nil:
			return err
		case found == nil:
			return errors.New("conference alias not found")
		}
	}
	return nil
}
// Maximum lengths of the components, measured in bytes (they are compared against len()).
const (
	maxLinkLength       = 130 // longest post link string AmDecodePostLink accepts
	maxCommunityLength  = 32  // longest community alias validateCommunity accepts
	maxConferenceLength = 64  // longest conference alias validateConference accepts
)
// validateCommunity checks that name is an acceptable community alias (not
// longer than maxCommunityLength and a valid Amsterdam identifier) and, only
// if it is, stores it in rc.Community.
func validateCommunity(name string, rc *PostLinkData) error {
	switch {
	case len(name) > maxCommunityLength:
		return errors.New("community alias is too long")
	case !AmIsValidAmsterdamID(name):
		return errors.New("community alias is not a valid identifier")
	}
	rc.Community = name
	return nil
}
// validateConference checks that name is an acceptable conference alias (not
// longer than maxConferenceLength and a valid Amsterdam identifier) and, only
// if it is, stores it in rc.Conference.
func validateConference(name string, rc *PostLinkData) error {
	switch {
	case len(name) > maxConferenceLength:
		return errors.New("conference alias is too long")
	case !AmIsValidAmsterdamID(name):
		return errors.New("conference alias is not a valid identifier")
	}
	rc.Conference = name
	return nil
}
// decodeTopicNumber parses data as a decimal topic number and stores it in
// rc.Topic. The value must lie in 0..math.MaxInt16: negative numbers are
// rejected because -1 is the "no topic" sentinel in PostLinkData and anything
// below the int16 range would otherwise wrap silently. rc is left untouched
// when an error is returned.
func decodeTopicNumber(data string, rc *PostLinkData) error {
	v, err := strconv.Atoi(data)
	if err != nil {
		return errors.New("invalid topic number reference")
	}
	if v < 0 || v > math.MaxInt16 {
		return errors.New("topic number out of range")
	}
	rc.Topic = int16(v)
	return nil
}
// decodePostRange decodes the post range (first and last post) and saves it
// in rc. Accepted forms are:
//
//	"N"    a single post: FirstPost = LastPost = N
//	"N-"   an open-ended range: FirstPost = N, LastPost = -1
//	"N-M"  a closed range, stored in ascending order whichever way it was written
//
// Post numbers must be non-negative and fit in an int32; anything else is
// reported as an invalid reference instead of being truncated. rc is left
// untouched when an error is returned.
func decodePostRange(data string, rc *PostLinkData) error {
	pos := strings.IndexByte(data, '-')
	if pos == 0 {
		return errors.New("cannot have - at beginning of post range")
	}
	if pos < 0 {
		// a "range" of a single post
		n, err := parsePostNumber(data)
		if err != nil {
			return err
		}
		rc.FirstPost, rc.LastPost = n, n
		return nil
	}

	first, err := parsePostNumber(data[:pos])
	if err != nil {
		return err
	}
	rest := data[pos+1:]
	if rest == "" {
		// range is open-ended (number-)
		rc.FirstPost, rc.LastPost = first, -1
		return nil
	}
	last, err := parsePostNumber(rest)
	if err != nil {
		return err
	}
	if last < first {
		// "backwards" range - reorder the components
		first, last = last, first
	}
	rc.FirstPost, rc.LastPost = first, last
	return nil
}

// parsePostNumber parses a single non-negative decimal post number that fits in an int32.
func parsePostNumber(s string) (int32, error) {
	v, err := strconv.ParseInt(s, 10, 32)
	if err != nil || v < 0 {
		return 0, errors.New("invalid post number reference")
	}
	return int32(v), nil
}
/* AmDecodePostLink decodes a post link and returns the complete breakdown of its components.
 * The recognized shapes are:
 *     posts                                 (a post range on its own)
 *     community!
 *     community!conference[.]
 *     community!conference.topic[.[posts]]
 *     conference.          topic.
 *     conference.topic     topic.posts
 *     conference.topic.[posts]
 * Components that are not present keep their defaults ("" for names, -1 for numbers).
 * Parameters:
 *     data - The post link to be decoded.
 * Returns:
 *     Pointer to structure containing post link data, or nil.
 *     Standard Go error status.
 */
func AmDecodePostLink(data string) (*PostLinkData, error) {
	if data == "" {
		return nil, errors.New("empty string")
	}
	if len(data) > maxLinkLength {
		return nil, errors.New("post link string too long")
	}
	rc := PostLinkData{
		Community:  "",
		Conference: "",
		Topic:      -1,
		FirstPost:  -1,
		LastPost:   -1,
	}
	work := data

	// First test: Bang
	pos := strings.IndexByte(work, '!')
	if pos == 0 {
		return nil, errors.New("cannot have ! at beginning")
	}
	if pos > 0 {
		if err := validateCommunity(work[:pos], &rc); err != nil {
			return nil, err
		}
		work = work[pos+1:]
		if len(work) == 0 {
			return &rc, nil // community link
		}
	}

	// Second test: Dot #1
	pos = strings.IndexByte(work, '.')
	if pos < 0 {
		// no dots in here, must be either "postlink" or "community!conference"
		var err error
		if rc.Community == "" {
			err = decodePostRange(work, &rc)
		} else {
			err = validateConference(work, &rc)
		}
		if err != nil {
			return nil, err
		}
		// Nothing is left to parse; returning here also keeps us from slicing
		// work with the negative index below.
		return &rc, nil
	}
	if pos == 0 {
		// an empty conference/topic component can never be valid
		return nil, errors.New("cannot have . at beginning of string")
	}

	// Peel off the initial substring before the dot.
	confOrTopic := work[:pos]
	work = work[pos+1:]
	if len(work) == 0 {
		// we had "conference." or "topic." or maybe "community!conference."
		if rc.Community == "" {
			// it's either "conference." or "topic." - try the latter first
			if err := decodeTopicNumber(confOrTopic, &rc); err != nil {
				// it's not a topic number, try it as a conference name
				if err = validateConference(confOrTopic, &rc); err != nil {
					return nil, err
				}
			}
		} else if err := validateConference(confOrTopic, &rc); err != nil {
			// it was "community!conference."
			return nil, err
		}
		// The link ends at the dot, so the decode is complete.
		return &rc, nil
	}

	// Third test: Dot #2
	pos = strings.IndexByte(work, '.')
	if pos == 0 {
		return nil, errors.New("cannot have . at beginning of string")
	}
	if pos < 0 {
		// we had "conference.topic" or "topic.posts" or maybe "community!conference.topic"
		var err error
		if rc.Community == "" {
			// either "conference.topic" or "topic.posts"
			restIsTopic := false
			err = decodeTopicNumber(confOrTopic, &rc)
			if err != nil {
				// the first part is not a number, so it's "conference.topic"
				err = validateConference(confOrTopic, &rc)
				restIsTopic = true
			}
			if err == nil {
				if restIsTopic {
					err = decodeTopicNumber(work, &rc)
				} else {
					err = decodePostRange(work, &rc)
				}
			}
		} else {
			// we have "community!conference.topic"
			err = validateConference(confOrTopic, &rc)
			if err == nil {
				err = decodeTopicNumber(work, &rc)
			}
		}
		if err != nil {
			return nil, err
		}
		return &rc, nil
	}

	// We definitely have "conference.topic.something" or "community!conference.topic.something"
	err := validateConference(confOrTopic, &rc)
	if err == nil {
		err = decodeTopicNumber(work[:pos], &rc)
	}
	if err != nil {
		return nil, err
	}
	work = work[pos+1:]
	if len(work) == 0 {
		// we had "conference.topic." or "community!conference.topic.", those are both valid
		return &rc, nil
	}
	if err = decodePostRange(work, &rc); err != nil { // the rest must be the post range
		return nil, err
	}
	return &rc, nil
}
+48
View File
@@ -0,0 +1,48 @@
#
# Amsterdam Web Communities System
# Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
#
configs:
- name: "post-body"
wordWrap: 55
angles: true
parens: true
discardHTML: false
outputFilters:
- html
stringRewriters:
- emoticon
- email
- url
tagRewriters:
- emoticon_tag
- postlink
- userlink
- email
- url
parenRewriters:
- userlink
tagSet: normal
- name: "post-pseud"
wordWrap: 0
angles: true
parens: false
discardHTML: false
outputFilters:
- html
tagSet: restricted
- name: "preview"
wordWrap: 55
angles: true
parens: true
discardHTML: false
outputFilters:
- html
stringRewriters:
- emoticon
- email
- url
+59 -13
View File
@@ -12,6 +12,7 @@ package htmlcheck
import (
_ "embed"
"math"
"regexp"
"strings"
"gopkg.in/yaml.v3"
@@ -28,13 +29,13 @@ type EmoticonDef struct {
type EmoticonConfig struct {
PrefixChars string `yaml:"prefixChars"`
Emoticons []EmoticonDef `yaml:"emoticons"`
emos map[string]*EmoticonDef
}
// emoticonRewriter is the implementation of rewriter in this file
type emoticonRewriter struct {
config *EmoticonConfig
prefixChars []byte
emos map[string]*EmoticonDef
patterns map[string]string
minLength int
}
@@ -42,28 +43,27 @@ type emoticonRewriter struct {
//go:embed emoticons.yaml
var rawEmoConfig []byte
// EmoticonRewriter is the singleton instance of the emoticon rewriter.
var EmoticonRewriter rewriter
// init loads the configuration and creates the singleton instance.
// init loads the configuration and registers the rewriters.
func init() {
var cfg EmoticonConfig
if err := yaml.Unmarshal(rawEmoConfig, &cfg); err != nil {
panic(err)
}
cfg.emos = make(map[string]*EmoticonDef)
for i, def := range cfg.Emoticons {
cfg.emos[def.Name] = &(cfg.Emoticons[i])
}
rw := emoticonRewriter{
config: &cfg,
prefixChars: []byte(cfg.PrefixChars),
emos: make(map[string]*EmoticonDef),
patterns: make(map[string]string),
minLength: math.MaxInt,
}
for i, def := range rw.config.Emoticons {
rw.emos[def.Name] = &(rw.config.Emoticons[i])
for _, def := range rw.config.Emoticons {
for _, p := range def.Patterns {
f := false
for k := range rw.prefixChars {
if p[0] == rw.prefixChars[k] {
for i := range rw.prefixChars {
if p[0] == rw.prefixChars[i] {
f = true
break
}
@@ -74,7 +74,13 @@ func init() {
}
}
}
EmoticonRewriter = &rw
rewriterRegistry[rw.Name()] = &rw
rw2 := emoticonTagRewriter{
config: &cfg,
re: regexp.MustCompile(`^ei:\s*(\w+)(\s*/)?\s*$`),
}
rewriterRegistry[rw2.Name()] = &rw2
}
// Name returns the rewriter's name.
@@ -113,7 +119,7 @@ func (rw *emoticonRewriter) Rewrite(data string, svc rewriterServices) *markupDa
for k, v := range rw.patterns {
if strings.HasPrefix(work, k) {
looking = false
output.WriteString(rw.emos[v].Replace)
output.WriteString(rw.config.emos[v].Replace)
work = work[len(k):]
didReplace = true
break
@@ -136,5 +142,45 @@ func (rw *emoticonRewriter) Rewrite(data string, svc rewriterServices) *markupDa
return nil
}
output.WriteString(work)
return &markupData{beginMarkup: "", text: output.String(), endMarkup: "", rescan: true}
return &markupData{
beginMarkup: "",
text: output.String(),
endMarkup: "",
rescan: true,
}
}
// emoticonTagRewriter rewrites emoticon tags: Rewrite matches the tag text
// against re and looks the captured name up in the shared emoticon configuration.
type emoticonTagRewriter struct {
	config *EmoticonConfig // emoticon configuration; Rewrite reads its emos map, keyed by emoticon name
	re     *regexp.Regexp  // pattern for the tag text; capture group 1 is used as the emoticon name
}
// Name reports the name of this rewriter, which is also the key it is
// registered under in the rewriter registry.
func (rw *emoticonTagRewriter) Name() string {
	const registeredName = "emoticon_tag"
	return registeredName
}
/* Rewrite turns an emoticon tag into the replacement text of the emoticon it names.
 * The tag text must match the rewriter's pattern, and the name captured by the
 * pattern's first group must be a known emoticon; otherwise nothing is rewritten.
 * Parameters:
 *     data - The data to be rewritten.
 *     svc - Services interface we can use (not needed by this rewriter).
 * Returns:
 *     Pointer to markup data holding the replacement text (no surrounding markup,
 *     no rescan), or nil if the data is not a recognized emoticon tag.
 */
func (rw *emoticonTagRewriter) Rewrite(data string, svc rewriterServices) *markupData {
	groups := rw.re.FindStringSubmatch(data)
	if groups == nil {
		return nil
	}
	if def, known := rw.config.emos[groups[1]]; known {
		result := markupData{
			beginMarkup: "",
			text:        def.Replace,
			endMarkup:   "",
			rescan:      false,
		}
		return &result
	}
	return nil
}
+69
View File
@@ -0,0 +1,69 @@
/*
* Amsterdam Web Communities System
* Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
// Package htmlcheck contains the HTML Checker.
package htmlcheck
import "strings"
// outputFilter is the interface for an HTML checker output filter.
type outputFilter interface {
tryOutputCharacter(strings.Builder, byte) bool
matchCharacter(byte) bool
lengthNoMatch(string) int
}
// outputFilterRegistry contains a listing of all defined output filters.
var outputFilterRegistry = make(map[string]outputFilter)
// init registers all known filters.
func init() {
f := htmlEncodingFilter{}
outputFilterRegistry["html"] = &f
}
// htmlEncodingFilter is a filter that escapes certain characters in HTML.
type htmlEncodingFilter struct{}
// htmlEscapedChars is a list of HTML characters that are escaped.
const htmlEscapedChars = "<>&"
// tryOutputCharacter outputs a character that needs to be escaped.
func (f *htmlEncodingFilter) tryOutputCharacter(buf strings.Builder, ch byte) bool {
switch ch {
case '<':
buf.WriteString("&lt;")
case '>':
buf.WriteString("&gt;")
case '&':
buf.WriteString("&amp;")
default:
return false
}
return true
}
// matchCharacter reports whether ch is one of the characters listed in
// htmlEscapedChars, i.e. one that this filter escapes.
func (f *htmlEncodingFilter) matchCharacter(ch byte) bool {
	// htmlEscapedChars is pure ASCII, so a byte >= 0x80 (converted to a
	// multi-byte rune here) can never be found in it.
	return strings.ContainsRune(htmlEscapedChars, rune(ch))
}
// lengthNoMatch returns how many bytes at the start of s can be passed through
// unescaped: the index of the first character from htmlEscapedChars, or the
// full length of s when it contains none of them.
func (f *htmlEncodingFilter) lengthNoMatch(s string) int {
	if first := strings.IndexAny(s, htmlEscapedChars); first >= 0 {
		return first
	}
	return len(s)
}
+163 -6
View File
@@ -10,9 +10,12 @@
package htmlcheck
import (
"fmt"
"net/mail"
"net/url"
"strings"
"git.erbosoft.com/amy/amsterdam/database"
)
// markupData holds the return from rewriters.
@@ -37,12 +40,24 @@ type rewriter interface {
Rewrite(string, rewriterServices) *markupData
}
// rewriterRegistry contains a list of all rewriters.
var rewriterRegistry = make(map[string]rewriter)
// init registers our rewriters with the registry.
func init() {
r1 := emailRewriter{}
rewriterRegistry[r1.Name()] = &r1
r2 := urlRewriter{}
rewriterRegistry[r2.Name()] = &r2
r3 := postLinkRewriter{}
rewriterRegistry[r3.Name()] = &r3
r4 := userLinkRewriter{}
rewriterRegistry[r4.Name()] = &r4
}
// emailRewriter is an implementation of Rewriter that recognizes E-mail addresses.
type emailRewriter struct{}
// EmailRewriter is a singleton implementration of rewriter for E-mail addresses.
var EmailRewriter = emailRewriter{}
// Name returns the rewriter's name.
func (rw *emailRewriter) Name() string {
return "email"
@@ -81,9 +96,6 @@ func (rw *emailRewriter) Rewrite(data string, svc rewriterServices) *markupData
// urlRewriter is an implementation of Rewriter that recognizes URLs.
type urlRewriter struct{}
// URLRewriter is a singleton implementration of rewriter for URLs.
var URLRewriter = urlRewriter{}
// Name returns the rewriter's name.
func (rw *urlRewriter) Name() string {
return "url"
@@ -137,3 +149,148 @@ func (rw *urlRewriter) Rewrite(data string, svc rewriterServices) *markupData {
rescan: false,
}
}
// postLinkRewriter is the rewriter that handles links to conference posts.
// It carries no state of its own; the context a link is resolved against is
// obtained from the rewriterServices passed to Rewrite.
type postLinkRewriter struct{}
// postLinkURLPrefix is the default URL prefix for post links; Rewrite puts it
// in front of the built link inside the generated href attribute.
const postLinkURLPrefix = "x-postlink:"
// Name reports the name of this rewriter, which is also the key it is
// registered under in the rewriter registry.
func (rw *postLinkRewriter) Name() string {
	const registeredName = "postlink"
	return registeredName
}
// buildPostLink constructs a full post link ("community!conference.topic.posts")
// from decoded data and context. Leading components the link did not specify
// are filled in from context. Once the link itself has supplied a component,
// the first missing component after it ends the link: filling it in from
// context would point at an unrelated conference or topic. So "comm!" stays a
// community link and "conf." stays a conference link.
func buildPostLink(decoded, context *database.PostLinkData) string {
	var b strings.Builder
	started := false // true once a component has come from the link itself
	if decoded.Community == "" {
		b.WriteString(context.Community)
	} else {
		b.WriteString(decoded.Community)
		started = true
	}
	b.WriteString("!")
	if decoded.Conference == "" {
		if started {
			return b.String()
		}
		b.WriteString(context.Conference)
	} else {
		b.WriteString(decoded.Conference)
		// a conference named by the link must not inherit the context topic
		started = true
	}
	b.WriteString(".")
	if decoded.Topic == -1 {
		if started {
			return b.String()
		}
		fmt.Fprintf(&b, "%d", context.Topic)
	} else {
		fmt.Fprintf(&b, "%d", decoded.Topic)
	}
	b.WriteString(".")
	if decoded.FirstPost != -1 {
		fmt.Fprintf(&b, "%d", decoded.FirstPost)
		if decoded.FirstPost != decoded.LastPost {
			// a real range; an open-ended one (LastPost == -1) keeps the trailing dash
			b.WriteString("-")
			if decoded.LastPost != -1 {
				fmt.Fprintf(&b, "%d", decoded.LastPost)
			}
		}
	}
	return b.String()
}
/* Rewrite rewrites the given string data and adds markup before and after if needed.
 * The data is decoded as a post link, any community/conference aliases in it are
 * verified, and the link is completed from the "PostLinkDecoderContext" context value.
 * The completed link is recorded as an internal reference and the original text is
 * wrapped in an anchor pointing at it.
 * Parameters:
 *     data - The data to be rewritten.
 *     svc - Services interface we can use.
 * Returns:
 *     Pointer to markup data, or nil if there is no usable decoder context or the
 *     data is not a valid, verifiable post link.
 */
func (rw *postLinkRewriter) Rewrite(data string, svc rewriterServices) *markupData {
	// The checked assertion covers a missing value, a value of the wrong type and
	// a nil pointer, so a misconfigured context disables the rewriter instead of
	// panicking.
	ctxt, ok := svc.rewriterContextValue("PostLinkDecoderContext").(*database.PostLinkData)
	if !ok || ctxt == nil {
		return nil
	}
	mydata, err := database.AmDecodePostLink(data)
	if err != nil {
		return nil
	}
	if err = mydata.VerifyNames(); err != nil {
		return nil
	}

	// build post link, add it as an internal reference
	link := buildPostLink(mydata, ctxt)
	svc.addInternalRef(link)

	// build the necessary markup and return it
	var openA strings.Builder
	openA.WriteString("<a href=\"")
	openA.WriteString(postLinkURLPrefix)
	openA.WriteString(link)
	openA.WriteString("\"")
	if catenate := svc.rewriterAttrValue("ANCHORTAIL"); catenate != "" {
		openA.WriteString(" ")
		openA.WriteString(catenate)
	}
	openA.WriteString(">")
	return &markupData{
		beginMarkup: openA.String(),
		text:        data,
		endMarkup:   "</a>",
		rescan:      false,
	}
}
// userLinkRewriter is the rewriter that handles links to user names.
// It carries no state of its own.
type userLinkRewriter struct{}
// userLinkURIPRefix is the default URL prefix for user links; Rewrite puts it
// in front of the user name inside the generated href attribute.
// NOTE(review): the identifier is spelled "PRefix" (capital R); the spelling is
// kept here because userLinkRewriter.Rewrite refers to it by this name.
const userLinkURIPRefix = "x-userlink:"
// Name reports the name of this rewriter, which is also the key it is
// registered under in the rewriter registry.
func (rw *userLinkRewriter) Name() string {
	const registeredName = "userlink"
	return registeredName
}
/* Rewrite wraps a user name in an anchor that points at that user.
 * The data must be a non-empty, valid Amsterdam identifier of at most 64 bytes
 * that names a user the database can find.
 * Parameters:
 *     data - The data to be rewritten.
 *     svc - Services interface we can use.
 * Returns:
 *     Pointer to markup data, or nil if the data does not name a known user.
 */
func (rw *userLinkRewriter) Rewrite(data string, svc rewriterServices) *markupData {
	// cheap syntactic checks first, so obviously bad names never reach the database
	switch {
	case data == "", len(data) > 64, !database.AmIsValidAmsterdamID(data):
		return nil
	}
	if user, err := database.AmGetUserByName(data); err != nil || user == nil {
		return nil
	}

	// assemble the opening tag: href first, then any extra attributes for anchors
	var anchor strings.Builder
	for _, piece := range []string{"<a href=\"", userLinkURIPRefix, data, "\""} {
		anchor.WriteString(piece)
	}
	if tail := svc.rewriterAttrValue("ANCHORTAIL"); tail != "" {
		anchor.WriteString(" ")
		anchor.WriteString(tail)
	}
	anchor.WriteString(">")

	result := markupData{
		beginMarkup: anchor.String(),
		text:        data,
		endMarkup:   "</a>",
		rescan:      false,
	}
	return &result
}