landed enough rewriters and filters to begin building configurations for the HTML checker

This commit is contained in:
2025-10-29 22:50:25 -06:00
parent e4d7deaf5f
commit eb47b001bb
5 changed files with 614 additions and 19 deletions
+48
View File
@@ -0,0 +1,48 @@
#
# Amsterdam Web Communities System
# Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
#
configs:
- name: "post-body"
wordWrap: 55
angles: true
parens: true
discardHTML: false
outputFilters:
- html
stringRewriters:
- emoticon
- email
- url
tagRewriters:
- emoticon_tag
- postlink
- userlink
- email
- url
parenRewriters:
- userlink
tagSet: normal
- name: "post-pseud"
wordWrap: 0
angles: true
parens: false
discardHTML: false
outputFilters:
- html
tagSet: restricted
- name: "preview"
wordWrap: 55
angles: true
parens: true
discardHTML: false
outputFilters:
- html
stringRewriters:
- emoticon
- email
- url
+59 -13
View File
@@ -12,6 +12,7 @@ package htmlcheck
import (
_ "embed"
"math"
"regexp"
"strings"
"gopkg.in/yaml.v3"
@@ -28,13 +29,13 @@ type EmoticonDef struct {
type EmoticonConfig struct {
// PrefixChars is read from the "prefixChars" YAML key. init converts it to a
// byte slice and compares those bytes against the first byte of each pattern.
PrefixChars string `yaml:"prefixChars"`
// Emoticons is the list of emoticon definitions read from the "emoticons" YAML key.
Emoticons []EmoticonDef `yaml:"emoticons"`
// emos indexes the entries of Emoticons by their Name. It is not read from
// YAML; init builds it, and each value points at the matching element of
// the Emoticons slice.
emos map[string]*EmoticonDef
}
// emoticonRewriter is the implementation of rewriter in this file.
// It replaces emoticon patterns found in string data with the replacement
// text of the emoticon each pattern maps to.
type emoticonRewriter struct {
// config is the parsed emoticon configuration.
config *EmoticonConfig
// prefixChars is PrefixChars from the configuration, as bytes.
prefixChars []byte
// NOTE(review): this view interleaves pre- and post-change lines; the
// name-to-definition map appears to have moved to EmoticonConfig.emos,
// which is what Rewrite reads - confirm this field is gone from the file.
emos map[string]*EmoticonDef
// patterns maps a pattern string to an emoticon name; Rewrite tests each key
// as a prefix of the remaining text and uses the value to find the definition.
patterns map[string]string
// minLength starts at math.MaxInt in init; presumably it ends up as the
// length of the shortest pattern - TODO confirm against the full init body.
minLength int
}
@@ -42,28 +43,27 @@ type emoticonRewriter struct {
//go:embed emoticons.yaml
var rawEmoConfig []byte
// EmoticonRewriter is the singleton instance of the emoticon rewriter.
var EmoticonRewriter rewriter
// init loads the configuration and creates the singleton instance.
// init loads the configuration and registers the rewriters.
func init() {
var cfg EmoticonConfig
if err := yaml.Unmarshal(rawEmoConfig, &cfg); err != nil {
panic(err)
}
cfg.emos = make(map[string]*EmoticonDef)
for i, def := range cfg.Emoticons {
cfg.emos[def.Name] = &(cfg.Emoticons[i])
}
rw := emoticonRewriter{
config: &cfg,
prefixChars: []byte(cfg.PrefixChars),
emos: make(map[string]*EmoticonDef),
patterns: make(map[string]string),
minLength: math.MaxInt,
}
for i, def := range rw.config.Emoticons {
rw.emos[def.Name] = &(rw.config.Emoticons[i])
for _, def := range rw.config.Emoticons {
for _, p := range def.Patterns {
f := false
for k := range rw.prefixChars {
if p[0] == rw.prefixChars[k] {
for i := range rw.prefixChars {
if p[0] == rw.prefixChars[i] {
f = true
break
}
@@ -74,7 +74,13 @@ func init() {
}
}
}
EmoticonRewriter = &rw
rewriterRegistry[rw.Name()] = &rw
rw2 := emoticonTagRewriter{
config: &cfg,
re: regexp.MustCompile(`^ei:\s*(\w+)(\s*/)?\s*$`),
}
rewriterRegistry[rw2.Name()] = &rw2
}
// Name returns the rewriter's name.
@@ -113,7 +119,7 @@ func (rw *emoticonRewriter) Rewrite(data string, svc rewriterServices) *markupDa
for k, v := range rw.patterns {
if strings.HasPrefix(work, k) {
looking = false
output.WriteString(rw.emos[v].Replace)
output.WriteString(rw.config.emos[v].Replace)
work = work[len(k):]
didReplace = true
break
@@ -136,5 +142,45 @@ func (rw *emoticonRewriter) Rewrite(data string, svc rewriterServices) *markupDa
return nil
}
output.WriteString(work)
return &markupData{beginMarkup: "", text: output.String(), endMarkup: "", rescan: true}
return &markupData{
beginMarkup: "",
text: output.String(),
endMarkup: "",
rescan: true,
}
}
// emoticonTagRewriter rewrites emoticon tags, that is, data that names an
// emoticon explicitly rather than by one of its text patterns.
type emoticonTagRewriter struct {
// config is the parsed emoticon configuration; its emos map is used to look
// up an emoticon by name.
config *EmoticonConfig
// re recognizes the tag syntax and captures the emoticon name in group 1.
// init compiles it from `^ei:\s*(\w+)(\s*/)?\s*$`.
re *regexp.Regexp
}
// Name returns the rewriter's name, which is the key it is registered under
// in rewriterRegistry.
func (rw *emoticonTagRewriter) Name() string {
return "emoticon_tag"
}
/* Rewrite replaces an emoticon tag with the replacement text of the emoticon it names.
 * Parameters:
 *     data - The data to be rewritten.
 *     svc - Services interface we can use (not consulted by this rewriter).
 * Returns:
 *     Pointer to markup data holding the replacement text, or nil if the data does
 *     not match the tag pattern or names an emoticon that is not configured.
 */
func (rw *emoticonTagRewriter) Rewrite(data string, svc rewriterServices) *markupData {
	match := rw.re.FindStringSubmatch(data)
	if match == nil {
		return nil
	}
	// Capture group 1 is the emoticon name; resolve it through the configuration.
	if def, found := rw.config.emos[match[1]]; found {
		// Only the text is set: no surrounding markup and no rescan.
		return &markupData{text: def.Replace}
	}
	return nil
}
+69
View File
@@ -0,0 +1,69 @@
/*
* Amsterdam Web Communities System
* Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
// The htmlcheck package contains the HTML Checker.
package htmlcheck
import "strings"
// outputFilter is the interface for an HTML checker output filter.
//
// NOTE(review): tryOutputCharacter receives the strings.Builder by value, so
// an implementation writes into a copy and the caller's buffer never sees the
// output; strings.Builder also panics on write when a Builder that has already
// been written to is copied. The parameter most likely needs to be
// *strings.Builder, changed together with every implementation and caller.
type outputFilter interface {
// tryOutputCharacter is given a buffer and a character; the html filter
// writes an escaped form and returns true for characters it handles, and
// returns false without writing otherwise.
tryOutputCharacter(strings.Builder, byte) bool
// matchCharacter reports whether the filter handles the given character.
matchCharacter(byte) bool
// lengthNoMatch returns how many bytes at the start of the string contain
// no character the filter handles.
lengthNoMatch(string) int
}
// outputFilterRegistry maps the name of every defined output filter to its implementation.
var outputFilterRegistry = map[string]outputFilter{}

// init registers all known filters; currently that is only the HTML encoding
// filter, under the name "html".
func init() {
	outputFilterRegistry["html"] = new(htmlEncodingFilter)
}
// htmlEncodingFilter is an output filter that escapes the characters '<', '>'
// and '&' as HTML character entities.
type htmlEncodingFilter struct{}

// htmlEscapedChars lists every character that htmlEncodingFilter escapes.
const htmlEscapedChars = "<>&"

// tryOutputCharacter writes the entity for ch to buf and returns true when ch
// is one of the escaped characters; for any other character it writes nothing
// and returns false.
//
// NOTE(review): buf is received by value (the outputFilter interface requires
// this signature), so the write lands in a copy and is not visible to the
// caller; strings.Builder also panics on write if the Builder it was copied
// from had already been written to. The parameter most likely needs to become
// *strings.Builder, changed together with the interface and its callers.
func (f *htmlEncodingFilter) tryOutputCharacter(buf strings.Builder, ch byte) bool {
	var entity string
	switch ch {
	case '<':
		entity = "&lt;"
	case '>':
		entity = "&gt;"
	case '&':
		entity = "&amp;"
	default:
		return false
	}
	buf.WriteString(entity)
	return true
}

// matchCharacter returns true if this character needs to be escaped.
func (f *htmlEncodingFilter) matchCharacter(ch byte) bool {
	for i := 0; i < len(htmlEscapedChars); i++ {
		if htmlEscapedChars[i] == ch {
			return true
		}
	}
	return false
}

// lengthNoMatch returns the number of bytes at the start of s that precede the
// first character needing escape, or len(s) if s contains no such character.
func (f *htmlEncodingFilter) lengthNoMatch(s string) int {
	if first := strings.IndexAny(s, htmlEscapedChars); first >= 0 {
		return first
	}
	return len(s)
}
+163 -6
View File
@@ -10,9 +10,12 @@
package htmlcheck
import (
"fmt"
"net/mail"
"net/url"
"strings"
"git.erbosoft.com/amy/amsterdam/database"
)
// markupData holds the return from rewriters.
@@ -37,12 +40,24 @@ type rewriter interface {
Rewrite(string, rewriterServices) *markupData
}
// rewriterRegistry maps the name of every registered rewriter to its implementation.
var rewriterRegistry = map[string]rewriter{}

// init registers the rewriters defined in this file, each under the name it
// reports from Name.
func init() {
	builtins := []rewriter{
		&emailRewriter{},
		&urlRewriter{},
		&postLinkRewriter{},
		&userLinkRewriter{},
	}
	for _, rw := range builtins {
		rewriterRegistry[rw.Name()] = rw
	}
}
// emailRewriter is an implementation of Rewriter that recognizes E-mail addresses.
type emailRewriter struct{}
// EmailRewriter is a singleton implementration of rewriter for E-mail addresses.
var EmailRewriter = emailRewriter{}
// Name returns the rewriter's name.
func (rw *emailRewriter) Name() string {
return "email"
@@ -81,9 +96,6 @@ func (rw *emailRewriter) Rewrite(data string, svc rewriterServices) *markupData
// urlRewriter is an implementation of Rewriter that recognizes URLs.
type urlRewriter struct{}
// URLRewriter is a singleton implementration of rewriter for URLs.
var URLRewriter = urlRewriter{}
// Name returns the rewriter's name.
func (rw *urlRewriter) Name() string {
return "url"
@@ -137,3 +149,148 @@ func (rw *urlRewriter) Rewrite(data string, svc rewriterServices) *markupData {
rescan: false,
}
}
// postLinkRewriter is the rewriter that handles links to conference posts.
// It carries no state of its own; what it needs comes in through the services
// interface passed to Rewrite.
type postLinkRewriter struct{}
// postLinkURLPrefix is the default URL prefix for post links. Rewrite places
// it in front of the built link inside the href attribute of the anchor.
const postLinkURLPrefix = "x-postlink:"
// Name returns the rewriter's name, which is the key it is registered under
// in rewriterRegistry.
func (rw *postLinkRewriter) Name() string {
return "postlink"
}
// buildPostLink constructs a full post link from decoded data and context.
//
// The link is assembled left to right as "community!conference.topic.posts".
// A component missing from decoded (empty name, or -1 for a number) is handled
// according to whether an explicit component has already been written:
//   - if none has, the component is filled in from context;
//   - if one has, the link ends right there (keeping the separator already
//     written), because the remaining context values describe a different
//     community or conference and must not be mixed in.
//
// The post part is "first" for a single post, "first-last" for a closed range
// and "first-" when LastPost is -1; it is omitted when FirstPost is -1.
func buildPostLink(decoded, context *database.PostLinkData) string {
	var b strings.Builder
	// explicit is set once any component has been taken from decoded.
	explicit := false

	if decoded.Community != "" {
		b.WriteString(decoded.Community)
		explicit = true
	} else {
		b.WriteString(context.Community)
	}
	b.WriteString("!")

	switch {
	case decoded.Conference != "":
		b.WriteString(decoded.Conference)
		// An explicit conference also stops later defaulting; without this a
		// conference-only link would pick up the context's topic number.
		explicit = true
	case explicit:
		return b.String()
	default:
		b.WriteString(context.Conference)
	}
	b.WriteString(".")

	switch {
	case decoded.Topic != -1:
		fmt.Fprintf(&b, "%d", decoded.Topic)
	case explicit:
		return b.String()
	default:
		fmt.Fprintf(&b, "%d", context.Topic)
	}
	b.WriteString(".")

	if decoded.FirstPost != -1 {
		fmt.Fprintf(&b, "%d", decoded.FirstPost)
		if decoded.FirstPost != decoded.LastPost {
			b.WriteString("-")
			if decoded.LastPost != -1 {
				fmt.Fprintf(&b, "%d", decoded.LastPost)
			}
		}
	}
	return b.String()
}
/* Rewrite rewrites the given string data and adds markup before and after if needed.
 * The data is decoded as a post link, completed from the decoder context, recorded
 * as an internal reference, and wrapped in an anchor pointing at the link.
 * Parameters:
 *     data - The data to be rewritten.
 *     svc - Services interface we can use.
 * Returns:
 *     Pointer to markup data, or nil if there is no usable decoder context or the
 *     data does not decode and verify as a post link.
 */
func (rw *postLinkRewriter) Rewrite(data string, svc rewriterServices) *markupData {
	// A missing, wrongly typed, or nil context all mean we cannot resolve the
	// link; decline instead of panicking on the assertion or a nil dereference.
	ctxt, ok := svc.rewriterContextValue("PostLinkDecoderContext").(*database.PostLinkData)
	if !ok || ctxt == nil {
		return nil
	}

	mydata, err := database.AmDecodePostLink(data)
	if err != nil {
		return nil
	}
	if err = mydata.VerifyNames(); err != nil {
		return nil
	}

	// build post link, add it as an internal reference
	link := buildPostLink(mydata, ctxt)
	svc.addInternalRef(link)

	// build the necessary markup and return it
	var openA strings.Builder
	openA.WriteString("<a href=\"")
	openA.WriteString(postLinkURLPrefix)
	openA.WriteString(link)
	openA.WriteString("\"")
	// ANCHORTAIL, when set, supplies extra attribute text for the anchor.
	if catenate := svc.rewriterAttrValue("ANCHORTAIL"); catenate != "" {
		openA.WriteString(" ")
		openA.WriteString(catenate)
	}
	openA.WriteString(">")
	return &markupData{
		beginMarkup: openA.String(),
		text:        data,
		endMarkup:   "</a>",
		rescan:      false,
	}
}
// userLinkRewriter is the rewriter that handles links to user names.
type userLinkRewriter struct{}
// userLinkURIPRefix is the default URI prefix for user links. Rewrite places
// it in front of the user name inside the href attribute of the anchor.
// NOTE(review): the identifier is misspelled ("PRefix"); renaming it to
// userLinkURIPrefix has to be done together with its use in Rewrite.
const userLinkURIPRefix = "x-userlink:"
// Name returns the rewriter's name, which is the key it is registered under
// in rewriterRegistry.
func (rw *userLinkRewriter) Name() string {
return "userlink"
}
/* Rewrite turns a user name into an anchor that links to that user.
 * Parameters:
 *     data - The data to be rewritten; treated as a candidate user name.
 *     svc - Services interface we can use.
 * Returns:
 *     Pointer to markup data wrapping the name in an anchor, or nil if the data is
 *     empty, longer than 64 bytes, not a valid Amsterdam ID, or the user lookup
 *     fails or finds no user.
 */
func (rw *userLinkRewriter) Rewrite(data string, svc rewriterServices) *markupData {
	// Cheap syntactic checks come first so the user lookup only runs for plausible names.
	if n := len(data); n == 0 || n > 64 || !database.AmIsValidAmsterdamID(data) {
		return nil
	}
	if user, err := database.AmGetUserByName(data); err != nil || user == nil {
		return nil
	}

	// Assemble the opening tag; ANCHORTAIL, when set, supplies extra attribute text.
	anchor := `<a href="` + userLinkURIPRefix + data + `"`
	if tail := svc.rewriterAttrValue("ANCHORTAIL"); tail != "" {
		anchor += " " + tail
	}
	anchor += ">"

	return &markupData{
		beginMarkup: anchor,
		text:        data,
		endMarkup:   "</a>",
		rescan:      false,
	}
}