From 122d581e7f0264b13c6894bbb352b59916bd9f99 Mon Sep 17 00:00:00 2001 From: Amy Gale Ruth Bowersox Date: Tue, 28 Oct 2025 23:33:52 -0600 Subject: [PATCH] beginnings of HTML checker - added struct and interface defs, emoticon rewriter --- htmlcheck/checker.go | 26 ++++++ htmlcheck/emoticon_rewriter.go | 140 +++++++++++++++++++++++++++++++++ htmlcheck/emoticons.yaml | 56 +++++++++++++ htmlcheck/rewriter.go | 31 ++++++++ 4 files changed, 253 insertions(+) create mode 100644 htmlcheck/checker.go create mode 100644 htmlcheck/emoticon_rewriter.go create mode 100644 htmlcheck/emoticons.yaml create mode 100644 htmlcheck/rewriter.go diff --git a/htmlcheck/checker.go b/htmlcheck/checker.go new file mode 100644 index 0000000..dc497da --- /dev/null +++ b/htmlcheck/checker.go @@ -0,0 +1,26 @@ +/* + * Amsterdam Web Communities System + * Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + */ +// The htmlcheck package contains the HTML Checker. +package htmlcheck + +// HTMLChecker is a component that checks HTML and reformats it as needed. 
+type HTMLChecker interface { // interface only; this patch adds no implementation
+	Append(string) error
+	Finish() error
+	Reset()
+	Value() (string, error)
+	Length() (int, error)
+	Lines() (int, error)
+	Counter(string) (int, error)
+	Context() map[string]any
+	ExternalRefs() ([]any, error)
+	InternalRefs() ([]any, error)
+}
+
+// var NotYetFinished = errors.New("the HTML checker has not yet been finished")
diff --git a/htmlcheck/emoticon_rewriter.go b/htmlcheck/emoticon_rewriter.go
new file mode 100644
index 0000000..cbd2ff2
--- /dev/null
+++ b/htmlcheck/emoticon_rewriter.go
@@ -0,0 +1,140 @@
+/*
+ * Amsterdam Web Communities System
+ * Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ */
+// The htmlcheck package contains the HTML Checker.
+package htmlcheck
+
+import (
+	_ "embed" // blank import: required for the //go:embed directive below
+	"math"
+	"strings"
+
+	"gopkg.in/yaml.v3"
+)
+
+// EmoticonDef is a single emoticon definition, decoded from one entry of the YAML "emoticons" list.
+type EmoticonDef struct {
+	Name     string   `yaml:"name"`     // key under which init indexes this definition in emos
+	Patterns []string `yaml:"patterns"` // literal texts to match; init registers only those starting with a prefix byte
+	Replace  string   `yaml:"replace"`  // text Rewrite writes in place of a matched pattern
+}
+
+// EmoticonConfig is the YAML configuration of the emoticons.
+type EmoticonConfig struct {
+	PrefixChars string        `yaml:"prefixChars"` // every byte here is a character that may begin a pattern
+	Emoticons   []EmoticonDef `yaml:"emoticons"`   // the definitions; init indexes them by name and by pattern
+}
+
+// emoticonRewriter implements rewriter by replacing emoticon patterns with their configured text.
+type emoticonRewriter struct {
+	config      *EmoticonConfig         // decoded YAML; emos points into config.Emoticons
+	prefixChars []byte                  // bytes of config.PrefixChars
+	emos        map[string]*EmoticonDef // definition name -> definition
+	patterns    map[string]string       // pattern text -> definition name
+	minLength   int                     // byte length of the shortest registered pattern (math.MaxInt if none)
+}
+
+//go:embed emoticons.yaml
+var rawEmoConfig []byte // contents of emoticons.yaml, embedded at build time
+
+// EmoticonRewriter is the singleton instance of the emoticon rewriter.
+var EmoticonRewriter rewriter // assigned once, by init
+
+// init decodes the embedded YAML and builds the singleton instance; it panics if decoding fails.
+func init() {
+	var cfg EmoticonConfig
+	if err := yaml.Unmarshal(rawEmoConfig, &cfg); err != nil {
+		panic(err) // a decode failure of the embedded YAML panics during package initialization
+	}
+	rw := emoticonRewriter{
+		config:      &cfg,
+		prefixChars: []byte(cfg.PrefixChars),
+		emos:        make(map[string]*EmoticonDef),
+		patterns:    make(map[string]string),
+		minLength:   math.MaxInt, // sentinel; lowered below to the shortest registered pattern length
+	}
+	for i, def := range rw.config.Emoticons {
+		rw.emos[def.Name] = &(rw.config.Emoticons[i]) // point at the slice element, not at the loop copy def
+		for _, p := range def.Patterns {
+			f := false // set when p starts with one of the prefix bytes
+			for k := range rw.prefixChars {
+				if p[0] == rw.prefixChars[k] { // NOTE(review): p[0] panics if a pattern is the empty string
+					f = true
+					break
+				}
+			}
+			if f { // patterns that do not start with a prefix byte are silently skipped
+				rw.patterns[p] = def.Name
+				rw.minLength = min(rw.minLength, len(p))
+			}
+		}
+	}
+	EmoticonRewriter = &rw
+}
+
+// Name returns the rewriter's name, the constant "emoticon".
+func (rw *emoticonRewriter) Name() string {
+	return "emoticon"
+}
+
+/* Rewrite replaces each registered emoticon pattern found in data with its replacement text.
+ * Parameters:
+ *     data - The data to be rewritten.
+ *     svc - Services interface; this implementation does not use it.
+ * Returns:
+ *     Markup data holding the rewritten text (rescan set), or nil if nothing was replaced.
+ */
+func (rw *emoticonRewriter) Rewrite(data string, svc rewriterServices) *markupData {
+	pos := math.MaxInt // MaxInt means "no prefix byte found"
+	for _, c := range rw.prefixChars {
+		foo := strings.IndexByte(data, c)
+		if foo >= 0 {
+			pos = min(pos, foo) // keep the earliest occurrence of any prefix byte
+		}
+	}
+	if pos == math.MaxInt {
+		return nil // no prefix byte anywhere: nothing to rewrite
+	}
+	didReplace := false
+	var output strings.Builder
+	work := data // unprocessed remainder; pos is always an index into work
+	for pos != math.MaxInt {
+		if pos > 0 {
+			output.WriteString(work[:pos]) // copy the text before the prefix byte unchanged
+			work = work[pos:]
+		}
+		looking := true // stays true if no pattern matches at this position
+		if len(work) >= rw.minLength { // skip matching when less text remains than the shortest pattern
+			for k, v := range rw.patterns { // NOTE(review): map order is unspecified; if one pattern is a prefix of another, the winner varies
+				if strings.HasPrefix(work, k) {
+					looking = false
+					output.WriteString(rw.emos[v].Replace)
+					work = work[len(k):]
+					didReplace = true
+					break
+				}
+			}
+		}
+		if looking {
+			output.WriteString(work[:1]) // no match: emit the prefix byte itself and move past it
+			work = work[1:]
+		}
+		pos = math.MaxInt // find the next prefix byte in the remaining text
+		for _, c := range rw.prefixChars {
+			foo := strings.IndexByte(work, c)
+			if foo >= 0 {
+				pos = min(pos, foo)
+			}
+		}
+	}
+	if !didReplace {
+		return nil // prefix bytes were present but no pattern matched
+	}
+	output.WriteString(work) // the remainder contains no prefix bytes
+	return &markupData{beginMarkup: "", text:
output.String(), endMarkup: "", rescan: true}
+}
diff --git a/htmlcheck/emoticons.yaml b/htmlcheck/emoticons.yaml
new file mode 100644
index 0000000..eec688d
--- /dev/null
+++ b/htmlcheck/emoticons.yaml
@@ -0,0 +1,56 @@
+#
+# Amsterdam Web Communities System
+# Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+#
+prefixChars: ":;"
+emoticons:
+  - name: smile
+    patterns:
+      - ":)"
+    replace: "😊"
+  - name: frown
+    patterns:
+      - ":("
+    replace: "🙁"
+  - name: redface
+    patterns:
+      - ":o"
+      - ":O"
+    replace: "😳"
+  - name: biggrin
+    patterns:
+      - ":D"
+    replace: "😁"
+  - name: wink
+    patterns:
+      - ";)"
+    replace: "😉"
+  - name: tongue
+    patterns:
+      - ":p"
+      - ":P"
+    replace: "😛"
+  - name: cool
+    patterns:
+      - ":cool:"
+    replace: "😎"
+  - name: rolleyes
+    patterns:
+      - ":rolleyes:"
+    replace: "🙄"
+  - name: mad
+    patterns:
+      - ":mad:"
+    replace: "😡"
+  - name: eek
+    patterns:
+      - ":eek:"
+    replace: "😱"
+  - name: confused
+    patterns:
+      - ":confused:"
+    replace: "😕"
diff --git a/htmlcheck/rewriter.go b/htmlcheck/rewriter.go
new file mode 100644
index 0000000..92e4956
--- /dev/null
+++ b/htmlcheck/rewriter.go
@@ -0,0 +1,31 @@
+/*
+ * Amsterdam Web Communities System
+ * Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ */
+// The htmlcheck package contains the HTML Checker.
+package htmlcheck
+
+import "net/url"
+
+type markupData struct { // value returned by rewriter.Rewrite
+	beginMarkup string // markup to emit before text; emoticonRewriter leaves it ""
+	text        string // the rewritten text
+	endMarkup   string // markup to emit after text; emoticonRewriter leaves it ""
+	rescan      bool   // emoticonRewriter sets true; NOTE(review): presumably asks the caller to scan text again — confirm
+}
+
+type rewriterServices interface { // services passed to Rewrite; emoticonRewriter ignores its svc argument
+	rewriterAttrValue(string) string
+	rewriterContextValue(string) any
+	addExternalRef(url.URL) // NOTE(review): url.URL is passed by value; *url.URL is the usual form — confirm intent
+	addInternalRef(string)
+}
+
+type rewriter interface { // implemented in this patch by emoticonRewriter
+	Name() string                                 // emoticonRewriter returns "emoticon"
+	Rewrite(string, rewriterServices) *markupData // emoticonRewriter returns nil when it replaced nothing
+}