landed the dictionary objects and spelling rewriter, which also allowed me to complete the configuration YAML file

This commit is contained in:
2025-10-30 22:33:39 -06:00
parent 05a43bcd47
commit 596d7de7de
10 changed files with 323 additions and 2 deletions
+5 -1
View File
@@ -16,7 +16,7 @@ import (
"os"
argparse "github.com/alexflint/go-arg"
"github.com/labstack/gommon/log"
log "github.com/sirupsen/logrus"
"gopkg.in/yaml.v3"
)
@@ -86,6 +86,9 @@ type AmConfig struct {
Prioritize string `yaml:"prioritize"`
} `yaml:"countryList"`
} `yaml:"rendering"`
Posting struct {
ExternalDictionary string `yaml:"externalDictionary"`
} `yaml:"posting"`
}
//go:embed default.yaml
@@ -164,6 +167,7 @@ func overlayConfig(dest *AmConfig, loaded *AmConfig, defaults *AmConfig) {
dest.Rendering.TemplateDir = overlayString(loaded.Rendering.TemplateDir, defaults.Rendering.TemplateDir)
dest.Rendering.CookieKey = overlayString(loaded.Rendering.CookieKey, defaults.Rendering.CookieKey)
dest.Rendering.CountryList.Prioritize = overlayString(loaded.Rendering.CountryList.Prioritize, defaults.Rendering.CountryList.Prioritize)
dest.Posting.ExternalDictionary = overlayString(loaded.Posting.ExternalDictionary, defaults.Posting.ExternalDictionary)
}
// SetupConfig loads the command line arguments, loads the config file, and prepares GlobalConfig.
+2
View File
@@ -43,3 +43,5 @@ rendering:
cookiekey: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
countryList:
prioritize: US
# Settings used when processing posts.
posting:
# Path to an additional word list (one word per line) that is loaded next to the
# built-in spelling dictionaries. Leave empty to use only the built-in ones.
externalDictionary: ""
+1 -1
View File
@@ -13,7 +13,7 @@ import (
"fmt"
"time"
"github.com/labstack/gommon/log"
log "github.com/sirupsen/logrus"
)
// AuditRecord holds an audit record instance.
+1
View File
@@ -7,6 +7,7 @@ require (
github.com/alexflint/go-arg v1.6.0
github.com/biter777/countries v1.7.5
github.com/bits-and-blooms/bitset v1.24.0
github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d
github.com/disintegration/imaging v1.6.2
github.com/go-sql-driver/mysql v1.9.3
github.com/gorilla/sessions v1.4.0
+2
View File
@@ -15,6 +15,8 @@ github.com/bits-and-blooms/bitset v1.24.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d h1:hUWoLdw5kvo2xCsqlsIBMvWUc1QCSsCYD2J2+Fg6YoU=
github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d/go.mod h1:C7Es+DLenIpPc9J6IYw4jrK0h7S9bKj4DNl8+KxGEXU=
github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c=
github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4=
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
+51
View File
@@ -46,3 +46,54 @@ configs:
- emoticon
- email
- url
wordRewriters:
- spelling
tagRewriters:
- emoticon_tag
- postlink
- userlink
- email
- url
parenRewriters:
- userlink
tagSet: normal
# "escaper" configuration: only the html output filter is listed; no rewriters
# and no tag set are configured for it.
- name: "escaper"
wordWrap: 0
angles: false
parens: false
discardHTML: false
outputFilters:
- html
# "mail-post" configuration: 55-column word wrap with the normal tag set.
- name: "mail-post"
wordWrap: 55
angles: true
parens: false
discardHTML: true
discardRejected: true
tagSet: normal
# "post-from-email" configuration: 55-column word wrap with rewrap enabled,
# the normal tag set, and the font tag disallowed.
- name: "post-from-email"
wordWrap: 55
rewrap: true
angles: true
parens: true
discardHTML: false
discardRejected: true
discardComments: true
discardXML: true
outputFilters:
- html
stringRewriters:
- emoticon
- email
- url
tagRewriters:
- emoticon_tag
- postlink
- userlink
- email
- url
parenRewriters:
- userlink
tagSet: normal
disallowTags:
- font
+90
View File
@@ -0,0 +1,90 @@
/*
* Amsterdam Web Communities System
* Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
// The htmlcheck package contains the HTML Checker.
package htmlcheck
import "strings"
// CompositeDictionary is a dictionary that wraps several base dictionaries, and adds some extra behavior.
// A word is accepted if any wrapped dictionary accepts it; CheckWord additionally
// handles possessive ("'s") and hyphenated forms.
type CompositeDictionary struct {
	// dicts holds the wrapped dictionaries, consulted in slice order.
	dicts []SpellingDictionary
}
// Ready reports whether every wrapped dictionary is ready. A composite with no
// wrapped dictionaries is considered ready.
func (d *CompositeDictionary) Ready() bool {
	ready := true
	// Stop asking as soon as one sub-dictionary says it is not ready.
	for i := 0; ready && i < len(d.dicts); i++ {
		ready = d.dicts[i].Ready()
	}
	return ready
}
// Size returns the sum of the sizes reported by all wrapped dictionaries.
func (d *CompositeDictionary) Size() int {
	total := 0
	for i := range d.dicts {
		total += d.dicts[i].Size()
	}
	return total
}
// checkSimple asks each wrapped dictionary in turn about the word and reports
// true as soon as one of them accepts it.
func (d *CompositeDictionary) checkSimple(word string) bool {
	found := false
	for i := 0; !found && i < len(d.dicts); i++ {
		found = d.dicts[i].CheckWord(word)
	}
	return found
}
// checkHyphenates breaks a hyphenated word up into parts and checks each one.
// It returns false when the word contains no hyphen at all. Otherwise it
// returns true only if every fragment longer than one character is accepted by
// checkSimple; fragments of zero or one character are not checked.
func (d *CompositeDictionary) checkHyphenates(word string) bool {
	if !strings.Contains(word, "-") {
		return false // no hyphens, so there is nothing to break apart
	}
	for _, frag := range strings.Split(word, "-") {
		// Measure fragments in characters (runes), not bytes, so that a single
		// non-ASCII character is treated like any other one-character fragment.
		if len([]rune(frag)) > 1 && !d.checkSimple(frag) {
			return false
		}
	}
	return true
}
// CheckWord returns true if a word appears in the dictionary.
//
// The lookup is case-insensitive (the word is lowercased first). A word is
// accepted when any of the following holds:
//   - it is at most one character long;
//   - a wrapped dictionary accepts it as-is;
//   - it ends in "'s" and the part before that suffix is accepted;
//   - it (or its base without "'s") is hyphenated and passes checkHyphenates.
func (d *CompositeDictionary) CheckWord(word string) bool {
	// Count characters (runes) rather than bytes so that a single non-ASCII
	// character gets the same free pass as a single ASCII one.
	if len([]rune(word)) <= 1 {
		return true // words of length 1 get a free pass
	}
	realWord := strings.ToLower(word)
	if d.checkSimple(realWord) {
		return true
	}
	// Strip a possessive suffix, if present; base equals realWord otherwise.
	base := strings.TrimSuffix(realWord, "'s")
	if base != realWord && d.checkSimple(base) {
		return true
	}
	return d.checkHyphenates(base)
}
// NewCompositeDict builds a CompositeDictionary over the given dictionaries.
// The slice is stored as passed in; it is not copied.
func NewCompositeDict(dicts []SpellingDictionary) *CompositeDictionary {
	cd := new(CompositeDictionary)
	cd.dicts = dicts
	return cd
}
+98
View File
@@ -0,0 +1,98 @@
/*
* Amsterdam Web Communities System
* Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
// The htmlcheck package contains the HTML Checker.
package htmlcheck
import (
"bufio"
"strings"
"sync"
"sync/atomic"
"github.com/derekparker/trie"
log "github.com/sirupsen/logrus"
)
// TrieDictionary is a ModSpellingDictionary implemented using a trie.
type TrieDictionary struct {
	mutex  sync.Mutex  // held by the methods while they access trie and count
	loaded atomic.Bool // set to true by loadDict once the word list has been read
	trie   *trie.Trie  // word storage; keys are stored lowercased
	count  int         // word counter reported by Size
}
// Ready reports whether the background load of this dictionary has completed.
func (d *TrieDictionary) Ready() bool {
	done := d.loaded.Load()
	return done
}
// Size returns the dictionary's current word counter, read under the mutex.
func (d *TrieDictionary) Size() int {
	d.mutex.Lock()
	n := d.count
	d.mutex.Unlock()
	return n
}
// CheckWord reports whether the lowercased form of word is present in the trie.
func (d *TrieDictionary) CheckWord(word string) bool {
	key := strings.ToLower(word)

	d.mutex.Lock()
	defer d.mutex.Unlock()

	_, found := d.trie.Find(key)
	return found
}
// AddWord adds a new word to the dictionary.
// The word is stored lowercased. Adding a word that is already present is a
// no-op, so the counter reported by Size only grows for words that are new.
func (d *TrieDictionary) AddWord(word string) {
	key := strings.ToLower(word)

	d.mutex.Lock()
	defer d.mutex.Unlock()

	// Skip duplicates so that count does not drift above the number of
	// distinct words held in the trie.
	if _, exists := d.trie.Find(key); exists {
		return
	}
	d.trie.Add(key, true)
	d.count++
}
// DelWord deletes a word from the dictionary.
// For TrieDictionary this is intentionally a no-op: the call is accepted but
// the word stays in the dictionary and the word count is unchanged.
func (d *TrieDictionary) DelWord(word string) {
	// not implemented for this type
}
// Clear empties the dictionary by swapping in a fresh trie and resetting the
// word counter. The loaded flag is left untouched.
func (d *TrieDictionary) Clear() {
	d.mutex.Lock()
	defer d.mutex.Unlock()

	d.trie, d.count = trie.New(), 0
}
// loadDict is a goroutine that loads the dictionary in the background.
// Parameters:
//   - d: the dictionary to fill; its mutex is held for the whole load.
//   - words: the raw word list, one word per line.
//
// Each line is trimmed and lowercased; blank lines and words already present
// are skipped. Every word actually added is counted so that Size reflects the
// loaded contents. When scanning finishes without error the dictionary is
// marked as loaded; a scanner error is logged with log.Fatalf.
func loadDict(d *TrieDictionary, words []byte) {
	d.mutex.Lock()
	defer d.mutex.Unlock()

	scanner := bufio.NewScanner(strings.NewReader(string(words)))
	for scanner.Scan() {
		word := strings.ToLower(strings.TrimSpace(scanner.Text()))
		if word == "" {
			continue
		}
		// Do not insert or count the same word twice.
		if _, dup := d.trie.Find(word); dup {
			continue
		}
		d.trie.Add(word, true)
		d.count++
	}
	if err := scanner.Err(); err != nil {
		log.Fatalf("failed to load dictionary: %v", err)
	}
	d.loaded.Store(true)
}
// LoadTrieDict creates a TrieDictionary from a byte array that represents a word list (one word per line).
// The dictionary is returned immediately; the words are loaded by a background
// goroutine, and Ready reports true once that load has finished.
func LoadTrieDict(words []byte) *TrieDictionary {
	// The zero values of loaded (false) and count (0) are the desired start state.
	dict := &TrieDictionary{trie: trie.New()}
	go loadDict(dict, words)
	return dict
}
+71
View File
@@ -9,15 +9,86 @@
// The htmlcheck package contains the HTML Checker.
package htmlcheck
import (
_ "embed"
"os"
"git.erbosoft.com/amy/amsterdam/config"
log "github.com/sirupsen/logrus"
)
// SpellingDictionary is a simple dictionary interface.
type SpellingDictionary interface {
	// Ready returns true once the dictionary has been fully loaded.
	Ready() bool
	// Size returns the number of words in the dictionary.
	Size() int
	// CheckWord returns true if the given word is in the dictionary.
	CheckWord(string) bool
}
// ModSpellingDictionary is an interface to a modifiable spelling dictionary.
type ModSpellingDictionary interface {
	SpellingDictionary
	// AddWord adds a word to the dictionary.
	AddWord(string)
	// DelWord deletes a word from the dictionary; an implementation may not support it.
	DelWord(string)
	// Clear removes all words from the dictionary.
	Clear()
}
// mainDict holds the raw contents of the embedded en-us.dict word list.
//
//go:embed en-us.dict
var mainDict []byte

// supplementaryDict holds the raw contents of the embedded supplement.dict word list.
//
//go:embed supplement.dict
var supplementaryDict []byte
// SetupDicts sets up the dictionaries and the spelling rewriter.
// It starts loading the two embedded word lists, optionally adds the external
// dictionary named by the Posting.ExternalDictionary config setting, and then
// registers a spelling rewriter backed by a composite of those dictionaries.
// A failure to read the external file is logged and otherwise ignored.
func SetupDicts() {
	dicts := []SpellingDictionary{
		LoadTrieDict(mainDict),
		LoadTrieDict(supplementaryDict),
	}

	if path := config.GlobalConfig.Posting.ExternalDictionary; path != "" {
		if data, err := os.ReadFile(path); err != nil {
			log.Errorf("failed to load external dictionary %s: %v", path, err)
		} else {
			dicts = append(dicts, LoadTrieDict(data))
		}
	}

	rw := &spellingRewriter{dict: NewCompositeDict(dicts)}
	rewriterRegistry[rw.Name()] = rw
}
// spellingRewriter is a rewriter that flags spelling errors.
type spellingRewriter struct {
	// dict is the dictionary consulted to decide whether a word is spelled correctly.
	dict SpellingDictionary
}
// Markup wrapped around a word that fails the spelling check.
const (
	// defaultBeginError opens the span that highlights the error.
	defaultBeginError = "<span class=\"text-red-600 font-bold\">"
	// defaultEndError closes the span that highlights the error.
	defaultEndError = "</span>"
)
// Name returns the rewriter's name.
// SetupDicts uses this value ("spelling") as the key under which the rewriter
// is stored in rewriterRegistry.
func (rw *spellingRewriter) Name() string {
	return "spelling"
}
/* Rewrite checks a word against the dictionary and, if it is not found,
 * returns markup that wraps the word in the spelling-error span.
 * Parameters:
 *     data - The word to be checked.
 *     svc - Services interface (not used by this rewriter).
 * Returns:
 *     Pointer to markup data for a misspelled word, or nil if the word is accepted.
 */
func (rw *spellingRewriter) Rewrite(data string, svc rewriterServices) *markupData {
	if !rw.dict.CheckWord(data) {
		flagged := new(markupData)
		flagged.beginMarkup = defaultBeginError
		flagged.text = data
		flagged.endMarkup = defaultEndError
		flagged.rescan = false
		return flagged
	}
	return nil
}
+2
View File
@@ -21,6 +21,7 @@ import (
"git.erbosoft.com/amy/amsterdam/config"
"git.erbosoft.com/amy/amsterdam/database"
"git.erbosoft.com/amy/amsterdam/email"
"git.erbosoft.com/amy/amsterdam/htmlcheck"
"git.erbosoft.com/amy/amsterdam/ui"
"github.com/labstack/echo-contrib/session"
"github.com/labstack/echo/v4"
@@ -101,6 +102,7 @@ func main() {
defer closer()
closer = email.SetupMailSender()
defer closer()
htmlcheck.SetupDicts()
ui.SetupTemplates()
closer = ui.SetupSessionManager()
defer closer()