landed the dictionary objects and spelling rewriter, which also allowed me to complete the configuration YAML file
This commit is contained in:
+5
-1
@@ -16,7 +16,7 @@ import (
|
||||
"os"
|
||||
|
||||
argparse "github.com/alexflint/go-arg"
|
||||
"github.com/labstack/gommon/log"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
@@ -86,6 +86,9 @@ type AmConfig struct {
|
||||
Prioritize string `yaml:"prioritize"`
|
||||
} `yaml:"countryList"`
|
||||
} `yaml:"rendering"`
|
||||
Posting struct {
|
||||
ExternalDictionary string `yaml:"externalDictionary"`
|
||||
} `yaml:"posting"`
|
||||
}
|
||||
|
||||
//go:embed default.yaml
|
||||
@@ -164,6 +167,7 @@ func overlayConfig(dest *AmConfig, loaded *AmConfig, defaults *AmConfig) {
|
||||
dest.Rendering.TemplateDir = overlayString(loaded.Rendering.TemplateDir, defaults.Rendering.TemplateDir)
|
||||
dest.Rendering.CookieKey = overlayString(loaded.Rendering.CookieKey, defaults.Rendering.CookieKey)
|
||||
dest.Rendering.CountryList.Prioritize = overlayString(loaded.Rendering.CountryList.Prioritize, defaults.Rendering.CountryList.Prioritize)
|
||||
dest.Posting.ExternalDictionary = overlayString(loaded.Posting.ExternalDictionary, defaults.Posting.ExternalDictionary)
|
||||
}
|
||||
|
||||
// SetupConfig loads the command line arguments, loads the config file, and prepares GlobalConfig.
|
||||
|
||||
@@ -43,3 +43,5 @@ rendering:
|
||||
cookiekey: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
||||
countryList:
|
||||
prioritize: US
|
||||
posting:
|
||||
externalDictionary: ""
|
||||
|
||||
+1
-1
@@ -13,7 +13,7 @@ import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/labstack/gommon/log"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// AuditRecord holds an audit record instance.
|
||||
|
||||
@@ -7,6 +7,7 @@ require (
|
||||
github.com/alexflint/go-arg v1.6.0
|
||||
github.com/biter777/countries v1.7.5
|
||||
github.com/bits-and-blooms/bitset v1.24.0
|
||||
github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d
|
||||
github.com/disintegration/imaging v1.6.2
|
||||
github.com/go-sql-driver/mysql v1.9.3
|
||||
github.com/gorilla/sessions v1.4.0
|
||||
|
||||
@@ -15,6 +15,8 @@ github.com/bits-and-blooms/bitset v1.24.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d h1:hUWoLdw5kvo2xCsqlsIBMvWUc1QCSsCYD2J2+Fg6YoU=
|
||||
github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d/go.mod h1:C7Es+DLenIpPc9J6IYw4jrK0h7S9bKj4DNl8+KxGEXU=
|
||||
github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c=
|
||||
github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4=
|
||||
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
|
||||
|
||||
@@ -46,3 +46,54 @@ configs:
|
||||
- emoticon
|
||||
- email
|
||||
- url
|
||||
wordRewriters:
|
||||
- spelling
|
||||
tagRewriters:
|
||||
- emoticon_tag
|
||||
- postlink
|
||||
- userlink
|
||||
- email
|
||||
- url
|
||||
parenRewriters:
|
||||
- userlink
|
||||
tagSet: normal
|
||||
- name: "escaper"
|
||||
wordWrap: 0
|
||||
angles: false
|
||||
parens: false
|
||||
discardHTML: false
|
||||
outputFilters:
|
||||
- html
|
||||
- name: "mail-post"
|
||||
wordWrap: 55
|
||||
angles: true
|
||||
parens: false
|
||||
discardHTML: true
|
||||
discardRejected: true
|
||||
tagSet: normal
|
||||
- name: "post-from-email"
|
||||
wordWrap: 55
|
||||
rewrap: true
|
||||
angles: true
|
||||
parens: true
|
||||
discardHTML: false
|
||||
discardRejected: true
|
||||
discardComments: true
|
||||
discardXML: true
|
||||
outputFilters:
|
||||
- html
|
||||
stringRewriters:
|
||||
- emoticon
|
||||
- email
|
||||
- url
|
||||
tagRewriters:
|
||||
- emoticon_tag
|
||||
- postlink
|
||||
- userlink
|
||||
- email
|
||||
- url
|
||||
parenRewriters:
|
||||
- userlink
|
||||
tagSet: normal
|
||||
disallowTags:
|
||||
- font
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Amsterdam Web Communities System
|
||||
* Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
// The htmlcheck package contains the HTML Checker.
|
||||
package htmlcheck
|
||||
|
||||
import "strings"
|
||||
|
||||
// CompositeDictionary is a dictionary that wraps several base dictionaries, and adds some extra behavior.
|
||||
type CompositeDictionary struct {
|
||||
dicts []SpellingDictionary
|
||||
}
|
||||
|
||||
// Ready returns true if the dictionary has been fully loaded.
|
||||
func (d *CompositeDictionary) Ready() bool {
|
||||
for _, sd := range d.dicts {
|
||||
if !sd.Ready() {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Size returns the number of words in the dictionary.
|
||||
func (d *CompositeDictionary) Size() int {
|
||||
rc := 0
|
||||
for _, sd := range d.dicts {
|
||||
rc += sd.Size()
|
||||
}
|
||||
return rc
|
||||
}
|
||||
|
||||
// checkSimple passes a word to the subdictionaries to check it.
|
||||
func (d *CompositeDictionary) checkSimple(word string) bool {
|
||||
for _, sd := range d.dicts {
|
||||
if sd.CheckWord(word) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// checkHyphenates breaks a hyphenatewd work up into parts and checks each one.
|
||||
func (d *CompositeDictionary) checkHyphenates(word string) bool {
|
||||
parts := strings.Split(word, "-")
|
||||
if len(parts) == 1 {
|
||||
return false // no hyphens
|
||||
}
|
||||
for _, frag := range parts {
|
||||
// each fragment greater than 1 character must be in dictionary
|
||||
if len(frag) > 1 {
|
||||
if !d.checkSimple(frag) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// CheckWord returns true if a word appears in the dictionary.
|
||||
func (d *CompositeDictionary) CheckWord(word string) bool {
|
||||
if len(word) <= 1 {
|
||||
return true // words of length 1 get a free pass
|
||||
}
|
||||
realWord := strings.ToLower(word)
|
||||
if d.checkSimple(realWord) {
|
||||
return true
|
||||
}
|
||||
if strings.HasSuffix(realWord, "'s") {
|
||||
l := len(realWord)
|
||||
base := realWord[:l-2]
|
||||
if d.checkSimple(base) {
|
||||
return true
|
||||
}
|
||||
return d.checkHyphenates(base)
|
||||
}
|
||||
return d.checkHyphenates(realWord)
|
||||
}
|
||||
|
||||
// NewCompositeDict wraps an array of SpellingDictionary objects up in a composite.
|
||||
func NewCompositeDict(dicts []SpellingDictionary) *CompositeDictionary {
|
||||
return &CompositeDictionary{
|
||||
dicts: dicts,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Amsterdam Web Communities System
|
||||
* Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
// The htmlcheck package contains the HTML Checker.
|
||||
package htmlcheck
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/derekparker/trie"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// TrieDictionary is a ModSpellingDictionary implemented using a trie.
|
||||
type TrieDictionary struct {
|
||||
mutex sync.Mutex
|
||||
loaded atomic.Bool
|
||||
trie *trie.Trie
|
||||
count int
|
||||
}
|
||||
|
||||
// Ready lets us know if the dictionary is fully loaded.
|
||||
func (d *TrieDictionary) Ready() bool {
|
||||
return d.loaded.Load()
|
||||
}
|
||||
|
||||
// Size returns the number of words in the dictionary.
|
||||
func (d *TrieDictionary) Size() int {
|
||||
d.mutex.Lock()
|
||||
defer d.mutex.Unlock()
|
||||
return d.count
|
||||
}
|
||||
|
||||
// CheckWord returns true if a word is in the dictionary, false if not.
|
||||
func (d *TrieDictionary) CheckWord(word string) bool {
|
||||
d.mutex.Lock()
|
||||
defer d.mutex.Unlock()
|
||||
_, rc := d.trie.Find(strings.ToLower(word))
|
||||
return rc
|
||||
}
|
||||
|
||||
// AddWord adds a new word to the dictionary.
|
||||
func (d *TrieDictionary) AddWord(word string) {
|
||||
d.mutex.Lock()
|
||||
defer d.mutex.Unlock()
|
||||
d.trie.Add(strings.ToLower(word), true)
|
||||
d.count++
|
||||
}
|
||||
|
||||
// DelWord deletes a word from the dictionary.
|
||||
func (d *TrieDictionary) DelWord(word string) {
|
||||
// not implemented for this type
|
||||
}
|
||||
|
||||
// Clear removes all words from the dictionary.
|
||||
func (d *TrieDictionary) Clear() {
|
||||
d.mutex.Lock()
|
||||
defer d.mutex.Unlock()
|
||||
d.trie = trie.New()
|
||||
d.count = 0
|
||||
}
|
||||
|
||||
// loadDict is a goroutine that loads the dictionary in the background.
|
||||
func loadDict(d *TrieDictionary, words []byte) {
|
||||
d.mutex.Lock()
|
||||
defer d.mutex.Unlock()
|
||||
scanner := bufio.NewScanner(strings.NewReader(string(words)))
|
||||
for scanner.Scan() {
|
||||
word := strings.TrimSpace(scanner.Text())
|
||||
if word != "" {
|
||||
d.trie.Add(strings.ToLower(word), true)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("failed to load dictionary: %v", err)
|
||||
}
|
||||
d.loaded.Store(true)
|
||||
}
|
||||
|
||||
// LoadTrieDict creates a TrieDictionary from a byte array that represents a word list (one word per line).
|
||||
func LoadTrieDict(words []byte) *TrieDictionary {
|
||||
rc := TrieDictionary{
|
||||
loaded: atomic.Bool{},
|
||||
trie: trie.New(),
|
||||
count: 0,
|
||||
}
|
||||
rc.loaded.Store(false)
|
||||
go loadDict(&rc, words)
|
||||
return &rc
|
||||
}
|
||||
@@ -9,15 +9,86 @@
|
||||
// The htmlcheck package contains the HTML Checker.
|
||||
package htmlcheck
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"os"
|
||||
|
||||
"git.erbosoft.com/amy/amsterdam/config"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// SpellingDictionary is the read-only interface to a spelling dictionary.
type SpellingDictionary interface {
	// Ready reports whether the dictionary has been fully loaded.
	Ready() bool
	// Size returns the number of words in the dictionary.
	Size() int
	// CheckWord reports whether word appears in the dictionary.
	CheckWord(word string) bool
}
|
||||
|
||||
// ModSpellingDictionary is an intrerface to a modifiable spelling dictionary.
|
||||
type ModSpellingDictionary interface {
|
||||
SpellingDictionary
|
||||
AddWord(string)
|
||||
DelWord(string)
|
||||
Clear()
|
||||
}
|
||||
|
||||
//go:embed en-us.dict
|
||||
var mainDict []byte
|
||||
|
||||
//go:embed supplement.dict
|
||||
var supplementaryDict []byte
|
||||
|
||||
// SetupDicts sets up the dictionaries and the spelling rewriter.
|
||||
func SetupDicts() {
|
||||
dicts := make([]SpellingDictionary, 2, 3)
|
||||
dicts[0] = LoadTrieDict(mainDict)
|
||||
dicts[1] = LoadTrieDict(supplementaryDict)
|
||||
if config.GlobalConfig.Posting.ExternalDictionary != "" {
|
||||
data, err := os.ReadFile(config.GlobalConfig.Posting.ExternalDictionary)
|
||||
if err == nil {
|
||||
ndict := LoadTrieDict(data)
|
||||
dicts = append(dicts, ndict)
|
||||
} else {
|
||||
log.Errorf("failed to load external dictionary %s: %v", config.GlobalConfig.Posting.ExternalDictionary, err)
|
||||
}
|
||||
}
|
||||
rw := spellingRewriter{
|
||||
dict: NewCompositeDict(dicts),
|
||||
}
|
||||
rewriterRegistry[rw.Name()] = &rw
|
||||
}
|
||||
|
||||
// spellingRewriter is a rewriter that flags spelling errors.
|
||||
type spellingRewriter struct {
|
||||
dict SpellingDictionary
|
||||
}
|
||||
|
||||
// Markup placed around a word that is flagged as a spelling error.
const (
	// defaultBeginError is the markup that indicates the start of an error.
	defaultBeginError = "<span class=\"text-red-600 font-bold\">"

	// defaultEndError is the markup that indicates the end of an error.
	defaultEndError = "</span>"
)
|
||||
|
||||
// Name returns the rewriter's name.
|
||||
func (rw *spellingRewriter) Name() string {
|
||||
return "spelling"
|
||||
}
|
||||
|
||||
/* Rewrite rewrites the given string data and adds markup before and after if needed.
|
||||
* Parameters:
|
||||
* data - The data to be rewritten.
|
||||
* svc - Services interface we can use.
|
||||
* Returns:
|
||||
* Pointer to markup data, or nil.
|
||||
*/
|
||||
func (rw *spellingRewriter) Rewrite(data string, svc rewriterServices) *markupData {
|
||||
if rw.dict.CheckWord(data) {
|
||||
return nil
|
||||
}
|
||||
return &markupData{
|
||||
beginMarkup: defaultBeginError,
|
||||
text: data,
|
||||
endMarkup: defaultEndError,
|
||||
rescan: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ import (
|
||||
"git.erbosoft.com/amy/amsterdam/config"
|
||||
"git.erbosoft.com/amy/amsterdam/database"
|
||||
"git.erbosoft.com/amy/amsterdam/email"
|
||||
"git.erbosoft.com/amy/amsterdam/htmlcheck"
|
||||
"git.erbosoft.com/amy/amsterdam/ui"
|
||||
"github.com/labstack/echo-contrib/session"
|
||||
"github.com/labstack/echo/v4"
|
||||
@@ -101,6 +102,7 @@ func main() {
|
||||
defer closer()
|
||||
closer = email.SetupMailSender()
|
||||
defer closer()
|
||||
htmlcheck.SetupDicts()
|
||||
ui.SetupTemplates()
|
||||
closer = ui.SetupSessionManager()
|
||||
defer closer()
|
||||
|
||||
Reference in New Issue
Block a user