landed the dictionary objects and spelling rewriter, which also allowed me to complete the configuration YAML file
This commit is contained in:
+5
-1
@@ -16,7 +16,7 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
|
|
||||||
argparse "github.com/alexflint/go-arg"
|
argparse "github.com/alexflint/go-arg"
|
||||||
"github.com/labstack/gommon/log"
|
log "github.com/sirupsen/logrus"
|
||||||
"gopkg.in/yaml.v3"
|
"gopkg.in/yaml.v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -86,6 +86,9 @@ type AmConfig struct {
|
|||||||
Prioritize string `yaml:"prioritize"`
|
Prioritize string `yaml:"prioritize"`
|
||||||
} `yaml:"countryList"`
|
} `yaml:"countryList"`
|
||||||
} `yaml:"rendering"`
|
} `yaml:"rendering"`
|
||||||
|
Posting struct {
|
||||||
|
ExternalDictionary string `yaml:"externalDictionary"`
|
||||||
|
} `yaml:"posting"`
|
||||||
}
|
}
|
||||||
|
|
||||||
//go:embed default.yaml
|
//go:embed default.yaml
|
||||||
@@ -164,6 +167,7 @@ func overlayConfig(dest *AmConfig, loaded *AmConfig, defaults *AmConfig) {
|
|||||||
dest.Rendering.TemplateDir = overlayString(loaded.Rendering.TemplateDir, defaults.Rendering.TemplateDir)
|
dest.Rendering.TemplateDir = overlayString(loaded.Rendering.TemplateDir, defaults.Rendering.TemplateDir)
|
||||||
dest.Rendering.CookieKey = overlayString(loaded.Rendering.CookieKey, defaults.Rendering.CookieKey)
|
dest.Rendering.CookieKey = overlayString(loaded.Rendering.CookieKey, defaults.Rendering.CookieKey)
|
||||||
dest.Rendering.CountryList.Prioritize = overlayString(loaded.Rendering.CountryList.Prioritize, defaults.Rendering.CountryList.Prioritize)
|
dest.Rendering.CountryList.Prioritize = overlayString(loaded.Rendering.CountryList.Prioritize, defaults.Rendering.CountryList.Prioritize)
|
||||||
|
dest.Posting.ExternalDictionary = overlayString(loaded.Posting.ExternalDictionary, defaults.Posting.ExternalDictionary)
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetupConfig loads the command line arguments, loads the config file, and prepares GlobalConfig.
|
// SetupConfig loads the command line arguments, loads the config file, and prepares GlobalConfig.
|
||||||
|
|||||||
@@ -43,3 +43,5 @@ rendering:
|
|||||||
cookiekey: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
cookiekey: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
|
||||||
countryList:
|
countryList:
|
||||||
prioritize: US
|
prioritize: US
|
||||||
|
posting:
|
||||||
|
externalDictionary: ""
|
||||||
|
|||||||
+1
-1
@@ -13,7 +13,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/labstack/gommon/log"
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
// AuditRecord holds an audit record instance.
|
// AuditRecord holds an audit record instance.
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ require (
|
|||||||
github.com/alexflint/go-arg v1.6.0
|
github.com/alexflint/go-arg v1.6.0
|
||||||
github.com/biter777/countries v1.7.5
|
github.com/biter777/countries v1.7.5
|
||||||
github.com/bits-and-blooms/bitset v1.24.0
|
github.com/bits-and-blooms/bitset v1.24.0
|
||||||
|
github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d
|
||||||
github.com/disintegration/imaging v1.6.2
|
github.com/disintegration/imaging v1.6.2
|
||||||
github.com/go-sql-driver/mysql v1.9.3
|
github.com/go-sql-driver/mysql v1.9.3
|
||||||
github.com/gorilla/sessions v1.4.0
|
github.com/gorilla/sessions v1.4.0
|
||||||
|
|||||||
@@ -15,6 +15,8 @@ github.com/bits-and-blooms/bitset v1.24.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6
|
|||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d h1:hUWoLdw5kvo2xCsqlsIBMvWUc1QCSsCYD2J2+Fg6YoU=
|
||||||
|
github.com/derekparker/trie v0.0.0-20230829180723-39f4de51ef7d/go.mod h1:C7Es+DLenIpPc9J6IYw4jrK0h7S9bKj4DNl8+KxGEXU=
|
||||||
github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c=
|
github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c=
|
||||||
github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4=
|
github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4=
|
||||||
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
|
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
|
||||||
|
|||||||
@@ -46,3 +46,54 @@ configs:
|
|||||||
- emoticon
|
- emoticon
|
||||||
- email
|
- email
|
||||||
- url
|
- url
|
||||||
|
wordRewriters:
|
||||||
|
- spelling
|
||||||
|
tagRewriters:
|
||||||
|
- emoticon_tag
|
||||||
|
- postlink
|
||||||
|
- userlink
|
||||||
|
- email
|
||||||
|
- url
|
||||||
|
parenRewriters:
|
||||||
|
- userlink
|
||||||
|
tagSet: normal
|
||||||
|
- name: "escaper"
|
||||||
|
wordWrap: 0
|
||||||
|
angles: false
|
||||||
|
parens: false
|
||||||
|
discardHTML: false
|
||||||
|
outputFilters:
|
||||||
|
- html
|
||||||
|
- name: "mail-post"
|
||||||
|
wordWrap: 55
|
||||||
|
angles: true
|
||||||
|
parens: false
|
||||||
|
discardHTML: true
|
||||||
|
discardRejected: true
|
||||||
|
tagSet: normal
|
||||||
|
- name: "post-from-email"
|
||||||
|
wordWrap: 55
|
||||||
|
rewrap: true
|
||||||
|
angles: true
|
||||||
|
parens: true
|
||||||
|
discardHTML: false
|
||||||
|
discardRejected: true
|
||||||
|
discardComments: true
|
||||||
|
discardXML: true
|
||||||
|
outputFilters:
|
||||||
|
- html
|
||||||
|
stringRewriters:
|
||||||
|
- emoticon
|
||||||
|
- email
|
||||||
|
- url
|
||||||
|
tagRewriters:
|
||||||
|
- emoticon_tag
|
||||||
|
- postlink
|
||||||
|
- userlink
|
||||||
|
- email
|
||||||
|
- url
|
||||||
|
parenRewriters:
|
||||||
|
- userlink
|
||||||
|
tagSet: normal
|
||||||
|
disallowTags:
|
||||||
|
- font
|
||||||
|
|||||||
@@ -0,0 +1,90 @@
|
|||||||
|
/*
|
||||||
|
* Amsterdam Web Communities System
|
||||||
|
* Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
|
||||||
|
*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||||
|
*/
|
||||||
|
// The htmlcheck package contains the HTML Checker.
|
||||||
|
package htmlcheck
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
|
// CompositeDictionary is a dictionary that wraps several base dictionaries, and adds some extra behavior.
|
||||||
|
type CompositeDictionary struct {
|
||||||
|
dicts []SpellingDictionary
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ready returns true if the dictionary has been fully loaded.
|
||||||
|
func (d *CompositeDictionary) Ready() bool {
|
||||||
|
for _, sd := range d.dicts {
|
||||||
|
if !sd.Ready() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size returns the number of words in the dictionary.
|
||||||
|
func (d *CompositeDictionary) Size() int {
|
||||||
|
rc := 0
|
||||||
|
for _, sd := range d.dicts {
|
||||||
|
rc += sd.Size()
|
||||||
|
}
|
||||||
|
return rc
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkSimple passes a word to the subdictionaries to check it.
|
||||||
|
func (d *CompositeDictionary) checkSimple(word string) bool {
|
||||||
|
for _, sd := range d.dicts {
|
||||||
|
if sd.CheckWord(word) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkHyphenates breaks a hyphenatewd work up into parts and checks each one.
|
||||||
|
func (d *CompositeDictionary) checkHyphenates(word string) bool {
|
||||||
|
parts := strings.Split(word, "-")
|
||||||
|
if len(parts) == 1 {
|
||||||
|
return false // no hyphens
|
||||||
|
}
|
||||||
|
for _, frag := range parts {
|
||||||
|
// each fragment greater than 1 character must be in dictionary
|
||||||
|
if len(frag) > 1 {
|
||||||
|
if !d.checkSimple(frag) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// CheckWord returns true if a word appears in the dictionary.
|
||||||
|
func (d *CompositeDictionary) CheckWord(word string) bool {
|
||||||
|
if len(word) <= 1 {
|
||||||
|
return true // words of length 1 get a free pass
|
||||||
|
}
|
||||||
|
realWord := strings.ToLower(word)
|
||||||
|
if d.checkSimple(realWord) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if strings.HasSuffix(realWord, "'s") {
|
||||||
|
l := len(realWord)
|
||||||
|
base := realWord[:l-2]
|
||||||
|
if d.checkSimple(base) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return d.checkHyphenates(base)
|
||||||
|
}
|
||||||
|
return d.checkHyphenates(realWord)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCompositeDict wraps an array of SpellingDictionary objects up in a composite.
|
||||||
|
func NewCompositeDict(dicts []SpellingDictionary) *CompositeDictionary {
|
||||||
|
return &CompositeDictionary{
|
||||||
|
dicts: dicts,
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,98 @@
|
|||||||
|
/*
|
||||||
|
* Amsterdam Web Communities System
|
||||||
|
* Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
|
||||||
|
*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||||
|
*/
|
||||||
|
// The htmlcheck package contains the HTML Checker.
|
||||||
|
package htmlcheck
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
|
||||||
|
"github.com/derekparker/trie"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TrieDictionary is a ModSpellingDictionary implemented using a trie.
|
||||||
|
type TrieDictionary struct {
|
||||||
|
mutex sync.Mutex
|
||||||
|
loaded atomic.Bool
|
||||||
|
trie *trie.Trie
|
||||||
|
count int
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ready lets us know if the dictionary is fully loaded.
|
||||||
|
func (d *TrieDictionary) Ready() bool {
|
||||||
|
return d.loaded.Load()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Size returns the number of words in the dictionary.
|
||||||
|
func (d *TrieDictionary) Size() int {
|
||||||
|
d.mutex.Lock()
|
||||||
|
defer d.mutex.Unlock()
|
||||||
|
return d.count
|
||||||
|
}
|
||||||
|
|
||||||
|
// CheckWord returns true if a word is in the dictionary, false if not.
|
||||||
|
func (d *TrieDictionary) CheckWord(word string) bool {
|
||||||
|
d.mutex.Lock()
|
||||||
|
defer d.mutex.Unlock()
|
||||||
|
_, rc := d.trie.Find(strings.ToLower(word))
|
||||||
|
return rc
|
||||||
|
}
|
||||||
|
|
||||||
|
// AddWord adds a new word to the dictionary.
|
||||||
|
func (d *TrieDictionary) AddWord(word string) {
|
||||||
|
d.mutex.Lock()
|
||||||
|
defer d.mutex.Unlock()
|
||||||
|
d.trie.Add(strings.ToLower(word), true)
|
||||||
|
d.count++
|
||||||
|
}
|
||||||
|
|
||||||
|
// DelWord deletes a word from the dictionary.
|
||||||
|
func (d *TrieDictionary) DelWord(word string) {
|
||||||
|
// not implemented for this type
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear removes all words from the dictionary.
|
||||||
|
func (d *TrieDictionary) Clear() {
|
||||||
|
d.mutex.Lock()
|
||||||
|
defer d.mutex.Unlock()
|
||||||
|
d.trie = trie.New()
|
||||||
|
d.count = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadDict is a goroutine that loads the dictionary in the background.
|
||||||
|
func loadDict(d *TrieDictionary, words []byte) {
|
||||||
|
d.mutex.Lock()
|
||||||
|
defer d.mutex.Unlock()
|
||||||
|
scanner := bufio.NewScanner(strings.NewReader(string(words)))
|
||||||
|
for scanner.Scan() {
|
||||||
|
word := strings.TrimSpace(scanner.Text())
|
||||||
|
if word != "" {
|
||||||
|
d.trie.Add(strings.ToLower(word), true)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
log.Fatalf("failed to load dictionary: %v", err)
|
||||||
|
}
|
||||||
|
d.loaded.Store(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
// LoadTrieDict creates a TrieDictionary from a byte array that represents a word list (one word per line).
|
||||||
|
func LoadTrieDict(words []byte) *TrieDictionary {
|
||||||
|
rc := TrieDictionary{
|
||||||
|
loaded: atomic.Bool{},
|
||||||
|
trie: trie.New(),
|
||||||
|
count: 0,
|
||||||
|
}
|
||||||
|
rc.loaded.Store(false)
|
||||||
|
go loadDict(&rc, words)
|
||||||
|
return &rc
|
||||||
|
}
|
||||||
@@ -9,15 +9,86 @@
|
|||||||
// The htmlcheck package contains the HTML Checker.
|
// The htmlcheck package contains the HTML Checker.
|
||||||
package htmlcheck
|
package htmlcheck
|
||||||
|
|
||||||
|
import (
|
||||||
|
_ "embed"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"git.erbosoft.com/amy/amsterdam/config"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// SpellingDictionary is a simple dictionary interface.
|
||||||
type SpellingDictionary interface {
|
type SpellingDictionary interface {
|
||||||
Ready() bool
|
Ready() bool
|
||||||
Size() int
|
Size() int
|
||||||
CheckWord(string) bool
|
CheckWord(string) bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ModSpellingDictionary is an interface to a modifiable spelling dictionary.
|
||||||
type ModSpellingDictionary interface {
|
type ModSpellingDictionary interface {
|
||||||
SpellingDictionary
|
SpellingDictionary
|
||||||
AddWord(string)
|
AddWord(string)
|
||||||
DelWord(string)
|
DelWord(string)
|
||||||
Clear()
|
Clear()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//go:embed en-us.dict
|
||||||
|
var mainDict []byte
|
||||||
|
|
||||||
|
//go:embed supplement.dict
|
||||||
|
var supplementaryDict []byte
|
||||||
|
|
||||||
|
// SetupDicts sets up the dictionaries and the spelling rewriter.
|
||||||
|
func SetupDicts() {
|
||||||
|
dicts := make([]SpellingDictionary, 2, 3)
|
||||||
|
dicts[0] = LoadTrieDict(mainDict)
|
||||||
|
dicts[1] = LoadTrieDict(supplementaryDict)
|
||||||
|
if config.GlobalConfig.Posting.ExternalDictionary != "" {
|
||||||
|
data, err := os.ReadFile(config.GlobalConfig.Posting.ExternalDictionary)
|
||||||
|
if err == nil {
|
||||||
|
ndict := LoadTrieDict(data)
|
||||||
|
dicts = append(dicts, ndict)
|
||||||
|
} else {
|
||||||
|
log.Errorf("failed to load external dictionary %s: %v", config.GlobalConfig.Posting.ExternalDictionary, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rw := spellingRewriter{
|
||||||
|
dict: NewCompositeDict(dicts),
|
||||||
|
}
|
||||||
|
rewriterRegistry[rw.Name()] = &rw
|
||||||
|
}
|
||||||
|
|
||||||
|
// spellingRewriter is a rewriter that flags spelling errors.
|
||||||
|
type spellingRewriter struct {
|
||||||
|
dict SpellingDictionary
|
||||||
|
}
|
||||||
|
|
||||||
|
// Markup wrapped around a word that is flagged as a spelling error.
const (
	// defaultBeginError opens the error highlight.
	defaultBeginError = `<span class="text-red-600 font-bold">`
	// defaultEndError closes the error highlight.
	defaultEndError = `</span>`
)
|
||||||
|
|
||||||
|
// Name returns the rewriter's name.
|
||||||
|
func (rw *spellingRewriter) Name() string {
|
||||||
|
return "spelling"
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Rewrite rewrites the given string data and adds markup before and after if needed.
|
||||||
|
* Parameters:
|
||||||
|
* data - The data to be rewritten.
|
||||||
|
* svc - Services interface we can use.
|
||||||
|
* Returns:
|
||||||
|
* Pointer to markup data, or nil.
|
||||||
|
*/
|
||||||
|
func (rw *spellingRewriter) Rewrite(data string, svc rewriterServices) *markupData {
|
||||||
|
if rw.dict.CheckWord(data) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return &markupData{
|
||||||
|
beginMarkup: defaultBeginError,
|
||||||
|
text: data,
|
||||||
|
endMarkup: defaultEndError,
|
||||||
|
rescan: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import (
|
|||||||
"git.erbosoft.com/amy/amsterdam/config"
|
"git.erbosoft.com/amy/amsterdam/config"
|
||||||
"git.erbosoft.com/amy/amsterdam/database"
|
"git.erbosoft.com/amy/amsterdam/database"
|
||||||
"git.erbosoft.com/amy/amsterdam/email"
|
"git.erbosoft.com/amy/amsterdam/email"
|
||||||
|
"git.erbosoft.com/amy/amsterdam/htmlcheck"
|
||||||
"git.erbosoft.com/amy/amsterdam/ui"
|
"git.erbosoft.com/amy/amsterdam/ui"
|
||||||
"github.com/labstack/echo-contrib/session"
|
"github.com/labstack/echo-contrib/session"
|
||||||
"github.com/labstack/echo/v4"
|
"github.com/labstack/echo/v4"
|
||||||
@@ -101,6 +102,7 @@ func main() {
|
|||||||
defer closer()
|
defer closer()
|
||||||
closer = email.SetupMailSender()
|
closer = email.SetupMailSender()
|
||||||
defer closer()
|
defer closer()
|
||||||
|
htmlcheck.SetupDicts()
|
||||||
ui.SetupTemplates()
|
ui.SetupTemplates()
|
||||||
closer = ui.SetupSessionManager()
|
closer = ui.SetupSessionManager()
|
||||||
defer closer()
|
defer closer()
|
||||||
|
|||||||
Reference in New Issue
Block a user