fixed a first round of bugs and rewrote the URL rewriter using the old algorithm
This commit is contained in:
+12
-7
@@ -158,9 +158,12 @@ func AmNewHTMLChecker(configName string) (HTMLChecker, error) {
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("configuration %s not found", configName)
|
||||
}
|
||||
tset, ok := tagSetNameToSet[config.TagSet]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("tag set %s not found", config.TagSet)
|
||||
var tset *bitset.BitSet = nil
|
||||
if config.TagSet != "" {
|
||||
tset, ok = tagSetNameToSet[config.TagSet]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("tag set %s not found", config.TagSet)
|
||||
}
|
||||
}
|
||||
rc := htmlCheckerImpl{
|
||||
config: config,
|
||||
@@ -617,10 +620,12 @@ func (ht *htmlCheckerImpl) handleAsHTML() bool {
|
||||
// it's a closing tag and this tag doesn't permit the "close" form
|
||||
return false
|
||||
}
|
||||
tagSetID := tagIndexToSetId[tagIndex]
|
||||
if !ht.tagSet.Test(uint(tagSetID)) {
|
||||
// the tag is not allowed - discard it, if one of the flags is set in the config
|
||||
return ht.config.DiscardHTML || ht.config.DiscardRejected
|
||||
if ht.tagSet != nil {
|
||||
tagSetID := tagIndexToSetId[tagIndex]
|
||||
if !ht.tagSet.Test(uint(tagSetID)) {
|
||||
// the tag is not allowed - discard it, if one of the flags is set in the config
|
||||
return ht.config.DiscardHTML || ht.config.DiscardRejected
|
||||
}
|
||||
}
|
||||
if !ht.config.DiscardHTML && tag.balanceTags {
|
||||
// this tag needs to be balanced - here's where we manipulate the stack
|
||||
|
||||
@@ -23,7 +23,7 @@ emoticons:
|
||||
replace: "😳"
|
||||
- name: biggrin
|
||||
patterns:
|
||||
":D"
|
||||
- ":D"
|
||||
replace: "😁"
|
||||
- name: wink
|
||||
patterns:
|
||||
@@ -48,9 +48,9 @@ emoticons:
|
||||
replace: "😡"
|
||||
- name: eek
|
||||
patterns:
|
||||
":eek:"
|
||||
- ":eek:"
|
||||
replace: "😱"
|
||||
- name: confused
|
||||
patterns:
|
||||
":confused:"
|
||||
- ":confused:"
|
||||
replace: "😕"
|
||||
|
||||
+2
-61
@@ -51,12 +51,10 @@ var rewriterRegistry = make(map[string]rewriter)
|
||||
func init() {
|
||||
r1 := emailRewriter{}
|
||||
rewriterRegistry[r1.Name()] = &r1
|
||||
r2 := urlRewriter{}
|
||||
r2 := postLinkRewriter{}
|
||||
rewriterRegistry[r2.Name()] = &r2
|
||||
r3 := postLinkRewriter{}
|
||||
r3 := userLinkRewriter{}
|
||||
rewriterRegistry[r3.Name()] = &r3
|
||||
r4 := userLinkRewriter{}
|
||||
rewriterRegistry[r4.Name()] = &r4
|
||||
}
|
||||
|
||||
// emailRewriter is an implementation of Rewriter that recognizes E-mail addresses.
|
||||
@@ -97,63 +95,6 @@ func (rw *emailRewriter) Rewrite(data string, svc rewriterServices) *markupData
|
||||
rescan: false}
|
||||
}
|
||||
|
||||
// urlRewriter is an implementation of Rewriter that recognizes URLs.
|
||||
type urlRewriter struct{}
|
||||
|
||||
// Name returns the rewriter's name.
|
||||
func (rw *urlRewriter) Name() string {
|
||||
return "url"
|
||||
}
|
||||
|
||||
/* Rewrite rewrites the given string data and adds markup before and after if needed.
|
||||
* Parameters:
|
||||
* data - The data to be rewritten.
|
||||
* svc - Services interface we can use.
|
||||
* Returns:
|
||||
* Pointer to markup data, or nil.
|
||||
*/
|
||||
func (rw *urlRewriter) Rewrite(data string, svc rewriterServices) *markupData {
|
||||
url, err := url.Parse(data)
|
||||
if err != nil {
|
||||
secondChance := ""
|
||||
if strings.HasPrefix(data, "www.") {
|
||||
secondChance = "http://" + data
|
||||
} else if strings.HasPrefix(data, "ftp.") {
|
||||
secondChance = "ftp://" + data
|
||||
} else if strings.HasPrefix(data, "gopher.") {
|
||||
secondChance = "gopher://" + data
|
||||
}
|
||||
if secondChance == "" {
|
||||
return nil
|
||||
}
|
||||
url, err = url.Parse(secondChance)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if url.Scheme == "http" || url.Scheme == "https" {
|
||||
svc.addExternalRef(url)
|
||||
}
|
||||
|
||||
var openA strings.Builder
|
||||
openA.WriteString("<a href=\"")
|
||||
openA.WriteString(url.String())
|
||||
openA.WriteString("\"")
|
||||
catenate := svc.rewriterAttrValue("ANCHORTAIL")
|
||||
if catenate != "" {
|
||||
openA.WriteString(" ")
|
||||
openA.WriteString(catenate)
|
||||
}
|
||||
openA.WriteString(">")
|
||||
return &markupData{
|
||||
beginMarkup: openA.String(),
|
||||
text: data,
|
||||
endMarkup: "</a>",
|
||||
rescan: false,
|
||||
}
|
||||
}
|
||||
|
||||
// postLinkRewriter is the rewriter that handles links to conference posts.
|
||||
type postLinkRewriter struct{}
|
||||
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Amsterdam Web Communities System
|
||||
* Copyright (c) 2025 Erbosoft Metaverse Design Solutions, All Rights Reserved
|
||||
*
|
||||
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
||||
*/
|
||||
// Package htmlcheck contains the HTML Checker.
|
||||
package htmlcheck
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// urlElement pairs a compiled regular expression with the prefix that is
// prepended to any string the expression matches.
type urlElement struct {
	re     *regexp.Regexp // pattern a candidate string must match
	prefix string         // text prepended to a matching string; may be empty
}

// eval tests s against the element's regular expression. It returns prefix+s
// when s matches and the empty string when it does not.
func (e *urlElement) eval(s string) string {
	// MatchString answers "did it match?" directly. Comparing the result of
	// FindString with "" cannot tell a failed match from a successful match
	// of the empty string.
	if !e.re.MatchString(s) {
		return ""
	}
	return e.prefix + s
}
|
||||
|
||||
// urlSetupData contains the data needed to set up the URL rewriter elements.
// It is a flat list of pairs: each regular expression is followed by the prefix
// to prepend to text that matches it ("" when the text already carries a scheme).
var urlSetupData = [...]string{
	`^(?i:http)://[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)+.*$`, "",
	`^(?i:https)://[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)+.*$`, "",
	`^(?i:ftp)://[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)+.*$`, "",
	`^(?i:gopher)://[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)+.*$`, "",
	// The hyphen sits at the end of the local-part class so that it is a literal;
	// written as "+-/" it forms a range that also admits ",".
	"^(?i:mailto):[A-Za-z0-9!#$%*+/=?^_`{|}~.-]+@[A-Za-z0-9_-]+(\\.[A-Za-z0-9_-]+)+$", "",
	`^(?i:news):[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)+$`, "",
	`^(?i:nntp)://[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)+.*$`, "",
	`^(?i:telnet)://[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)+.*$`, "",
	`^(?i:tn3270)://[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)+.*$`, "",
	// Bare host names with a telltale first label get the matching scheme added.
	`^(?i:www)\.[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)*.*$`, "http://",
	`^(?i:ftp)\.[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)*.*$`, "ftp://",
	`^(?i:gopher)\.[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)*.*$`, "gopher://",
}
|
||||
|
||||
// The URL elements we can match against.
|
||||
var urlElements []urlElement
|
||||
|
||||
// init builds the URL elements and registers the rewriter.
|
||||
func init() {
|
||||
urlElements = make([]urlElement, len(urlSetupData)/2)
|
||||
i, j := 0, 0
|
||||
for i < len(urlSetupData) {
|
||||
urlElements[j].re = regexp.MustCompile(urlSetupData[i])
|
||||
urlElements[j].prefix = urlSetupData[i+1]
|
||||
i += 2
|
||||
j++
|
||||
}
|
||||
r := urlRewriter{}
|
||||
rewriterRegistry[r.Name()] = &r
|
||||
}
|
||||
|
||||
// urlRewriter is the Rewriter implementation that recognizes URLs.
// It has no fields of its own.
type urlRewriter struct{}

// Name returns the name this rewriter is known by.
func (*urlRewriter) Name() string {
	const name = "url"
	return name
}
|
||||
|
||||
/* Rewrite rewrites the given string data and adds markup before and after if needed.
|
||||
* Parameters:
|
||||
* data - The data to be rewritten.
|
||||
* svc - Services interface we can use.
|
||||
* Returns:
|
||||
* Pointer to markup data, or nil.
|
||||
*/
|
||||
func (rw *urlRewriter) Rewrite(data string, svc rewriterServices) *markupData {
|
||||
for _, ue := range urlElements {
|
||||
s := ue.eval(data)
|
||||
if s != "" {
|
||||
ls := strings.ToLower(s)
|
||||
if strings.HasPrefix(ls, "http:") || strings.HasPrefix(ls, "https:") {
|
||||
url, err := url.Parse(s)
|
||||
if err == nil {
|
||||
svc.addExternalRef(url)
|
||||
}
|
||||
}
|
||||
var openA strings.Builder
|
||||
openA.WriteString("<a href=\"")
|
||||
openA.WriteString(s)
|
||||
openA.WriteString("\"")
|
||||
catenate := svc.rewriterAttrValue("ANCHORTAIL")
|
||||
if catenate != "" {
|
||||
openA.WriteString(" ")
|
||||
openA.WriteString(catenate)
|
||||
}
|
||||
openA.WriteString(">")
|
||||
return &markupData{
|
||||
beginMarkup: openA.String(),
|
||||
text: data,
|
||||
endMarkup: "</a>",
|
||||
rescan: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user