Do not convert anchors to absolute links

This commit is contained in:
Frédéric Guillot 2022-09-11 22:32:16 -07:00
parent 183cb491b3
commit 138fd926ee
2 changed files with 37 additions and 17 deletions

View File

@ -101,6 +101,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
var htmlAttrs, attrNames []string
var err error
var isImageLargerThanLayout bool
var isAnchorLink bool
if tagName == "img" {
imgWidth := getIntegerAttributeValue("width", attributes)
@ -137,6 +138,9 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
}
} else if tagName == "img" && attribute.Key == "src" && isValidDataAttribute(attribute.Val) {
value = attribute.Val
} else if isAnchor("a", attribute) {
value = attribute.Val
isAnchorLink = true
} else {
value, err = url.AbsoluteURL(baseURL, value)
if err != nil {
@ -153,10 +157,12 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value)))
}
extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName)
if len(extraAttrNames) > 0 {
attrNames = append(attrNames, extraAttrNames...)
htmlAttrs = append(htmlAttrs, extraHTMLAttributes...)
if !isAnchorLink {
extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName)
if len(extraAttrNames) > 0 {
attrNames = append(attrNames, extraAttrNames...)
htmlAttrs = append(htmlAttrs, extraHTMLAttributes...)
}
}
return attrNames, strings.Join(htmlAttrs, " ")
@ -370,9 +376,9 @@ func getTagAllowList() map[string][]string {
whitelist["audio"] = []string{"src"}
whitelist["video"] = []string{"poster", "height", "width", "src"}
whitelist["source"] = []string{"src", "type", "srcset", "sizes", "media"}
whitelist["dt"] = []string{}
whitelist["dd"] = []string{}
whitelist["dl"] = []string{}
whitelist["dt"] = []string{"id"}
whitelist["dd"] = []string{"id"}
whitelist["dl"] = []string{"id"}
whitelist["table"] = []string{}
whitelist["caption"] = []string{}
whitelist["thead"] = []string{}
@ -380,12 +386,12 @@ func getTagAllowList() map[string][]string {
whitelist["tr"] = []string{}
whitelist["td"] = []string{"rowspan", "colspan"}
whitelist["th"] = []string{"rowspan", "colspan"}
whitelist["h1"] = []string{}
whitelist["h2"] = []string{}
whitelist["h3"] = []string{}
whitelist["h4"] = []string{}
whitelist["h5"] = []string{}
whitelist["h6"] = []string{}
whitelist["h1"] = []string{"id"}
whitelist["h2"] = []string{"id"}
whitelist["h3"] = []string{"id"}
whitelist["h4"] = []string{"id"}
whitelist["h5"] = []string{"id"}
whitelist["h6"] = []string{"id"}
whitelist["strong"] = []string{}
whitelist["em"] = []string{}
whitelist["code"] = []string{}
@ -393,12 +399,12 @@ func getTagAllowList() map[string][]string {
whitelist["blockquote"] = []string{}
whitelist["q"] = []string{"cite"}
whitelist["p"] = []string{}
whitelist["ul"] = []string{}
whitelist["li"] = []string{}
whitelist["ol"] = []string{}
whitelist["ul"] = []string{"id"}
whitelist["li"] = []string{"id"}
whitelist["ol"] = []string{"id"}
whitelist["br"] = []string{}
whitelist["del"] = []string{}
whitelist["a"] = []string{"href", "title"}
whitelist["a"] = []string{"href", "title", "id"}
whitelist["figure"] = []string{}
whitelist["figcaption"] = []string{}
whitelist["cite"] = []string{}
@ -492,6 +498,10 @@ func isValidDataAttribute(value string) bool {
return false
}
// isAnchor reports whether attribute is the href of an "a" element whose
// value begins with "#", i.e. a link to a fragment of the current document.
func isAnchor(tagName string, attribute html.Attribute) bool {
	// Only the href attribute of an <a> tag can qualify.
	if tagName != "a" || attribute.Key != "href" {
		return false
	}
	return strings.HasPrefix(attribute.Val, "#")
}
func isPositiveInteger(value string) bool {
if number, err := strconv.Atoi(value); err == nil {
return number > 0

View File

@ -203,6 +203,16 @@ func TestIFrameWithChildElements(t *testing.T) {
}
}
// TestAnchorLink checks that a paragraph containing a fragment-only link
// (href="#some-anchor") comes out of Sanitize exactly as it went in.
func TestAnchorLink(t *testing.T) {
	// Input and expected output are the same markup.
	const markup = `<p>This link is <a href="#some-anchor">an anchor</a></p>`

	if got := Sanitize("http://example.org/", markup); got != markup {
		t.Errorf(`Wrong output: "%s" != "%s"`, markup, got)
	}
}
func TestInvalidURLScheme(t *testing.T) {
input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
expected := `<p>This link is not valid</p>`