Do not convert anchors to absolute links

This commit is contained in:
Frédéric Guillot 2022-09-11 22:32:16 -07:00
parent 183cb491b3
commit 138fd926ee
2 changed files with 37 additions and 17 deletions

View File

@ -101,6 +101,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
var htmlAttrs, attrNames []string var htmlAttrs, attrNames []string
var err error var err error
var isImageLargerThanLayout bool var isImageLargerThanLayout bool
var isAnchorLink bool
if tagName == "img" { if tagName == "img" {
imgWidth := getIntegerAttributeValue("width", attributes) imgWidth := getIntegerAttributeValue("width", attributes)
@ -137,6 +138,9 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
} }
} else if tagName == "img" && attribute.Key == "src" && isValidDataAttribute(attribute.Val) { } else if tagName == "img" && attribute.Key == "src" && isValidDataAttribute(attribute.Val) {
value = attribute.Val value = attribute.Val
} else if isAnchor("a", attribute) {
value = attribute.Val
isAnchorLink = true
} else { } else {
value, err = url.AbsoluteURL(baseURL, value) value, err = url.AbsoluteURL(baseURL, value)
if err != nil { if err != nil {
@ -153,10 +157,12 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value))) htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value)))
} }
extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName) if !isAnchorLink {
if len(extraAttrNames) > 0 { extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName)
attrNames = append(attrNames, extraAttrNames...) if len(extraAttrNames) > 0 {
htmlAttrs = append(htmlAttrs, extraHTMLAttributes...) attrNames = append(attrNames, extraAttrNames...)
htmlAttrs = append(htmlAttrs, extraHTMLAttributes...)
}
} }
return attrNames, strings.Join(htmlAttrs, " ") return attrNames, strings.Join(htmlAttrs, " ")
@ -370,9 +376,9 @@ func getTagAllowList() map[string][]string {
whitelist["audio"] = []string{"src"} whitelist["audio"] = []string{"src"}
whitelist["video"] = []string{"poster", "height", "width", "src"} whitelist["video"] = []string{"poster", "height", "width", "src"}
whitelist["source"] = []string{"src", "type", "srcset", "sizes", "media"} whitelist["source"] = []string{"src", "type", "srcset", "sizes", "media"}
whitelist["dt"] = []string{} whitelist["dt"] = []string{"id"}
whitelist["dd"] = []string{} whitelist["dd"] = []string{"id"}
whitelist["dl"] = []string{} whitelist["dl"] = []string{"id"}
whitelist["table"] = []string{} whitelist["table"] = []string{}
whitelist["caption"] = []string{} whitelist["caption"] = []string{}
whitelist["thead"] = []string{} whitelist["thead"] = []string{}
@ -380,12 +386,12 @@ func getTagAllowList() map[string][]string {
whitelist["tr"] = []string{} whitelist["tr"] = []string{}
whitelist["td"] = []string{"rowspan", "colspan"} whitelist["td"] = []string{"rowspan", "colspan"}
whitelist["th"] = []string{"rowspan", "colspan"} whitelist["th"] = []string{"rowspan", "colspan"}
whitelist["h1"] = []string{} whitelist["h1"] = []string{"id"}
whitelist["h2"] = []string{} whitelist["h2"] = []string{"id"}
whitelist["h3"] = []string{} whitelist["h3"] = []string{"id"}
whitelist["h4"] = []string{} whitelist["h4"] = []string{"id"}
whitelist["h5"] = []string{} whitelist["h5"] = []string{"id"}
whitelist["h6"] = []string{} whitelist["h6"] = []string{"id"}
whitelist["strong"] = []string{} whitelist["strong"] = []string{}
whitelist["em"] = []string{} whitelist["em"] = []string{}
whitelist["code"] = []string{} whitelist["code"] = []string{}
@ -393,12 +399,12 @@ func getTagAllowList() map[string][]string {
whitelist["blockquote"] = []string{} whitelist["blockquote"] = []string{}
whitelist["q"] = []string{"cite"} whitelist["q"] = []string{"cite"}
whitelist["p"] = []string{} whitelist["p"] = []string{}
whitelist["ul"] = []string{} whitelist["ul"] = []string{"id"}
whitelist["li"] = []string{} whitelist["li"] = []string{"id"}
whitelist["ol"] = []string{} whitelist["ol"] = []string{"id"}
whitelist["br"] = []string{} whitelist["br"] = []string{}
whitelist["del"] = []string{} whitelist["del"] = []string{}
whitelist["a"] = []string{"href", "title"} whitelist["a"] = []string{"href", "title", "id"}
whitelist["figure"] = []string{} whitelist["figure"] = []string{}
whitelist["figcaption"] = []string{} whitelist["figcaption"] = []string{}
whitelist["cite"] = []string{} whitelist["cite"] = []string{}
@ -492,6 +498,10 @@ func isValidDataAttribute(value string) bool {
return false return false
} }
// isAnchor reports whether the given attribute is an in-page anchor link:
// the tag must be "a", the attribute key must be "href", and the attribute
// value must begin with "#".
func isAnchor(tagName string, attribute html.Attribute) bool {
	// Anything other than an href on an <a> tag can never be an anchor link.
	if tagName != "a" || attribute.Key != "href" {
		return false
	}

	return strings.HasPrefix(attribute.Val, "#")
}
func isPositiveInteger(value string) bool { func isPositiveInteger(value string) bool {
if number, err := strconv.Atoi(value); err == nil { if number, err := strconv.Atoi(value); err == nil {
return number > 0 return number > 0

View File

@ -203,6 +203,16 @@ func TestIFrameWithChildElements(t *testing.T) {
} }
} }
// TestAnchorLink checks that Sanitize, called with the base URL
// "http://example.org/", returns a fragment-only href ("#some-anchor")
// unchanged: the expected output is identical to the input markup.
func TestAnchorLink(t *testing.T) {
	const (
		input    = `<p>This link is <a href="#some-anchor">an anchor</a></p>`
		expected = `<p>This link is <a href="#some-anchor">an anchor</a></p>`
	)

	if output := Sanitize("http://example.org/", input); output != expected {
		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
	}
}
func TestInvalidURLScheme(t *testing.T) { func TestInvalidURLScheme(t *testing.T) {
input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>` input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
expected := `<p>This link is not valid</p>` expected := `<p>This link is not valid</p>`