From 138fd926ee0030457d0628995c952c83dfaf436f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Sun, 11 Sep 2022 22:32:16 -0700 Subject: [PATCH] Do not convert anchors to absolute links --- reader/sanitizer/sanitizer.go | 44 ++++++++++++++++++------------ reader/sanitizer/sanitizer_test.go | 10 +++++++ 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/reader/sanitizer/sanitizer.go b/reader/sanitizer/sanitizer.go index ad7afbca..b9b4510c 100644 --- a/reader/sanitizer/sanitizer.go +++ b/reader/sanitizer/sanitizer.go @@ -101,6 +101,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([ var htmlAttrs, attrNames []string var err error var isImageLargerThanLayout bool + var isAnchorLink bool if tagName == "img" { imgWidth := getIntegerAttributeValue("width", attributes) @@ -137,6 +138,9 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([ } } else if tagName == "img" && attribute.Key == "src" && isValidDataAttribute(attribute.Val) { value = attribute.Val + } else if isAnchor(tagName, attribute) { + value = attribute.Val + isAnchorLink = true } else { value, err = url.AbsoluteURL(baseURL, value) if err != nil { @@ -153,10 +157,12 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([ htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value))) } - extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName) - if len(extraAttrNames) > 0 { - attrNames = append(attrNames, extraAttrNames...) - htmlAttrs = append(htmlAttrs, extraHTMLAttributes...) + if !isAnchorLink { + extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName) + if len(extraAttrNames) > 0 { + attrNames = append(attrNames, extraAttrNames...) + htmlAttrs = append(htmlAttrs, extraHTMLAttributes...) 
+ } } return attrNames, strings.Join(htmlAttrs, " ") @@ -370,9 +376,9 @@ func getTagAllowList() map[string][]string { whitelist["audio"] = []string{"src"} whitelist["video"] = []string{"poster", "height", "width", "src"} whitelist["source"] = []string{"src", "type", "srcset", "sizes", "media"} - whitelist["dt"] = []string{} - whitelist["dd"] = []string{} - whitelist["dl"] = []string{} + whitelist["dt"] = []string{"id"} + whitelist["dd"] = []string{"id"} + whitelist["dl"] = []string{"id"} whitelist["table"] = []string{} whitelist["caption"] = []string{} whitelist["thead"] = []string{} @@ -380,12 +386,12 @@ func getTagAllowList() map[string][]string { whitelist["tr"] = []string{} whitelist["td"] = []string{"rowspan", "colspan"} whitelist["th"] = []string{"rowspan", "colspan"} - whitelist["h1"] = []string{} - whitelist["h2"] = []string{} - whitelist["h3"] = []string{} - whitelist["h4"] = []string{} - whitelist["h5"] = []string{} - whitelist["h6"] = []string{} + whitelist["h1"] = []string{"id"} + whitelist["h2"] = []string{"id"} + whitelist["h3"] = []string{"id"} + whitelist["h4"] = []string{"id"} + whitelist["h5"] = []string{"id"} + whitelist["h6"] = []string{"id"} whitelist["strong"] = []string{} whitelist["em"] = []string{} whitelist["code"] = []string{} @@ -393,12 +399,12 @@ func getTagAllowList() map[string][]string { whitelist["blockquote"] = []string{} whitelist["q"] = []string{"cite"} whitelist["p"] = []string{} - whitelist["ul"] = []string{} - whitelist["li"] = []string{} - whitelist["ol"] = []string{} + whitelist["ul"] = []string{"id"} + whitelist["li"] = []string{"id"} + whitelist["ol"] = []string{"id"} whitelist["br"] = []string{} whitelist["del"] = []string{} - whitelist["a"] = []string{"href", "title"} + whitelist["a"] = []string{"href", "title", "id"} whitelist["figure"] = []string{} whitelist["figcaption"] = []string{} whitelist["cite"] = []string{} @@ -492,6 +498,10 @@ func isValidDataAttribute(value string) bool { return false } +func isAnchor(tagName 
string, attribute html.Attribute) bool { + return tagName == "a" && attribute.Key == "href" && strings.HasPrefix(attribute.Val, "#") +} + func isPositiveInteger(value string) bool { if number, err := strconv.Atoi(value); err == nil { return number > 0 diff --git a/reader/sanitizer/sanitizer_test.go b/reader/sanitizer/sanitizer_test.go index 74452590..65961875 100644 --- a/reader/sanitizer/sanitizer_test.go +++ b/reader/sanitizer/sanitizer_test.go @@ -203,6 +203,16 @@ func TestIFrameWithChildElements(t *testing.T) { } } +func TestAnchorLink(t *testing.T) { + input := `

This link is an anchor

` + expected := `

This link is an anchor

` + output := Sanitize("http://example.org/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + func TestInvalidURLScheme(t *testing.T) { input := `

This link is not valid

` expected := `

This link is not valid

`