From 0413daf76baf9de440ba1633066a76711c3a7554 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Sat, 13 Feb 2021 13:52:18 -0800 Subject: [PATCH] Remove iframe inner HTML contents An iframe element never has fallback content, as it will always create a nested browsing context, regardless of whether the specified initial contents are successfully used. https://www.w3.org/TR/2010/WD-html5-20101019/the-iframe-element.html#the-iframe-element --- reader/sanitizer/sanitizer.go | 10 +++++++++- reader/sanitizer/sanitizer_test.go | 10 ++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/reader/sanitizer/sanitizer.go b/reader/sanitizer/sanitizer.go index 2da7b483..3abafc87 100644 --- a/reader/sanitizer/sanitizer.go +++ b/reader/sanitizer/sanitizer.go @@ -24,11 +24,12 @@ var ( // Sanitize returns safe HTML. func Sanitize(baseURL, input string) string { - tokenizer := html.NewTokenizer(bytes.NewBufferString(input)) var buffer bytes.Buffer var tagStack []string + var parentTag string blacklistedTagDepth := 0 + tokenizer := html.NewTokenizer(bytes.NewBufferString(input)) for { if tokenizer.Next() == html.ErrorToken { err := tokenizer.Err() @@ -46,9 +47,16 @@ func Sanitize(baseURL, input string) string { continue } + // An iframe element never has fallback content. + // See https://www.w3.org/TR/2010/WD-html5-20101019/the-iframe-element.html#the-iframe-element + if parentTag == "iframe" { + continue + } + buffer.WriteString(html.EscapeString(token.Data)) case html.StartTagToken: tagName := token.DataAtom.String() + parentTag = tagName if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) { attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr) diff --git a/reader/sanitizer/sanitizer_test.go b/reader/sanitizer/sanitizer_test.go index 9bf4528e..72729b37 100644 --- a/reader/sanitizer/sanitizer_test.go +++ b/reader/sanitizer/sanitizer_test.go @@ -173,6 +173,16 @@ func TestInvalidIFrame(t *testing.T) { } } +func TestIFrameWithChildElements(t *testing.T) { + input := `` + expected := `` + output := Sanitize("http://example.com/", input) + + if expected != output { + t.Errorf(`Wrong output: "%s" != "%s"`, expected, output) + } +} + func TestInvalidURLScheme(t *testing.T) { input := `

This link is not valid

` expected := `

This link is not valid

`