mirror of
https://github.com/miniflux/v2.git
synced 2024-09-27 21:02:41 +02:00
sanitizer: handle image URLs in srcset attribute with comma
This commit is contained in:
parent
d85908e3de
commit
806a069785
@ -5,18 +5,16 @@
|
|||||||
package proxy // import "miniflux.app/proxy"
|
package proxy // import "miniflux.app/proxy"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"regexp"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"miniflux.app/config"
|
"miniflux.app/config"
|
||||||
|
"miniflux.app/reader/sanitizer"
|
||||||
"miniflux.app/url"
|
"miniflux.app/url"
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
)
|
)
|
||||||
|
|
||||||
var regexSplitSrcset = regexp.MustCompile(`,\s+`)
|
|
||||||
|
|
||||||
// ImageProxyRewriter replaces image URLs with internal proxy URLs.
|
// ImageProxyRewriter replaces image URLs with internal proxy URLs.
|
||||||
func ImageProxyRewriter(router *mux.Router, data string) string {
|
func ImageProxyRewriter(router *mux.Router, data string) string {
|
||||||
proxyImages := config.Opts.ProxyImages()
|
proxyImages := config.Opts.ProxyImages()
|
||||||
@ -30,24 +28,20 @@ func ImageProxyRewriter(router *mux.Router, data string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
doc.Find("img").Each(func(i int, img *goquery.Selection) {
|
doc.Find("img").Each(func(i int, img *goquery.Selection) {
|
||||||
if srcAttr, ok := img.Attr("src"); ok {
|
if srcAttrValue, ok := img.Attr("src"); ok {
|
||||||
if !isDataURL(srcAttr) && (proxyImages == "all" || !url.IsHTTPS(srcAttr)) {
|
if !isDataURL(srcAttrValue) && (proxyImages == "all" || !url.IsHTTPS(srcAttrValue)) {
|
||||||
img.SetAttr("src", ProxifyURL(router, srcAttr))
|
img.SetAttr("src", ProxifyURL(router, srcAttrValue))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if srcsetAttr, ok := img.Attr("srcset"); ok {
|
if srcsetAttrValue, ok := img.Attr("srcset"); ok {
|
||||||
if proxyImages == "all" || !url.IsHTTPS(srcsetAttr) {
|
proxifySourceSet(img, router, proxyImages, srcsetAttrValue)
|
||||||
proxifySourceSet(img, router, srcsetAttr)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
doc.Find("picture source").Each(func(i int, sourceElement *goquery.Selection) {
|
doc.Find("picture source").Each(func(i int, sourceElement *goquery.Selection) {
|
||||||
if srcsetAttr, ok := sourceElement.Attr("srcset"); ok {
|
if srcsetAttrValue, ok := sourceElement.Attr("srcset"); ok {
|
||||||
if proxyImages == "all" || !url.IsHTTPS(srcsetAttr) {
|
proxifySourceSet(sourceElement, router, proxyImages, srcsetAttrValue)
|
||||||
proxifySourceSet(sourceElement, router, srcsetAttr)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -59,30 +53,16 @@ func ImageProxyRewriter(router *mux.Router, data string) string {
|
|||||||
return output
|
return output
|
||||||
}
|
}
|
||||||
|
|
||||||
func proxifySourceSet(element *goquery.Selection, router *mux.Router, attributeValue string) {
|
func proxifySourceSet(element *goquery.Selection, router *mux.Router, proxyImages, srcsetAttrValue string) {
|
||||||
var proxifiedSources []string
|
imageCandidates := sanitizer.ParseSrcSetAttribute(srcsetAttrValue)
|
||||||
|
|
||||||
for _, source := range regexSplitSrcset.Split(attributeValue, -1) {
|
for _, imageCandidate := range imageCandidates {
|
||||||
parts := strings.Split(strings.TrimSpace(source), " ")
|
if !isDataURL(imageCandidate.ImageURL) && (proxyImages == "all" || !url.IsHTTPS(imageCandidate.ImageURL)) {
|
||||||
nbParts := len(parts)
|
imageCandidate.ImageURL = ProxifyURL(router, imageCandidate.ImageURL)
|
||||||
|
|
||||||
if nbParts > 0 {
|
|
||||||
rewrittenSource := parts[0]
|
|
||||||
if !isDataURL(rewrittenSource) {
|
|
||||||
rewrittenSource = ProxifyURL(router, rewrittenSource)
|
|
||||||
}
|
|
||||||
|
|
||||||
if nbParts > 1 {
|
|
||||||
rewrittenSource += " " + parts[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
proxifiedSources = append(proxifiedSources, rewrittenSource)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(proxifiedSources) > 0 {
|
element.SetAttr("srcset", imageCandidates.String())
|
||||||
element.SetAttr("srcset", strings.Join(proxifiedSources, ", "))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func isDataURL(s string) bool {
|
func isDataURL(s string) bool {
|
||||||
|
@ -234,8 +234,31 @@ func TestProxyFilterWithPictureSource(t *testing.T) {
|
|||||||
r := mux.NewRouter()
|
r := mux.NewRouter()
|
||||||
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||||
|
|
||||||
input := `<picture><source srcset="http://website/folder/image2.png 656w, http://website/folder/image3.png 360w"></picture>`
|
input := `<picture><source srcset="http://website/folder/image2.png 656w, http://website/folder/image3.png 360w, https://website/some,image.png 2x"></picture>`
|
||||||
expected := `<picture><source srcset="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMi5wbmc= 656w, /proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMy5wbmc= 360w"/></picture>`
|
expected := `<picture><source srcset="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMi5wbmc= 656w, /proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMy5wbmc= 360w, /proxy/aHR0cHM6Ly93ZWJzaXRlL3NvbWUsaW1hZ2UucG5n 2x"/></picture>`
|
||||||
|
output := ImageProxyRewriter(r, input)
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Not expected output: got %s`, output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestProxyFilterOnlyNonHTTPWithPictureSource(t *testing.T) {
|
||||||
|
os.Clearenv()
|
||||||
|
os.Setenv("PROXY_IMAGES", "https")
|
||||||
|
|
||||||
|
var err error
|
||||||
|
parser := config.NewParser()
|
||||||
|
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf(`Parsing failure: %v`, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
r := mux.NewRouter()
|
||||||
|
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||||
|
|
||||||
|
input := `<picture><source srcset="http://website/folder/image2.png 656w, https://website/some,image.png 2x"></picture>`
|
||||||
|
expected := `<picture><source srcset="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMi5wbmc= 656w, https://website/some,image.png 2x"/></picture>`
|
||||||
output := ImageProxyRewriter(r, input)
|
output := ImageProxyRewriter(r, input)
|
||||||
|
|
||||||
if expected != output {
|
if expected != output {
|
||||||
|
@ -20,7 +20,6 @@ import (
|
|||||||
|
|
||||||
var (
|
var (
|
||||||
youtubeEmbedRegex = regexp.MustCompile(`//www\.youtube\.com/embed/(.*)`)
|
youtubeEmbedRegex = regexp.MustCompile(`//www\.youtube\.com/embed/(.*)`)
|
||||||
splitSrcsetRegex = regexp.MustCompile(`,\s?`)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Sanitize returns safe HTML.
|
// Sanitize returns safe HTML.
|
||||||
@ -447,52 +446,17 @@ func isBlockedTag(tagName string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
One or more strings separated by commas, indicating possible image sources for the user agent to use.
|
|
||||||
|
|
||||||
Each string is composed of:
|
|
||||||
- A URL to an image
|
|
||||||
- Optionally, whitespace followed by one of:
|
|
||||||
- A width descriptor (a positive integer directly followed by w). The width descriptor is divided by the source size given in the sizes attribute to calculate the effective pixel density.
|
|
||||||
- A pixel density descriptor (a positive floating point number directly followed by x).
|
|
||||||
|
|
||||||
*/
|
|
||||||
func sanitizeSrcsetAttr(baseURL, value string) string {
|
func sanitizeSrcsetAttr(baseURL, value string) string {
|
||||||
var sanitizedSources []string
|
imageCandidates := ParseSrcSetAttribute(value)
|
||||||
rawSources := splitSrcsetRegex.Split(value, -1)
|
|
||||||
for _, rawSource := range rawSources {
|
|
||||||
parts := strings.Split(strings.TrimSpace(rawSource), " ")
|
|
||||||
nbParts := len(parts)
|
|
||||||
|
|
||||||
if nbParts > 0 {
|
for _, imageCandidate := range imageCandidates {
|
||||||
sanitizedSource, err := url.AbsoluteURL(baseURL, parts[0])
|
absoluteURL, err := url.AbsoluteURL(baseURL, imageCandidate.ImageURL)
|
||||||
if err != nil {
|
if err == nil {
|
||||||
continue
|
imageCandidate.ImageURL = absoluteURL
|
||||||
}
|
|
||||||
|
|
||||||
if nbParts == 2 && isValidWidthOrDensityDescriptor(parts[1]) {
|
|
||||||
sanitizedSource += " " + parts[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
sanitizedSources = append(sanitizedSources, sanitizedSource)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return strings.Join(sanitizedSources, ", ")
|
|
||||||
}
|
|
||||||
|
|
||||||
func isValidWidthOrDensityDescriptor(value string) bool {
|
return imageCandidates.String()
|
||||||
if value == "" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
lastChar := value[len(value)-1:]
|
|
||||||
if lastChar != "w" && lastChar != "x" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err := strconv.ParseFloat(value[0:len(value)-1], 32)
|
|
||||||
return err == nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func isValidDataAttribute(value string) bool {
|
func isValidDataAttribute(value string) bool {
|
||||||
|
@ -85,16 +85,6 @@ func TestMediumImgWithSrcset(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEconomistImgWithSrcset(t *testing.T) {
|
|
||||||
input := `<img loading="lazy" src="https://www.economist.com/img/b/608/634/90/sites/default/files/images/print-edition/20211009_WWC585.png" srcSet="https://www.economist.com/img/b/200/209/90/sites/default/files/images/print-edition/20211009_WWC585.png 200w,https://www.economist.com/img/b/300/313/90/sites/default/files/images/print-edition/20211009_WWC585.png 300w,https://www.economist.com/img/b/400/417/90/sites/default/files/images/print-edition/20211009_WWC585.png 400w,https://www.economist.com/img/b/600/626/90/sites/default/files/images/print-edition/20211009_WWC585.png 600w,https://www.economist.com/img/b/640/667/90/sites/default/files/images/print-edition/20211009_WWC585.png 640w,https://www.economist.com/img/b/800/834/90/sites/default/files/images/print-edition/20211009_WWC585.png 800w,https://www.economist.com/img/b/1000/1043/90/sites/default/files/images/print-edition/20211009_WWC585.png 1000w,https://www.economist.com/img/b/1280/1335/90/sites/default/files/images/print-edition/20211009_WWC585.png 1280w" sizes="300px" alt=""/>`
|
|
||||||
expected := `<img src="https://www.economist.com/img/b/608/634/90/sites/default/files/images/print-edition/20211009_WWC585.png" srcset="https://www.economist.com/img/b/200/209/90/sites/default/files/images/print-edition/20211009_WWC585.png 200w, https://www.economist.com/img/b/300/313/90/sites/default/files/images/print-edition/20211009_WWC585.png 300w, https://www.economist.com/img/b/400/417/90/sites/default/files/images/print-edition/20211009_WWC585.png 400w, https://www.economist.com/img/b/600/626/90/sites/default/files/images/print-edition/20211009_WWC585.png 600w, https://www.economist.com/img/b/640/667/90/sites/default/files/images/print-edition/20211009_WWC585.png 640w, https://www.economist.com/img/b/800/834/90/sites/default/files/images/print-edition/20211009_WWC585.png 800w, https://www.economist.com/img/b/1000/1043/90/sites/default/files/images/print-edition/20211009_WWC585.png 1000w, https://www.economist.com/img/b/1280/1335/90/sites/default/files/images/print-edition/20211009_WWC585.png 1280w" sizes="300px" alt="" loading="lazy"/>`
|
|
||||||
output := Sanitize("http://example.org/", input)
|
|
||||||
|
|
||||||
if output != expected {
|
|
||||||
t.Errorf(`Wrong output: %s`, output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestSelfClosingTags(t *testing.T) {
|
func TestSelfClosingTags(t *testing.T) {
|
||||||
input := `<p>This <br> is a <strong>text</strong> <br/>with an image: <img src="http://example.org/" alt="Test" loading="lazy"/>.</p>`
|
input := `<p>This <br> is a <strong>text</strong> <br/>with an image: <img src="http://example.org/" alt="Test" loading="lazy"/>.</p>`
|
||||||
output := Sanitize("http://example.org/", input)
|
output := Sanitize("http://example.org/", input)
|
||||||
|
82
reader/sanitizer/srcset.go
Normal file
82
reader/sanitizer/srcset.go
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
// Copyright 2022 Frédéric Guillot. All rights reserved.
|
||||||
|
// Use of this source code is governed by the Apache 2.0
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package sanitizer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
type ImageCandidate struct {
|
||||||
|
ImageURL string
|
||||||
|
Descriptor string
|
||||||
|
}
|
||||||
|
|
||||||
|
type ImageCandidates []*ImageCandidate
|
||||||
|
|
||||||
|
func (c ImageCandidates) String() string {
|
||||||
|
var htmlCandidates []string
|
||||||
|
|
||||||
|
for _, imageCandidate := range c {
|
||||||
|
var htmlCandidate string
|
||||||
|
if imageCandidate.Descriptor != "" {
|
||||||
|
htmlCandidate = fmt.Sprintf(`%s %s`, imageCandidate.ImageURL, imageCandidate.Descriptor)
|
||||||
|
} else {
|
||||||
|
htmlCandidate = imageCandidate.ImageURL
|
||||||
|
}
|
||||||
|
|
||||||
|
htmlCandidates = append(htmlCandidates, htmlCandidate)
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.Join(htmlCandidates, ", ")
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseSrcSetAttribute returns the list of image candidates from the set.
|
||||||
|
// https://html.spec.whatwg.org/#parse-a-srcset-attribute
|
||||||
|
func ParseSrcSetAttribute(attributeValue string) (imageCandidates ImageCandidates) {
|
||||||
|
unparsedCandidates := strings.Split(attributeValue, ", ")
|
||||||
|
|
||||||
|
for _, unparsedCandidate := range unparsedCandidates {
|
||||||
|
if candidate, err := parseImageCandidate(unparsedCandidate); err == nil {
|
||||||
|
imageCandidates = append(imageCandidates, candidate)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return imageCandidates
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseImageCandidate(input string) (*ImageCandidate, error) {
|
||||||
|
input = strings.TrimSpace(input)
|
||||||
|
parts := strings.Split(strings.TrimSpace(input), " ")
|
||||||
|
nbParts := len(parts)
|
||||||
|
|
||||||
|
if nbParts > 2 || nbParts == 0 {
|
||||||
|
return nil, fmt.Errorf(`srcset: invalid number of descriptors`)
|
||||||
|
}
|
||||||
|
|
||||||
|
if nbParts == 2 {
|
||||||
|
if !isValidWidthOrDensityDescriptor(parts[1]) {
|
||||||
|
return nil, fmt.Errorf(`srcset: invalid descriptor`)
|
||||||
|
}
|
||||||
|
return &ImageCandidate{ImageURL: parts[0], Descriptor: parts[1]}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return &ImageCandidate{ImageURL: parts[0]}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func isValidWidthOrDensityDescriptor(value string) bool {
|
||||||
|
if value == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
lastChar := value[len(value)-1:]
|
||||||
|
if lastChar != "w" && lastChar != "x" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err := strconv.ParseFloat(value[0:len(value)-1], 32)
|
||||||
|
return err == nil
|
||||||
|
}
|
85
reader/sanitizer/srcset_test.go
Normal file
85
reader/sanitizer/srcset_test.go
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
// Copyright 2022 Frédéric Guillot. All rights reserved.
|
||||||
|
// Use of this source code is governed by the Apache 2.0
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package sanitizer
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestParseSrcSetAttributeWithRelativeURLs(t *testing.T) {
|
||||||
|
input := `example-320w.jpg, example-480w.jpg 1.5x, example-640,w.jpg 2x, example-640w.jpg 640w`
|
||||||
|
candidates := ParseSrcSetAttribute(input)
|
||||||
|
|
||||||
|
if len(candidates) != 4 {
|
||||||
|
t.Error(`Incorrect number of candidates`)
|
||||||
|
}
|
||||||
|
|
||||||
|
if candidates.String() != `example-320w.jpg, example-480w.jpg 1.5x, example-640,w.jpg 2x, example-640w.jpg 640w` {
|
||||||
|
t.Errorf(`Unexpected string output`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseSrcSetAttributeWithAbsoluteURLs(t *testing.T) {
|
||||||
|
input := `http://example.org/example-320w.jpg 320w, http://example.org/example-480w.jpg 1.5x`
|
||||||
|
candidates := ParseSrcSetAttribute(input)
|
||||||
|
|
||||||
|
if len(candidates) != 2 {
|
||||||
|
t.Error(`Incorrect number of candidates`)
|
||||||
|
}
|
||||||
|
|
||||||
|
if candidates.String() != `http://example.org/example-320w.jpg 320w, http://example.org/example-480w.jpg 1.5x` {
|
||||||
|
t.Errorf(`Unexpected string output`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseSrcSetAttributeWithOneCandidate(t *testing.T) {
|
||||||
|
input := `http://example.org/example-320w.jpg`
|
||||||
|
candidates := ParseSrcSetAttribute(input)
|
||||||
|
|
||||||
|
if len(candidates) != 1 {
|
||||||
|
t.Error(`Incorrect number of candidates`)
|
||||||
|
}
|
||||||
|
|
||||||
|
if candidates.String() != `http://example.org/example-320w.jpg` {
|
||||||
|
t.Errorf(`Unexpected string output`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseSrcSetAttributeWithCommaURL(t *testing.T) {
|
||||||
|
input := `http://example.org/example,a:b/d.jpg , example-480w.jpg 1.5x`
|
||||||
|
candidates := ParseSrcSetAttribute(input)
|
||||||
|
|
||||||
|
if len(candidates) != 2 {
|
||||||
|
t.Error(`Incorrect number of candidates`)
|
||||||
|
}
|
||||||
|
|
||||||
|
if candidates.String() != `http://example.org/example,a:b/d.jpg, example-480w.jpg 1.5x` {
|
||||||
|
t.Errorf(`Unexpected string output`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseSrcSetAttributeWithIncorrectDescriptor(t *testing.T) {
|
||||||
|
input := `http://example.org/example-320w.jpg test`
|
||||||
|
candidates := ParseSrcSetAttribute(input)
|
||||||
|
|
||||||
|
if len(candidates) != 0 {
|
||||||
|
t.Error(`Incorrect number of candidates`)
|
||||||
|
}
|
||||||
|
|
||||||
|
if candidates.String() != `` {
|
||||||
|
t.Errorf(`Unexpected string output`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseSrcSetAttributeWithTooManyDescriptors(t *testing.T) {
|
||||||
|
input := `http://example.org/example-320w.jpg 10w 1x`
|
||||||
|
candidates := ParseSrcSetAttribute(input)
|
||||||
|
|
||||||
|
if len(candidates) != 0 {
|
||||||
|
t.Error(`Incorrect number of candidates`)
|
||||||
|
}
|
||||||
|
|
||||||
|
if candidates.String() != `` {
|
||||||
|
t.Errorf(`Unexpected string output`)
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user