From 2f3708d40c5007062470b0c89b293353ee4f6aa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Fri, 30 Oct 2020 22:46:43 -0700 Subject: [PATCH] Do not use charset.NewReader if the body is a valid UTF-8 document --- go.mod | 4 ++-- go.sum | 9 +++++++-- http/client/response.go | 42 ++++++++++++++++++----------------------- 3 files changed, 27 insertions(+), 28 deletions(-) diff --git a/go.mod b/go.mod index dcd9c6df..31c7e49b 100644 --- a/go.mod +++ b/go.mod @@ -11,9 +11,9 @@ require ( github.com/prometheus/client_golang v1.8.0 github.com/rylans/getlang v0.0.0-20200505200108-4c3188ff8a2d github.com/stretchr/testify v1.6.1 // indirect - github.com/tdewolff/minify/v2 v2.9.9 // indirect + github.com/tdewolff/minify/v2 v2.9.10 // indirect golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 - golang.org/x/net v0.0.0-20200625001655-4c5254603344 + golang.org/x/net v0.0.0-20201029221708-28c70e62bb1d golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d google.golang.org/appengine v1.6.6 // indirect google.golang.org/protobuf v1.25.0 // indirect diff --git a/go.sum b/go.sum index f408587f..ad730bd9 100644 --- a/go.sum +++ b/go.sum @@ -289,8 +289,8 @@ github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/tdewolff/minify v1.1.0 h1:nxHQi1ML+g3ZbZHffiZ6eC7vMqNvSRfX3KB5Y5y/kfw= github.com/tdewolff/minify v2.3.6+incompatible h1:2hw5/9ZvxhWLvBUnHE06gElGYz+Jv9R4Eys0XUzItYo= -github.com/tdewolff/minify/v2 v2.9.9 h1:5POLhoyTEWNNHADzlwH83AhvpKVAdpS7fOfaWIOWOTw= -github.com/tdewolff/minify/v2 v2.9.9/go.mod h1:U1Nc+/YBSB0FPEarqcgkYH3Ep4DNyyIbOyl5P4eWMuo= +github.com/tdewolff/minify/v2 v2.9.10 h1:p+ifTTl+JMFFLDYNAm7nxQ9XuCG10HTW00wlPAZ7aoE= +github.com/tdewolff/minify/v2 v2.9.10/go.mod h1:U1Nc+/YBSB0FPEarqcgkYH3Ep4DNyyIbOyl5P4eWMuo= github.com/tdewolff/parse/v2 v2.5.5 h1:b7ICJa4I/54JQGEGgTte8DiyJPKcC5g8V773QMzkeUM= github.com/tdewolff/parse/v2 v2.5.5/go.mod h1:WzaJpRSbwq++EIQHYIRTpbYKNA3gn9it1Ik++q4zyho= github.com/tdewolff/test v1.0.6/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= @@ -346,6 +346,8 @@ golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200625001655-4c5254603344 h1:vGXIOMxbNfDTk/aXCmfdLgkrSV+Z2tcbze+pEc3v5W4= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20201029221708-28c70e62bb1d h1:dOiJ2n2cMwGLce/74I/QHMbnpk5GfY7InR8rczoMqRM= +golang.org/x/net v0.0.0-20201029221708-28c70e62bb1d/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d h1:TzXSXBo42m9gQenoE3b9BGiEpg5IG2JkU5FkPIawgtw= @@ -377,11 +379,14 @@ golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200724161237-0e2f3a69832c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201015000850-e3ed0017c211 h1:9UQO31fZ+0aKQOFldThf7BKPMJTiBfWycGh/u3UoO88= golang.org/x/sys v0.0.0-20201015000850-e3ed0017c211/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/http/client/response.go b/http/client/response.go index 122c40c3..c9c124a5 100644 --- a/http/client/response.go +++ b/http/client/response.go @@ -87,32 +87,26 @@ func (r *Response) IsModified(etag, lastModified string) bool { // - Feeds with encoding specified only in XML document and not in HTTP header // - Feeds with wrong encoding defined and already in UTF-8 func (r *Response) EnsureUnicodeBody() (err error) { - if r.ContentType != "" { - // JSON feeds are always in UTF-8. - if strings.Contains(r.ContentType, "json") { - return + buffer, err := ioutil.ReadAll(r.Body) + if err != nil { + return err + } + + r.Body = bytes.NewReader(buffer) + if utf8.Valid(buffer) { + return nil + } + + if strings.Contains(r.ContentType, "xml") { + // We ignore documents with encoding specified in XML prolog. + // This is going to be handled by the XML parser. + length := 1024 + if len(buffer) < 1024 { + length = len(buffer) } - if strings.Contains(r.ContentType, "xml") { - buffer, _ := ioutil.ReadAll(r.Body) - r.Body = bytes.NewReader(buffer) - - // We ignore documents with encoding specified in XML prolog. - // This is going to be handled by the XML parser. - length := 1024 - if len(buffer) < 1024 { - length = len(buffer) - } - - if xmlEncodingRegex.Match(buffer[0:length]) { - return - } - - // If no encoding is specified in the XML prolog and - // the document is valid UTF-8, nothing needs to be done. - if utf8.Valid(buffer) { - return - } + if xmlEncodingRegex.Match(buffer[0:length]) { + return nil } }