Normalize URL query string before executing HTTP requests

- Make sure query strings parameters are encoded
- As opposed to the standard library, do not append equal sign
for query parameters with empty value
- Strip URL fragments like Web browsers
This commit is contained in:
Frédéric Guillot 2019-12-26 15:26:23 -08:00
parent 200b1c304b
commit 3debf75eb9
8 changed files with 128 additions and 26 deletions

View File

@ -22,6 +22,7 @@ import (
"miniflux.app/errors" "miniflux.app/errors"
"miniflux.app/logger" "miniflux.app/logger"
"miniflux.app/timer" "miniflux.app/timer"
url_helper "miniflux.app/url"
"miniflux.app/version" "miniflux.app/version"
) )
@ -37,7 +38,8 @@ var (
// Client is a HTTP Client :) // Client is a HTTP Client :)
type Client struct { type Client struct {
url string inputURL string
requestURL string
etagHeader string etagHeader string
lastModifiedHeader string lastModifiedHeader string
authorizationHeader string authorizationHeader string
@ -47,6 +49,18 @@ type Client struct {
Insecure bool Insecure bool
} }
func (c *Client) String() string {
return fmt.Sprintf(
`InputURL=%q RequestURL=%q ETag=%s LastModified=%q BasicAuth=%v UserAgent=%q`,
c.inputURL,
c.requestURL,
c.etagHeader,
c.lastModifiedHeader,
c.authorizationHeader != "" || (c.username != "" && c.password != ""),
c.userAgent,
)
}
// WithCredentials defines the username/password for HTTP Basic authentication. // WithCredentials defines the username/password for HTTP Basic authentication.
func (c *Client) WithCredentials(username, password string) *Client { func (c *Client) WithCredentials(username, password string) *Client {
if username != "" && password != "" { if username != "" && password != "" {
@ -115,7 +129,12 @@ func (c *Client) PostJSON(data interface{}) (*Response, error) {
} }
func (c *Client) executeRequest(request *http.Request) (*Response, error) { func (c *Client) executeRequest(request *http.Request) (*Response, error) {
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[HttpClient] url=%s", c.url)) defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[HttpClient] inputURL=%s", c.inputURL))
logger.Debug("[HttpClient:Before] Method=%s %s",
request.Method,
c.String(),
)
client := c.buildClient() client := c.buildClient()
resp, err := client.Do(request) resp, err := client.Do(request)
@ -162,21 +181,15 @@ func (c *Client) executeRequest(request *http.Request) (*Response, error) {
EffectiveURL: resp.Request.URL.String(), EffectiveURL: resp.Request.URL.String(),
LastModified: resp.Header.Get("Last-Modified"), LastModified: resp.Header.Get("Last-Modified"),
ETag: resp.Header.Get("ETag"), ETag: resp.Header.Get("ETag"),
Expires: resp.Header.Get("Expires"),
ContentType: resp.Header.Get("Content-Type"), ContentType: resp.Header.Get("Content-Type"),
ContentLength: resp.ContentLength, ContentLength: resp.ContentLength,
} }
logger.Debug("[HttpClient:%s] URL=%s, EffectiveURL=%s, Code=%d, Length=%d, Type=%s, ETag=%s, LastMod=%s, Expires=%s, Auth=%v", logger.Debug("[HttpClient:After] Method=%s %s; Response => %s",
request.Method, request.Method,
c.url, c.String(),
response.EffectiveURL, response,
response.StatusCode,
resp.ContentLength,
response.ContentType,
response.ETag,
response.LastModified,
resp.Header.Get("Expires"),
c.username != "",
) )
// Ignore caching headers for feeds that do not want any cache. // Ignore caching headers for feeds that do not want any cache.
@ -190,7 +203,8 @@ func (c *Client) executeRequest(request *http.Request) (*Response, error) {
} }
func (c *Client) buildRequest(method string, body io.Reader) (*http.Request, error) { func (c *Client) buildRequest(method string, body io.Reader) (*http.Request, error) {
request, err := http.NewRequest(method, c.url, body) c.requestURL = url_helper.RequestURI(c.inputURL)
request, err := http.NewRequest(method, c.requestURL, body)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -238,5 +252,5 @@ func (c *Client) buildHeaders() http.Header {
// New returns a new HTTP client. // New returns a new HTTP client.
func New(url string) *Client { func New(url string) *Client {
return &Client{url: url, userAgent: DefaultUserAgent, Insecure: false} return &Client{inputURL: url, userAgent: DefaultUserAgent, Insecure: false}
} }

View File

@ -6,6 +6,7 @@ package client // import "miniflux.app/http/client"
import ( import (
"bytes" "bytes"
"fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"regexp" "regexp"
@ -24,10 +25,24 @@ type Response struct {
EffectiveURL string EffectiveURL string
LastModified string LastModified string
ETag string ETag string
Expires string
ContentType string ContentType string
ContentLength int64 ContentLength int64
} }
func (r *Response) String() string {
return fmt.Sprintf(
`StatusCode=%d EffectiveURL=%q LastModified=%q ETag=%s Expires=%s ContentType=%q ContentLength=%d`,
r.StatusCode,
r.EffectiveURL,
r.LastModified,
r.ETag,
r.Expires,
r.ContentType,
r.ContentLength,
)
}
// IsNotFound returns true if the resource doesn't exists anymore. // IsNotFound returns true if the resource doesn't exists anymore.
func (r *Response) IsNotFound() bool { func (r *Response) IsNotFound() bool {
return r.StatusCode == 404 || r.StatusCode == 410 return r.StatusCode == 404 || r.StatusCode == 410
@ -105,8 +120,8 @@ func (r *Response) EnsureUnicodeBody() (err error) {
return err return err
} }
// String returns the response body as string. // BodyAsString returns the response body as string.
func (r *Response) String() string { func (r *Response) BodyAsString() string {
bytes, _ := ioutil.ReadAll(r.Body) bytes, _ := ioutil.ReadAll(r.Body)
return string(bytes) return string(bytes)
} }

View File

@ -95,7 +95,7 @@ func TestToString(t *testing.T) {
input := `test` input := `test`
r := &Response{Body: strings.NewReader(input)} r := &Response{Body: strings.NewReader(input)}
if r.String() != input { if r.BodyAsString() != input {
t.Error(`Unexpected ouput`) t.Error(`Unexpected ouput`)
} }
} }
@ -140,7 +140,7 @@ func TestEnsureUnicodeWithHTMLDocuments(t *testing.T) {
t.Fatalf(`Unicode conversion error for %q - %q: %v`, tc.filename, tc.contentType, parseErr) t.Fatalf(`Unicode conversion error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
} }
isUnicode := utf8.ValidString(r.String()) isUnicode := utf8.ValidString(r.BodyAsString())
if isUnicode != tc.convertedToUnicode { if isUnicode != tc.convertedToUnicode {
t.Errorf(`Unicode conversion %q - %q, got: %v, expected: %v`, t.Errorf(`Unicode conversion %q - %q, got: %v, expected: %v`,
tc.filename, tc.contentType, isUnicode, tc.convertedToUnicode) tc.filename, tc.contentType, isUnicode, tc.convertedToUnicode)

View File

@ -52,7 +52,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL) return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
} }
subscription, parseErr := parser.ParseFeed(response.String()) subscription, parseErr := parser.ParseFeed(response.BodyAsString())
if parseErr != nil { if parseErr != nil {
return nil, parseErr return nil, parseErr
} }
@ -106,7 +106,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) { if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID) logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID)
updatedFeed, parseErr := parser.ParseFeed(response.String()) updatedFeed, parseErr := parser.ParseFeed(response.BodyAsString())
if parseErr != nil { if parseErr != nil {
originalFeed.WithError(parseErr.Localize(printer)) originalFeed.WithError(parseErr.Localize(printer))
h.store.UpdateFeedError(originalFeed) h.store.UpdateFeedError(originalFeed)

View File

@ -191,7 +191,7 @@ func TestDifferentEncodingWithResponse(t *testing.T) {
t.Fatalf(`Encoding error for %q: %v`, tc.filename, encodingErr) t.Fatalf(`Encoding error for %q: %v`, tc.filename, encodingErr)
} }
feed, parseErr := ParseFeed(r.String()) feed, parseErr := ParseFeed(r.BodyAsString())
if parseErr != nil { if parseErr != nil {
t.Fatalf(`Parsing error for %q - %q: %v`, tc.filename, tc.contentType, parseErr) t.Fatalf(`Parsing error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
} }

View File

@ -18,7 +18,7 @@ import (
) )
var ( var (
errUnreadableDoc = "Unable to analyze this page: %v" errUnreadableDoc = "Unable to analyze this page: %v"
) )
// FindSubscriptions downloads and try to find one or more subscriptions from an URL. // FindSubscriptions downloads and try to find one or more subscriptions from an URL.
@ -31,7 +31,7 @@ func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscr
return nil, err return nil, err
} }
body := response.String() body := response.BodyAsString()
if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown { if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
var subscriptions Subscriptions var subscriptions Subscriptions
subscriptions = append(subscriptions, &Subscription{ subscriptions = append(subscriptions, &Subscription{

View File

@ -4,9 +4,12 @@
package url // import "miniflux.app/url" package url // import "miniflux.app/url"
import "net/url" import (
import "fmt" "fmt"
import "strings" "net/url"
"sort"
"strings"
)
// AbsoluteURL converts the input URL as absolute URL if necessary. // AbsoluteURL converts the input URL as absolute URL if necessary.
func AbsoluteURL(baseURL, input string) (string, error) { func AbsoluteURL(baseURL, input string) (string, error) {
@ -69,3 +72,51 @@ func Domain(websiteURL string) string {
return parsedURL.Host return parsedURL.Host
} }
// RequestURI returns the encoded URI to be used in HTTP requests.
func RequestURI(websiteURL string) string {
u, err := url.Parse(websiteURL)
if err != nil {
return websiteURL
}
queryValues := u.Query()
u.RawQuery = "" // Clear RawQuery to make sure it's encoded properly.
u.Fragment = "" // Clear fragment because Web browsers strip #fragment before sending the URL to a web server.
var buf strings.Builder
buf.WriteString(u.String())
if len(queryValues) > 0 {
buf.WriteByte('?')
// Sort keys.
keys := make([]string, 0, len(queryValues))
for k := range queryValues {
keys = append(keys, k)
}
sort.Strings(keys)
i := 0
for _, key := range keys {
keyEscaped := url.QueryEscape(key)
values := queryValues[key]
for _, value := range values {
if i > 0 {
buf.WriteByte('&')
}
buf.WriteString(keyEscaped)
// As opposed to the standard library, we append the = only if the value is not empty.
if value != "" {
buf.WriteByte('=')
buf.WriteString(url.QueryEscape(value))
}
i++
}
}
}
return buf.String()
}

View File

@ -71,3 +71,25 @@ func TestDomain(t *testing.T) {
} }
} }
} }
func TestRequestURI(t *testing.T) {
scenarios := map[string]string{
"https://www.example.org": "https://www.example.org",
"https://user:password@www.example.org": "https://user:password@www.example.org",
"https://www.example.org/path with spaces": "https://www.example.org/path%20with%20spaces",
"https://www.example.org/path#test": "https://www.example.org/path",
"https://www.example.org/path?abc#test": "https://www.example.org/path?abc",
"https://www.example.org/path?a=b&a=c": "https://www.example.org/path?a=b&a=c",
"https://www.example.org/path?a=b&a=c&d": "https://www.example.org/path?a=b&a=c&d",
"https://www.example.org/path?atom": "https://www.example.org/path?atom",
"https://www.example.org/path?测试=测试": "https://www.example.org/path?%E6%B5%8B%E8%AF%95=%E6%B5%8B%E8%AF%95",
"https://www.example.org/url=http%3A%2F%2Fwww.example.com%2Ffeed%2F&max=20": "https://www.example.org/url=http%3A%2F%2Fwww.example.com%2Ffeed%2F&max=20",
}
for input, expected := range scenarios {
actual := RequestURI(input)
if actual != expected {
t.Errorf(`Unexpected result, got %q instead of %q`, actual, expected)
}
}
}