mirror of https://github.com/miniflux/v2.git
Normalize URL query string before executing HTTP requests
- Make sure query strings parameters are encoded - As opposed to the standard library, do not append equal sign for query parameters with empty value - Strip URL fragments like Web browsers
This commit is contained in:
parent
200b1c304b
commit
3debf75eb9
|
@ -22,6 +22,7 @@ import (
|
||||||
"miniflux.app/errors"
|
"miniflux.app/errors"
|
||||||
"miniflux.app/logger"
|
"miniflux.app/logger"
|
||||||
"miniflux.app/timer"
|
"miniflux.app/timer"
|
||||||
|
url_helper "miniflux.app/url"
|
||||||
"miniflux.app/version"
|
"miniflux.app/version"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -37,7 +38,8 @@ var (
|
||||||
|
|
||||||
// Client is a HTTP Client :)
|
// Client is a HTTP Client :)
|
||||||
type Client struct {
|
type Client struct {
|
||||||
url string
|
inputURL string
|
||||||
|
requestURL string
|
||||||
etagHeader string
|
etagHeader string
|
||||||
lastModifiedHeader string
|
lastModifiedHeader string
|
||||||
authorizationHeader string
|
authorizationHeader string
|
||||||
|
@ -47,6 +49,18 @@ type Client struct {
|
||||||
Insecure bool
|
Insecure bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Client) String() string {
|
||||||
|
return fmt.Sprintf(
|
||||||
|
`InputURL=%q RequestURL=%q ETag=%s LastModified=%q BasicAuth=%v UserAgent=%q`,
|
||||||
|
c.inputURL,
|
||||||
|
c.requestURL,
|
||||||
|
c.etagHeader,
|
||||||
|
c.lastModifiedHeader,
|
||||||
|
c.authorizationHeader != "" || (c.username != "" && c.password != ""),
|
||||||
|
c.userAgent,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// WithCredentials defines the username/password for HTTP Basic authentication.
|
// WithCredentials defines the username/password for HTTP Basic authentication.
|
||||||
func (c *Client) WithCredentials(username, password string) *Client {
|
func (c *Client) WithCredentials(username, password string) *Client {
|
||||||
if username != "" && password != "" {
|
if username != "" && password != "" {
|
||||||
|
@ -115,7 +129,12 @@ func (c *Client) PostJSON(data interface{}) (*Response, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) executeRequest(request *http.Request) (*Response, error) {
|
func (c *Client) executeRequest(request *http.Request) (*Response, error) {
|
||||||
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[HttpClient] url=%s", c.url))
|
defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[HttpClient] inputURL=%s", c.inputURL))
|
||||||
|
|
||||||
|
logger.Debug("[HttpClient:Before] Method=%s %s",
|
||||||
|
request.Method,
|
||||||
|
c.String(),
|
||||||
|
)
|
||||||
|
|
||||||
client := c.buildClient()
|
client := c.buildClient()
|
||||||
resp, err := client.Do(request)
|
resp, err := client.Do(request)
|
||||||
|
@ -162,21 +181,15 @@ func (c *Client) executeRequest(request *http.Request) (*Response, error) {
|
||||||
EffectiveURL: resp.Request.URL.String(),
|
EffectiveURL: resp.Request.URL.String(),
|
||||||
LastModified: resp.Header.Get("Last-Modified"),
|
LastModified: resp.Header.Get("Last-Modified"),
|
||||||
ETag: resp.Header.Get("ETag"),
|
ETag: resp.Header.Get("ETag"),
|
||||||
|
Expires: resp.Header.Get("Expires"),
|
||||||
ContentType: resp.Header.Get("Content-Type"),
|
ContentType: resp.Header.Get("Content-Type"),
|
||||||
ContentLength: resp.ContentLength,
|
ContentLength: resp.ContentLength,
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.Debug("[HttpClient:%s] URL=%s, EffectiveURL=%s, Code=%d, Length=%d, Type=%s, ETag=%s, LastMod=%s, Expires=%s, Auth=%v",
|
logger.Debug("[HttpClient:After] Method=%s %s; Response => %s",
|
||||||
request.Method,
|
request.Method,
|
||||||
c.url,
|
c.String(),
|
||||||
response.EffectiveURL,
|
response,
|
||||||
response.StatusCode,
|
|
||||||
resp.ContentLength,
|
|
||||||
response.ContentType,
|
|
||||||
response.ETag,
|
|
||||||
response.LastModified,
|
|
||||||
resp.Header.Get("Expires"),
|
|
||||||
c.username != "",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Ignore caching headers for feeds that do not want any cache.
|
// Ignore caching headers for feeds that do not want any cache.
|
||||||
|
@ -190,7 +203,8 @@ func (c *Client) executeRequest(request *http.Request) (*Response, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) buildRequest(method string, body io.Reader) (*http.Request, error) {
|
func (c *Client) buildRequest(method string, body io.Reader) (*http.Request, error) {
|
||||||
request, err := http.NewRequest(method, c.url, body)
|
c.requestURL = url_helper.RequestURI(c.inputURL)
|
||||||
|
request, err := http.NewRequest(method, c.requestURL, body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -238,5 +252,5 @@ func (c *Client) buildHeaders() http.Header {
|
||||||
|
|
||||||
// New returns a new HTTP client.
|
// New returns a new HTTP client.
|
||||||
func New(url string) *Client {
|
func New(url string) *Client {
|
||||||
return &Client{url: url, userAgent: DefaultUserAgent, Insecure: false}
|
return &Client{inputURL: url, userAgent: DefaultUserAgent, Insecure: false}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ package client // import "miniflux.app/http/client"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
@ -24,10 +25,24 @@ type Response struct {
|
||||||
EffectiveURL string
|
EffectiveURL string
|
||||||
LastModified string
|
LastModified string
|
||||||
ETag string
|
ETag string
|
||||||
|
Expires string
|
||||||
ContentType string
|
ContentType string
|
||||||
ContentLength int64
|
ContentLength int64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *Response) String() string {
|
||||||
|
return fmt.Sprintf(
|
||||||
|
`StatusCode=%d EffectiveURL=%q LastModified=%q ETag=%s Expires=%s ContentType=%q ContentLength=%d`,
|
||||||
|
r.StatusCode,
|
||||||
|
r.EffectiveURL,
|
||||||
|
r.LastModified,
|
||||||
|
r.ETag,
|
||||||
|
r.Expires,
|
||||||
|
r.ContentType,
|
||||||
|
r.ContentLength,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
// IsNotFound returns true if the resource doesn't exists anymore.
|
// IsNotFound returns true if the resource doesn't exists anymore.
|
||||||
func (r *Response) IsNotFound() bool {
|
func (r *Response) IsNotFound() bool {
|
||||||
return r.StatusCode == 404 || r.StatusCode == 410
|
return r.StatusCode == 404 || r.StatusCode == 410
|
||||||
|
@ -105,8 +120,8 @@ func (r *Response) EnsureUnicodeBody() (err error) {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// String returns the response body as string.
|
// BodyAsString returns the response body as string.
|
||||||
func (r *Response) String() string {
|
func (r *Response) BodyAsString() string {
|
||||||
bytes, _ := ioutil.ReadAll(r.Body)
|
bytes, _ := ioutil.ReadAll(r.Body)
|
||||||
return string(bytes)
|
return string(bytes)
|
||||||
}
|
}
|
||||||
|
|
|
@ -95,7 +95,7 @@ func TestToString(t *testing.T) {
|
||||||
input := `test`
|
input := `test`
|
||||||
r := &Response{Body: strings.NewReader(input)}
|
r := &Response{Body: strings.NewReader(input)}
|
||||||
|
|
||||||
if r.String() != input {
|
if r.BodyAsString() != input {
|
||||||
t.Error(`Unexpected ouput`)
|
t.Error(`Unexpected ouput`)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -140,7 +140,7 @@ func TestEnsureUnicodeWithHTMLDocuments(t *testing.T) {
|
||||||
t.Fatalf(`Unicode conversion error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
|
t.Fatalf(`Unicode conversion error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
isUnicode := utf8.ValidString(r.String())
|
isUnicode := utf8.ValidString(r.BodyAsString())
|
||||||
if isUnicode != tc.convertedToUnicode {
|
if isUnicode != tc.convertedToUnicode {
|
||||||
t.Errorf(`Unicode conversion %q - %q, got: %v, expected: %v`,
|
t.Errorf(`Unicode conversion %q - %q, got: %v, expected: %v`,
|
||||||
tc.filename, tc.contentType, isUnicode, tc.convertedToUnicode)
|
tc.filename, tc.contentType, isUnicode, tc.convertedToUnicode)
|
||||||
|
|
|
@ -52,7 +52,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
|
||||||
return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
|
return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
|
||||||
}
|
}
|
||||||
|
|
||||||
subscription, parseErr := parser.ParseFeed(response.String())
|
subscription, parseErr := parser.ParseFeed(response.BodyAsString())
|
||||||
if parseErr != nil {
|
if parseErr != nil {
|
||||||
return nil, parseErr
|
return nil, parseErr
|
||||||
}
|
}
|
||||||
|
@ -106,7 +106,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
|
||||||
if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
|
if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
|
||||||
logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID)
|
logger.Debug("[Handler:RefreshFeed] Feed #%d has been modified", feedID)
|
||||||
|
|
||||||
updatedFeed, parseErr := parser.ParseFeed(response.String())
|
updatedFeed, parseErr := parser.ParseFeed(response.BodyAsString())
|
||||||
if parseErr != nil {
|
if parseErr != nil {
|
||||||
originalFeed.WithError(parseErr.Localize(printer))
|
originalFeed.WithError(parseErr.Localize(printer))
|
||||||
h.store.UpdateFeedError(originalFeed)
|
h.store.UpdateFeedError(originalFeed)
|
||||||
|
|
|
@ -191,7 +191,7 @@ func TestDifferentEncodingWithResponse(t *testing.T) {
|
||||||
t.Fatalf(`Encoding error for %q: %v`, tc.filename, encodingErr)
|
t.Fatalf(`Encoding error for %q: %v`, tc.filename, encodingErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
feed, parseErr := ParseFeed(r.String())
|
feed, parseErr := ParseFeed(r.BodyAsString())
|
||||||
if parseErr != nil {
|
if parseErr != nil {
|
||||||
t.Fatalf(`Parsing error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
|
t.Fatalf(`Parsing error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
errUnreadableDoc = "Unable to analyze this page: %v"
|
errUnreadableDoc = "Unable to analyze this page: %v"
|
||||||
)
|
)
|
||||||
|
|
||||||
// FindSubscriptions downloads and try to find one or more subscriptions from an URL.
|
// FindSubscriptions downloads and try to find one or more subscriptions from an URL.
|
||||||
|
@ -31,7 +31,7 @@ func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscr
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
body := response.String()
|
body := response.BodyAsString()
|
||||||
if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
|
if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
|
||||||
var subscriptions Subscriptions
|
var subscriptions Subscriptions
|
||||||
subscriptions = append(subscriptions, &Subscription{
|
subscriptions = append(subscriptions, &Subscription{
|
||||||
|
|
57
url/url.go
57
url/url.go
|
@ -4,9 +4,12 @@
|
||||||
|
|
||||||
package url // import "miniflux.app/url"
|
package url // import "miniflux.app/url"
|
||||||
|
|
||||||
import "net/url"
|
import (
|
||||||
import "fmt"
|
"fmt"
|
||||||
import "strings"
|
"net/url"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
// AbsoluteURL converts the input URL as absolute URL if necessary.
|
// AbsoluteURL converts the input URL as absolute URL if necessary.
|
||||||
func AbsoluteURL(baseURL, input string) (string, error) {
|
func AbsoluteURL(baseURL, input string) (string, error) {
|
||||||
|
@ -69,3 +72,51 @@ func Domain(websiteURL string) string {
|
||||||
|
|
||||||
return parsedURL.Host
|
return parsedURL.Host
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RequestURI returns the encoded URI to be used in HTTP requests.
|
||||||
|
func RequestURI(websiteURL string) string {
|
||||||
|
u, err := url.Parse(websiteURL)
|
||||||
|
if err != nil {
|
||||||
|
return websiteURL
|
||||||
|
}
|
||||||
|
|
||||||
|
queryValues := u.Query()
|
||||||
|
u.RawQuery = "" // Clear RawQuery to make sure it's encoded properly.
|
||||||
|
u.Fragment = "" // Clear fragment because Web browsers strip #fragment before sending the URL to a web server.
|
||||||
|
|
||||||
|
var buf strings.Builder
|
||||||
|
buf.WriteString(u.String())
|
||||||
|
|
||||||
|
if len(queryValues) > 0 {
|
||||||
|
buf.WriteByte('?')
|
||||||
|
|
||||||
|
// Sort keys.
|
||||||
|
keys := make([]string, 0, len(queryValues))
|
||||||
|
for k := range queryValues {
|
||||||
|
keys = append(keys, k)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
|
||||||
|
i := 0
|
||||||
|
for _, key := range keys {
|
||||||
|
keyEscaped := url.QueryEscape(key)
|
||||||
|
values := queryValues[key]
|
||||||
|
for _, value := range values {
|
||||||
|
if i > 0 {
|
||||||
|
buf.WriteByte('&')
|
||||||
|
}
|
||||||
|
buf.WriteString(keyEscaped)
|
||||||
|
|
||||||
|
// As opposed to the standard library, we append the = only if the value is not empty.
|
||||||
|
if value != "" {
|
||||||
|
buf.WriteByte('=')
|
||||||
|
buf.WriteString(url.QueryEscape(value))
|
||||||
|
}
|
||||||
|
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf.String()
|
||||||
|
}
|
||||||
|
|
|
@ -71,3 +71,25 @@ func TestDomain(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRequestURI(t *testing.T) {
|
||||||
|
scenarios := map[string]string{
|
||||||
|
"https://www.example.org": "https://www.example.org",
|
||||||
|
"https://user:password@www.example.org": "https://user:password@www.example.org",
|
||||||
|
"https://www.example.org/path with spaces": "https://www.example.org/path%20with%20spaces",
|
||||||
|
"https://www.example.org/path#test": "https://www.example.org/path",
|
||||||
|
"https://www.example.org/path?abc#test": "https://www.example.org/path?abc",
|
||||||
|
"https://www.example.org/path?a=b&a=c": "https://www.example.org/path?a=b&a=c",
|
||||||
|
"https://www.example.org/path?a=b&a=c&d": "https://www.example.org/path?a=b&a=c&d",
|
||||||
|
"https://www.example.org/path?atom": "https://www.example.org/path?atom",
|
||||||
|
"https://www.example.org/path?测试=测试": "https://www.example.org/path?%E6%B5%8B%E8%AF%95=%E6%B5%8B%E8%AF%95",
|
||||||
|
"https://www.example.org/url=http%3A%2F%2Fwww.example.com%2Ffeed%2F&max=20": "https://www.example.org/url=http%3A%2F%2Fwww.example.com%2Ffeed%2F&max=20",
|
||||||
|
}
|
||||||
|
|
||||||
|
for input, expected := range scenarios {
|
||||||
|
actual := RequestURI(input)
|
||||||
|
if actual != expected {
|
||||||
|
t.Errorf(`Unexpected result, got %q instead of %q`, actual, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue