From 827683ab59131ec38ed7cfa268bcaa6dc77d1412 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Wed, 13 Dec 2017 20:16:15 -0800 Subject: [PATCH] Make sure that item URL are absolute --- reader/atom/atom.go | 15 ++++++++++----- reader/atom/parser_test.go | 26 ++++++++++++++++++++++++++ reader/json/json.go | 6 ++++++ reader/json/parser_test.go | 25 +++++++++++++++++++++++++ reader/rdf/parser_test.go | 25 +++++++++++++++++++++++++ reader/rdf/rdf.go | 7 ++++++- reader/rss/parser_test.go | 21 +++++++++++++++++++++ reader/rss/rss.go | 15 ++++++++++----- 8 files changed, 129 insertions(+), 11 deletions(-) diff --git a/reader/atom/atom.go b/reader/atom/atom.go index f72a150c..315961ed 100644 --- a/reader/atom/atom.go +++ b/reader/atom/atom.go @@ -14,6 +14,7 @@ import ( "github.com/miniflux/miniflux/helper" "github.com/miniflux/miniflux/model" "github.com/miniflux/miniflux/reader/date" + "github.com/miniflux/miniflux/url" ) type atomFeed struct { @@ -70,10 +71,19 @@ func (a *atomFeed) Transform() *model.Feed { for _, entry := range a.Entries { item := entry.Transform() + entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL) + if err == nil { + item.URL = entryURL + } + if item.Author == "" { item.Author = getAuthor(a.Author) } + if item.Title == "" { + item.Title = item.URL + } + feed.Entries = append(feed.Entries, item) } @@ -89,11 +99,6 @@ func (a *atomEntry) Transform() *model.Entry { entry.Content = getContent(a) entry.Title = strings.TrimSpace(a.Title) entry.Enclosures = getEnclosures(a) - - if entry.Title == "" { - entry.Title = entry.URL - } - return entry } diff --git a/reader/atom/parser_test.go b/reader/atom/parser_test.go index 66ca0f05..be07383c 100644 --- a/reader/atom/parser_test.go +++ b/reader/atom/parser_test.go @@ -152,6 +152,32 @@ func TestParseFeedURL(t *testing.T) { } } +func TestParseEntryWithRelativeURL(t *testing.T) { + data := ` + + Example Feed + + + + Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].URL != "http://example.org/something.html" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } +} + func TestParseEntryTitleWithWhitespaces(t *testing.T) { data := ` diff --git a/reader/json/json.go b/reader/json/json.go index 59249163..ad920686 100644 --- a/reader/json/json.go +++ b/reader/json/json.go @@ -13,6 +13,7 @@ import ( "github.com/miniflux/miniflux/model" "github.com/miniflux/miniflux/reader/date" "github.com/miniflux/miniflux/reader/sanitizer" + "github.com/miniflux/miniflux/url" ) type jsonFeed struct { @@ -66,6 +67,11 @@ func (j *jsonFeed) Transform() *model.Feed { for _, item := range j.Items { entry := item.Transform() + entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL) + if err == nil { + entry.URL = entryURL + } + if entry.Author == "" { entry.Author = j.GetAuthor() } diff --git a/reader/json/parser_test.go b/reader/json/parser_test.go index e6fa9ed5..a3d5b5be 100644 --- a/reader/json/parser_test.go +++ b/reader/json/parser_test.go @@ -174,6 +174,31 @@ func TestParsePodcast(t *testing.T) { } } +func TestParseFeedWithRelativeURL(t *testing.T) { + data := `{ + "version": "https://jsonfeed.org/version/1", + "title": "Example", + "home_page_url": "https://example.org/", + "feed_url": "https://example.org/feed.json", + "items": [ + { + "id": "2347259", + "url": "something.html", + "date_published": "2016-02-09T14:22:00-07:00" + } + ] + }` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].URL != "https://example.org/something.html" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } +} + func TestParseAuthor(t *testing.T) { data := `{ "version": "https://jsonfeed.org/version/1", diff --git a/reader/rdf/parser_test.go b/reader/rdf/parser_test.go index 6447bf2e..e025e537 100644 --- a/reader/rdf/parser_test.go +++ b/reader/rdf/parser_test.go @@ -266,6 +266,31 @@ func TestParseItemWithOnlyFeedAuthor(t *testing.T) { } } +func TestParseItemRelativeURL(t *testing.T) { + data := ` + + + Example + http://example.org + + + + Title + Test + something.html + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].URL != "http://example.org/something.html" { + t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL) + } +} + func TestParseItemWithoutLink(t *testing.T) { data := ` diff --git a/reader/rdf/rdf.go b/reader/rdf/rdf.go index ad4d53a0..3586f6e9 100644 --- a/reader/rdf/rdf.go +++ b/reader/rdf/rdf.go @@ -12,6 +12,7 @@ import ( "github.com/miniflux/miniflux/helper" "github.com/miniflux/miniflux/model" "github.com/miniflux/miniflux/reader/sanitizer" + "github.com/miniflux/miniflux/url" ) type rdfFeed struct { @@ -29,13 +30,17 @@ func (r *rdfFeed) Transform() *model.Feed { for _, item := range r.Items { entry := item.Transform() - if entry.Author == "" && r.Creator != "" { entry.Author = sanitizer.StripTags(r.Creator) } if entry.URL == "" { entry.URL = feed.SiteURL + } else { + entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL) + if err == nil { + entry.URL = entryURL + } } feed.Entries = append(feed.Entries, entry) diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go index 982d4f0b..7d724177 100644 --- a/reader/rss/parser_test.go +++ b/reader/rss/parser_test.go @@ -537,6 +537,27 @@ func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) { } } +func TestParseEntryWithRelativeURL(t *testing.T) { + data := ` + + + https://example.org/ + + item.html + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Title != "https://example.org/item.html" { + t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) + } +} + func TestParseInvalidXml(t *testing.T) { data := `garbage` _, err := Parse(bytes.NewBufferString(data)) diff --git a/reader/rss/rss.go b/reader/rss/rss.go index b80692c0..fcfccbd1 100644 --- a/reader/rss/rss.go +++ b/reader/rss/rss.go @@ -15,6 +15,7 @@ import ( "github.com/miniflux/miniflux/helper" "github.com/miniflux/miniflux/model" "github.com/miniflux/miniflux/reader/date" + "github.com/miniflux/miniflux/url" ) type rssFeed struct { @@ -103,6 +104,15 @@ func (r *rssFeed) Transform() *model.Feed { if entry.URL == "" { entry.URL = feed.SiteURL + } else { + entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL) + if err == nil { + entry.URL = entryURL + } + } + + if entry.Title == "" { + entry.Title = entry.URL } feed.Entries = append(feed.Entries, entry) @@ -213,11 +223,6 @@ func (r *rssItem) Transform() *model.Entry { entry.Content = r.GetContent() entry.Title = strings.TrimSpace(r.Title) entry.Enclosures = r.GetEnclosures() - - if entry.Title == "" { - entry.Title = entry.URL - } - return entry }